Merge remote-tracking branch 'public/master' into vulkan

This commit is contained in:
Jason Ekstrand 2016-04-04 13:58:05 -07:00
commit 6a04968784
84 changed files with 2249 additions and 1849 deletions

View file

@ -16,6 +16,13 @@
<h1>News</h1>
<h2>April 4, 2016</h2>
<p>
<a href="relnotes/11.2.0.html">Mesa 11.2.0</a> is released. This is a
new development release. See the release notes for more information
about the release.
</p>
<h2>February 10, 2016</h2>
<p>
<a href="relnotes/11.1.2.html">Mesa 11.1.2</a> is released.

View file

@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
</p>
<ul>
<li><a href="relnotes/11.2.0.html">11.2.0 release notes</a>
<li><a href="relnotes/11.1.2.html">11.1.2 release notes</a>
<li><a href="relnotes/11.0.9.html">11.0.9 release notes</a>
<li><a href="relnotes/11.1.1.html">11.1.1 release notes</a>

View file

@ -14,7 +14,7 @@
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 11.2.0 Release Notes / TBD</h1>
<h1>Mesa 11.2.0 Release Notes / 4 April 2016</h1>
<p>
Mesa 11.2.0 is a new development release.
@ -33,7 +33,8 @@ because compatibility contexts are not supported.
<h2>SHA256 checksums</h2>
<pre>
TBD.
dea3d8143929aad5c24ef0993ddb05807b30c284b488fc62903adfcc1c127887 mesa-11.2.0.tar.gz
1c1fed2674abf3f16ed2623e9a5694d6752c293194e18462ebc644a19cfaafb2 mesa-11.2.0.tar.xz
</pre>
@ -70,7 +71,217 @@ Note: some of the new features are only available with certain drivers.
<h2>Bug fixes</h2>
TBD.
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27512">Bug 27512</a> - Illegal instruction _mesa_x86_64_transform_points4_general</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75165">Bug 75165</a> - compute.c:464:49: error: function definition is not allowed here</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79783">Bug 79783</a> - Distorted output in obs-studio where other vendors &quot;work&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89330">Bug 89330</a> - piglit glsl-1.50 invariant-qualifier-in-out-block-01 regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89969">Bug 89969</a> - nouveau: add support for chunk decoding in order to support vaapi (st/va)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90348">Bug 90348</a> - Spilling failure of b96 merged value</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91806">Bug 91806</a> - configure does not test whether assembler supports sse4.1</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91927">Bug 91927</a> - [SKL] [regression] piglit compressed textures tests fail with kernel upgrade</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92193">Bug 92193</a> - [SKL] ES2-CTS.gtf.GL2ExtensionTests.compressed_astc_texture.compressed_astc_texture fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92229">Bug 92229</a> - [APITRACE] SOMA have serious graphical errors</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92233">Bug 92233</a> - Unigine Heaven 4.0 silhouette run</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92363">Bug 92363</a> - [BSW/BDW] ogles1conform Gets test fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92438">Bug 92438</a> - Segfault in pushbuf_kref when running the android emulator (qemu) on nv50</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92589">Bug 92589</a> - [BDW BSW SKL CTS] ES31-CTS.texture_gather.* GPU_HANG</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92595">Bug 92595</a> - [HSW,BDW,SKL][GLES 3.1 CTS] Big difference in the results for the ES31-CTS.shader_bitfield_operation.* tests</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92609">Bug 92609</a> - [BDW, BSW] piglit sampling-2d-array-as-2d-layer fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92687">Bug 92687</a> - Add support for ARB_internalformat_query2</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92706">Bug 92706</a> - glBlitFramebuffer refuses to blit RGBA to RGB with MSAA</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92709">Bug 92709</a> - &quot;LLVM triggered Diagnostic Handler: unsupported call to function ldexpf in main&quot; when starting race in stuntrally</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92743">Bug 92743</a> - Centroid shouldn't have to match between the FS and the VS</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92759">Bug 92759</a> - [Regression, bisected] Visuals without alpha bits are not sRGB-capable</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92849">Bug 92849</a> - [IVB HSW BDW] piglit image load/store load-from-cleared-image.shader_test fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92909">Bug 92909</a> - Offset/alignment issue with layout std140 and vec3</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93004">Bug 93004</a> - Guild Wars 2 crash on nouveau DX11 cards</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93048">Bug 93048</a> - [CTS regression] mesa af2723 breaks GL Conformance for debug extension</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93063">Bug 93063</a> - drm_helper.h:227:1: error: static declaration of pipe_virgl_create_screen follows non-static declaration</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93091">Bug 93091</a> - [opencl] segfault when running any opencl programs (like clinfo)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93092">Bug 93092</a> - lp_test_format regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93126">Bug 93126</a> - wrongly claim supporting GL_EXT_texture_rg</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93180">Bug 93180</a> - [regression] arb_separate_shader_objects.active sampler conflict fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93189">Bug 93189</a> - &quot;./util/u_inlines.h&quot;, line 83: operands have incompatible types: void &quot;:&quot; int</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93215">Bug 93215</a> - [Regression bisected] Ogles1conform Automatic mipmap generation test is fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93235">Bug 93235</a> - [regression] dispatch sanity broken by GetPointerv</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93257">Bug 93257</a> - [SKL, bisected] ASTC dEQP tests segfault</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93264">Bug 93264</a> - Tonga VM Faults since llvm ScheduleDAGInstrs: Rework schedule graph builder.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93266">Bug 93266</a> - gl_arb_shading_language_420pack does not allow binding of image variables</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93300">Bug 93300</a> - Two Worlds 2 renders water incorrectly</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93312">Bug 93312</a> - [SKL][GLES 3.1 CTS] ES31-CTS.layout_binding* GPU_HANG</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93320">Bug 93320</a> - [HSW,BDW,SKL][GLES 3.1 CTS] ES31-CTS.vertex_attrib_binding.advanced-bindingUpdate fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93322">Bug 93322</a> - [HSW,BDW,SKL][GLES 3.1 CTS] ES31-CTS.compute_shader.resource-ubo fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93323">Bug 93323</a> - [HSW,BDW,SKL][GLES 3.1 CTS]ES31-CTS.shader_image_load_store.basic-allTargets-store-fs fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93325">Bug 93325</a> - [HSW,BDW,SKL]ES31-CTS.explicit_uniform_location.uniform-loc-* 2 tests fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93339">Bug 93339</a> - glLinkProgram() should fail when a varying is never written to in a previous stage</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93348">Bug 93348</a> - [HSW,BDW,SKL][GLES 3.1 CTS] ES31-CTS.compute_shader.* segfault</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93358">Bug 93358</a> - [HSW] Unreal Elemental demo - assertion error in copy_image_with_blitter</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93387">Bug 93387</a> - inverse() shouldn't be exposed in GLSL 1.20 and 1.30</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93388">Bug 93388</a> - [i965, regression, bisection] MESA_FORMAT_B8G8R8X8_SRGB changes break kwin</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93407">Bug 93407</a> - [SKL][GLES 3.1 CTS]ES31-CTS.compute_shader.resources-texture fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93410">Bug 93410</a> - [BDW,SKL][GLES 3.1 CTS]ES31-CTS.shader_image_load_store.negative-linkErrors fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93418">Bug 93418</a> - Geometry Shaders output wrong vertices on Sandy Bridge</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93426">Bug 93426</a> - [SKL,BDW,BSW,BXT] CTS regression: es2-cts.gtf.gl2fixedtests.buffer_objects.buffer_object,s</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93524">Bug 93524</a> - Clover doesn't build</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93526">Bug 93526</a> - GfxBench 4 tessellation demos misrender</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93532">Bug 93532</a> - [HSW,BDW,SKL][GLES 3.1 CTS] ES31-CTS.compute_shader.*. Regression, bisected.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93540">Bug 93540</a> - [BISECTED, HSW] Rendering issue in Heaven (and other benchmarks)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93560">Bug 93560</a> - opt_combine_constants failing fabsf(reg-&gt;f) == table.imm[i].val assertion</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93599">Bug 93599</a> - Strange green flashes with &quot;Metro: Last Light Redux&quot; + &quot;Metro 2033 Redux&quot; with Intel Mesa driver</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93648">Bug 93648</a> - Random lines being rendered when playing Dolphin (geometry shaders related, w/ apitrace)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93650">Bug 93650</a> - GL_ARB_separate_shader_objects is buggy (PCSX2)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93667">Bug 93667</a> - Crash in eglCreateImageKHR with huge texture size</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93696">Bug 93696</a> - [HSW,BDW;SKL][GLES 3.1 CTS]ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max-* fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93700">Bug 93700</a> - [SKL, regression] deqp-gles2.functional.texture.completeness</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93717">Bug 93717</a> - Meta mipmap generation can corrupt texture state</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93722">Bug 93722</a> - Segfault when compiling shader with a subroutine that takes a parameter</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93725">Bug 93725</a> - [HSW, regression, bisected] ES31-CTS.texture_gather.*depth*</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93731">Bug 93731</a> - glUniformSubroutinesuiv segfaults when subroutine uniform is bound to a specific location</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93761">Bug 93761</a> - A conditional discard in a fragment shader causes no depth writing at all</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93790">Bug 93790</a> - [HSW] Use after free with compute programs</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93792">Bug 93792</a> - [HSW] intel_mipmap_tree.c:1325: intel_miptree_copy_slice: Assertion `src_mt-&gt;format == dst_mt-&gt;format</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93813">Bug 93813</a> - Incorrect viewport range when GL_CLIP_ORIGIN is GL_UPPER_LEFT</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93840">Bug 93840</a> - [i965] Alien: Isolation fails with GL_ARB_compute_shader enabled</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93862">Bug 93862</a> - [Bisected] &quot;drm/amdgpu: fix amdgpu_bo_pin_restricted VRAM placing v2&quot; is bad</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93878">Bug 93878</a> - [llvmpipe][softpipe] piglit arb_gpu_shader_fp64-double-gettransformfeedbackvarying regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93957">Bug 93957</a> - [HSW] Mishandling of sample count when using an attachment-less framebuffer (assertion error)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93961">Bug 93961</a> - virgl build failure after 2016-02-01 changes - no previous prototype for 'virgl_drm_winsys_create'</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93962">Bug 93962</a> - [HSW, regression, bisected, CTS] ES2-CTS.gtf.GL2FixedTests.scissor.scissor - segfault/asserts</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93989">Bug 93989</a> - build: flex-2.5.39 seems to be failing for glsl_lexer.ll</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94016">Bug 94016</a> - make check MesaExtensionsTest.AlphabeticallySorted regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94019">Bug 94019</a> - [bisected] 3D acceleration broken with gallium/radeon: just get num_tile_pipes from the winsys</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94050">Bug 94050</a> - test_vec4_register_coalesce regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94073">Bug 94073</a> - Miscompilation of abs_vec3_vert_xvary_ref.vert in WebGL conformance</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94081">Bug 94081</a> - [HSW] compute shader shared var + atomic op = fail</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94088">Bug 94088</a> - [llvmpipe] SIGFPE pthread_barrier_destroy.c:40</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94091">Bug 94091</a> - Tonga unreal elemental segfault since radeonsi: put image, fmask, and sampler descriptors into one array</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94100">Bug 94100</a> - [HSW] compute indirect dispatch with 0 work groups causes gpu hang</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94134">Bug 94134</a> - [regression] piglit.spec.arb_texture_view.sampling-2d-array-as-2d-layer assertion</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94139">Bug 94139</a> - [regression, HSW, IVB] piglit.spec.arb_compute_shader.minmax</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94150">Bug 94150</a> - UE4 Suntemple rendering errors</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94186">Bug 94186</a> - Crash when launching glxinfo and World of Warcraft with RV790</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94188">Bug 94188</a> - define (or undef) defined behaves stupidly</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94193">Bug 94193</a> - [llvmpipe] Line antialiasing looks different when GL_LINE_STIPPLE is enabled with pattern 0xffff</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94199">Bug 94199</a> - Shader abort/crash</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94253">Bug 94253</a> - [llvmpipe] piglit gl-1.0-swapbuffers-behavior regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94254">Bug 94254</a> - [llvmpipe] [softpipe] piglit read-front regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94257">Bug 94257</a> - [softpipe] piglit glx-copy-sub-buffer regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94274">Bug 94274</a> - [swrast] piglit arb_occlusion_query2-render regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94284">Bug 94284</a> - [radeonsi] outlast segfault on start</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94388">Bug 94388</a> - r600_blit.c:281: r600_decompress_depth_textures: Assertion `tex-&gt;is_depth &amp;&amp; !tex-&gt;is_flushing_texture' failed.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94412">Bug 94412</a> - Trine 3 misrender</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94481">Bug 94481</a> - softpipe - access violation in img_filter_2d_nearest</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94524">Bug 94524</a> - Wrong gl_TessLevelOuter interpretation for isolines</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94595">Bug 94595</a> - [Mesa AMD&amp;swrast] Texture views attached as framebuffers return their viewed texture's color encoding and render incorrectly</li>
</ul>
<h2>Changes</h2>
@ -78,7 +289,7 @@ Microsoft Visual Studio 2013 or later is now required for building
on Windows.
Previously, Visual Studio 2008 and later were supported.
TBD.
</div>
</body>

View file

@ -49,9 +49,10 @@ Note: some of the new features are only available with certain drivers.
<li>GL_ARB_shader_image_load_store on radeonsi, softpipe</li>
<li>GL_ARB_shader_image_size on radeonsi</li>
<li>GL_ATI_fragment_shader on all Gallium drivers</li>
<li>GL_EXT_base_instance on all drivers that support GL_ARB_base_instance</li>
<li>GL_OES_draw_buffers_indexed and GL_EXT_draw_buffers_indexed on all drivers that support GL_ARB_draw_buffers_blend</li>
<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
<li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li>
<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
</ul>
<h2>Bug fixes</h2>

View file

@ -178,11 +178,17 @@ typedef struct _RGNDATA {
#undef WINAPI
#endif /* WINAPI*/
#if defined(__x86_64__) || defined(_M_X64)
#define WINAPI __attribute__((ms_abi))
#else /* x86_64 */
#define WINAPI __attribute__((__stdcall__))
#endif /* x86_64 */
#ifdef __GNUC__
#if (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64)
#define WINAPI __attribute__((ms_abi))
#elif defined(__i386) || defined(_M_IX86)
#define WINAPI __attribute__((__stdcall__))
#else /* neither amd64 nor i386 */
#define WINAPI
#endif
#else /* __GNUC__ */
#define WINAPI
#endif
/* Implementation caps */
#define D3DPRESENT_BACK_BUFFERS_MAX 3

View file

@ -3326,6 +3326,7 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ qual->flags.q.depth_less
+ qual->flags.q.depth_unchanged;
if (depth_layout_count > 0
&& !state->is_version(420, 0)
&& !state->AMD_conservative_depth_enable
&& !state->ARB_conservative_depth_enable) {
_mesa_glsl_error(loc, state,
@ -3708,7 +3709,8 @@ get_variable_being_redeclared(ir_variable *var, YYLTYPE loc,
earlier->data.interpolation = var->data.interpolation;
/* Layout qualifiers for gl_FragDepth. */
} else if ((state->AMD_conservative_depth_enable ||
} else if ((state->is_version(420, 0) ||
state->AMD_conservative_depth_enable ||
state->ARB_conservative_depth_enable)
&& strcmp(var->name, "gl_FragDepth") == 0
&& earlier->type == var->type

View file

@ -210,6 +210,7 @@ static bool
shader_integer_mix(const _mesa_glsl_parse_state *state)
{
/* Availability predicate. Returns true when any one of the following holds:
 *  - state->is_version(450, 310) succeeds,
 *  - the ARB_ES3_1_compatibility enable flag is set on the parse state, or
 *  - v130(state) holds and the EXT_shader_integer_mix enable flag is set.
 *
 * NOTE(review): is_version() presumably takes the minimum desktop GLSL
 * version followed by the minimum GLSL ES version -- confirm against its
 * declaration, which is not visible here.
 */
return state->is_version(450, 310) ||
state->ARB_ES3_1_compatibility_enable ||
(v130(state) && state->EXT_shader_integer_mix_enable);
}
@ -478,6 +479,7 @@ static bool
shader_image_atomic_exchange_float(const _mesa_glsl_parse_state *state)
{
/* Availability predicate. Returns true when any one of the following holds:
 *  - state->is_version(450, 320) succeeds,
 *  - the ARB_ES3_1_compatibility enable flag is set on the parse state, or
 *  - the OES_shader_image_atomic enable flag is set.
 *
 * NOTE(review): is_version() presumably takes the minimum desktop GLSL
 * version followed by the minimum GLSL ES version -- confirm against its
 * declaration, which is not visible here.
 */
return (state->is_version(450, 320) ||
state->ARB_ES3_1_compatibility_enable ||
state->OES_shader_image_atomic_enable);
}

View file

@ -181,7 +181,7 @@ static const struct builtin_type_versions {
T(sampler2DRect, 140, 999)
T(samplerBuffer, 140, 320)
T(sampler2DMS, 150, 310)
T(sampler2DMSArray, 150, 999)
T(sampler2DMSArray, 150, 320)
T(isampler1D, 130, 999)
T(isampler2D, 130, 300)
@ -193,7 +193,7 @@ static const struct builtin_type_versions {
T(isampler2DRect, 140, 999)
T(isamplerBuffer, 140, 320)
T(isampler2DMS, 150, 310)
T(isampler2DMSArray, 150, 999)
T(isampler2DMSArray, 150, 320)
T(usampler1D, 130, 999)
T(usampler2D, 130, 300)
@ -205,7 +205,7 @@ static const struct builtin_type_versions {
T(usampler2DRect, 140, 999)
T(usamplerBuffer, 140, 320)
T(usampler2DMS, 150, 310)
T(usampler2DMSArray, 150, 999)
T(usampler2DMSArray, 150, 320)
T(sampler1DShadow, 110, 999)
T(sampler2DShadow, 110, 300)
@ -305,11 +305,13 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
add_type(symbols, glsl_type::usamplerCubeArray_type);
}
if (state->ARB_texture_multisample_enable ||
state->OES_texture_storage_multisample_2d_array_enable) {
if (state->ARB_texture_multisample_enable) {
add_type(symbols, glsl_type::sampler2DMS_type);
add_type(symbols, glsl_type::isampler2DMS_type);
add_type(symbols, glsl_type::usampler2DMS_type);
}
if (state->ARB_texture_multisample_enable ||
state->OES_texture_storage_multisample_2d_array_enable) {
add_type(symbols, glsl_type::sampler2DMSArray_type);
add_type(symbols, glsl_type::isampler2DMSArray_type);
add_type(symbols, glsl_type::usampler2DMSArray_type);

View file

@ -845,11 +845,6 @@ builtin_variable_generator::generate_constants()
state->Const.MaxImageSamples);
}
if (state->is_version(450, 310)) {
add_const("gl_MaxCombinedShaderOutputResources",
state->Const.MaxCombinedShaderOutputResources);
}
if (state->is_version(400, 0) ||
state->ARB_tessellation_shader_enable) {
add_const("gl_MaxTessControlImageUniforms",
@ -859,6 +854,12 @@ builtin_variable_generator::generate_constants()
}
}
if (state->is_version(450, 310) ||
state->ARB_ES3_1_compatibility_enable) {
add_const("gl_MaxCombinedShaderOutputResources",
state->Const.MaxCombinedShaderOutputResources);
}
if (state->is_version(410, 0) ||
state->ARB_viewport_array_enable)
add_const("gl_MaxViewports", state->Const.MaxViewports);
@ -880,7 +881,8 @@ builtin_variable_generator::generate_constants()
}
if (state->is_version(450, 320) ||
state->OES_sample_variables_enable)
state->OES_sample_variables_enable ||
state->ARB_ES3_1_compatibility_enable)
add_const("gl_MaxSamples", state->Const.MaxSamples);
}
@ -1174,7 +1176,7 @@ builtin_variable_generator::generate_fs_special_vars()
var->data.interpolation = INTERP_QUALIFIER_FLAT;
}
if (state->is_version(450, 310)/* || state->ARB_ES3_1_compatibility_enable*/)
if (state->is_version(450, 310) || state->ARB_ES3_1_compatibility_enable)
add_system_value(SYSTEM_VALUE_HELPER_INVOCATION, bool_t, "gl_HelperInvocation");
}

File diff suppressed because it is too large Load diff

View file

@ -226,7 +226,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->supported_versions[this->num_supported_versions].es = true;
this->num_supported_versions++;
}
if (_mesa_is_gles31(ctx)) {
if (_mesa_is_gles31(ctx) || ctx->Extensions.ARB_ES3_1_compatibility) {
this->supported_versions[this->num_supported_versions].ver = 310;
this->supported_versions[this->num_supported_versions].es = true;
this->num_supported_versions++;
@ -565,6 +565,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
/* ARB extensions go here, sorted alphabetically.
*/
EXT(ARB_ES3_1_compatibility, true, false, ARB_ES3_1_compatibility),
EXT(ARB_arrays_of_arrays, true, false, ARB_arrays_of_arrays),
EXT(ARB_compute_shader, true, false, ARB_compute_shader),
EXT(ARB_conservative_depth, true, false, ARB_conservative_depth),

View file

@ -510,6 +510,8 @@ struct _mesa_glsl_parse_state {
/*@{*/
/* ARB extensions go here, sorted alphabetically.
*/
bool ARB_ES3_1_compatibility_enable;
bool ARB_ES3_1_compatibility_warn;
bool ARB_arrays_of_arrays_enable;
bool ARB_arrays_of_arrays_warn;
bool ARB_compute_shader_enable;

View file

@ -173,6 +173,7 @@ void ir_print_visitor::visit(ir_variable *ir)
const char *const samp = (ir->data.sample) ? "sample " : "";
const char *const patc = (ir->data.patch) ? "patch " : "";
const char *const inv = (ir->data.invariant) ? "invariant " : "";
const char *const prec = (ir->data.precise) ? "precise " : "";
const char *const mode[] = { "", "uniform ", "shader_storage ",
"shader_shared ", "shader_in ", "shader_out ",
"in ", "out ", "inout ",
@ -182,8 +183,8 @@ void ir_print_visitor::visit(ir_variable *ir)
const char *const interp[] = { "", "smooth", "flat", "noperspective" };
STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_QUALIFIER_COUNT);
fprintf(f, "(%s%s%s%s%s%s%s%s) ",
loc, cent, samp, patc, inv, mode[ir->data.mode],
fprintf(f, "(%s%s%s%s%s%s%s%s%s) ",
loc, cent, samp, patc, inv, prec, mode[ir->data.mode],
stream[ir->data.stream],
interp[ir->data.interpolation]);

View file

@ -44,18 +44,6 @@ get_storage(gl_uniform_storage *storage, unsigned num_storage,
return NULL;
}
/**
 * Look up a buffer interface block of a shader program by name.
 *
 * Walks shProg->BufferInterfaceBlocks from index 0 up to (but not including)
 * shProg->NumBufferInterfaceBlocks and compares each entry's Name against
 * \p uniformBlockName with strcmp().
 *
 * \param shProg            program whose BufferInterfaceBlocks are searched
 * \param uniformBlockName  NUL-terminated name to match exactly
 *
 * \return the index of the first entry whose Name matches, or
 *         GL_INVALID_INDEX when no entry matches.
 */
static unsigned
get_uniform_block_index(const gl_shader_program *shProg,
const char *uniformBlockName)
{
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
/* strcmp() returns 0 on equality, hence the negation. */
if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName))
return i;
}
/* No block with this name exists in the program. */
return GL_INVALID_INDEX;
}
void
copy_constant_to_storage(union gl_constant_value *storage,
const ir_constant *val,
@ -168,22 +156,14 @@ set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
void
set_block_binding(gl_shader_program *prog, const char *block_name, int binding)
{
const unsigned block_index = get_uniform_block_index(prog, block_name);
if (block_index == GL_INVALID_INDEX) {
assert(block_index != GL_INVALID_INDEX);
return;
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
if (!strcmp(prog->BufferInterfaceBlocks[i].Name, block_name)) {
prog->BufferInterfaceBlocks[i].Binding = binding;
return;
}
}
/* This is a field of a UBO. val is the binding index. */
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
int stage_index = prog->InterfaceBlockStageIndex[i][block_index];
if (stage_index != -1) {
struct gl_shader *sh = prog->_LinkedShaders[i];
sh->BufferInterfaceBlocks[stage_index]->Binding = binding;
}
}
unreachable("Failed to initialize block binding");
}
void

View file

@ -282,7 +282,7 @@ public:
: num_active_uniforms(0), num_hidden_uniforms(0), num_values(0),
num_shader_samplers(0), num_shader_images(0),
num_shader_uniform_components(0), num_shader_subroutines(0),
is_ubo_var(false), is_shader_storage(false), map(map),
is_buffer_block(false), is_shader_storage(false), map(map),
hidden_map(hidden_map)
{
/* empty */
@ -299,7 +299,7 @@ public:
void process(ir_variable *var)
{
this->current_var = var;
this->is_ubo_var = var->is_in_buffer_block();
this->is_buffer_block = var->is_in_buffer_block();
this->is_shader_storage = var->is_in_shader_storage_block();
if (var->is_interface_instance())
program_resource_visitor::process(var->get_interface_type(),
@ -340,7 +340,7 @@ public:
*/
unsigned num_shader_subroutines;
bool is_ubo_var;
bool is_buffer_block;
bool is_shader_storage;
struct string_to_uint_map *map;
@ -380,7 +380,7 @@ private:
* Note that samplers do not count against this limit because they
* don't use any storage on current hardware.
*/
if (!is_ubo_var && !is_shader_storage)
if (!is_buffer_block)
this->num_shader_uniform_components += values;
}
@ -460,30 +460,33 @@ public:
field_counter = 0;
this->record_next_sampler = new string_to_uint_map;
ubo_block_index = -1;
buffer_block_index = -1;
if (var->is_in_buffer_block()) {
struct gl_uniform_block **blks = var->is_in_shader_storage_block() ?
prog->ShaderStorageBlocks : prog->UniformBlocks;
unsigned num_blks = var->is_in_shader_storage_block() ?
prog->NumShaderStorageBlocks : prog->NumUniformBlocks;
if (var->is_interface_instance() && var->type->is_array()) {
unsigned l = strlen(var->get_interface_type()->name);
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
if (strncmp(var->get_interface_type()->name,
prog->BufferInterfaceBlocks[i].Name,
l) == 0
&& prog->BufferInterfaceBlocks[i].Name[l] == '[') {
ubo_block_index = i;
for (unsigned i = 0; i < num_blks; i++) {
if (strncmp(var->get_interface_type()->name, blks[i]->Name, l)
== 0 && blks[i]->Name[l] == '[') {
buffer_block_index = i;
break;
}
}
} else {
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
if (strcmp(var->get_interface_type()->name,
prog->BufferInterfaceBlocks[i].Name) == 0) {
ubo_block_index = i;
for (unsigned i = 0; i < num_blks; i++) {
if (strcmp(var->get_interface_type()->name, blks[i]->Name) ==
0) {
buffer_block_index = i;
break;
}
}
}
assert(ubo_block_index != -1);
assert(buffer_block_index != -1);
/* Uniform blocks that were specified with an instance name must be
* handled a little bit differently. The name of the variable is the
@ -497,7 +500,7 @@ public:
var->get_interface_type()->name);
} else {
const struct gl_uniform_block *const block =
&prog->BufferInterfaceBlocks[ubo_block_index];
blks[buffer_block_index];
assert(var->data.location != -1);
@ -519,7 +522,7 @@ public:
delete this->record_next_sampler;
}
int ubo_block_index;
int buffer_block_index;
int ubo_byte_offset;
gl_shader_stage shader_type;
@ -659,7 +662,7 @@ private:
virtual void enter_record(const glsl_type *type, const char *,
bool row_major, const unsigned packing) {
assert(type->is_record());
if (this->ubo_block_index == -1)
if (this->buffer_block_index == -1)
return;
if (packing == GLSL_INTERFACE_PACKING_STD430)
this->ubo_byte_offset = glsl_align(
@ -672,7 +675,7 @@ private:
virtual void leave_record(const glsl_type *type, const char *,
bool row_major, const unsigned packing) {
assert(type->is_record());
if (this->ubo_block_index == -1)
if (this->buffer_block_index == -1)
return;
if (packing == GLSL_INTERFACE_PACKING_STD430)
this->ubo_byte_offset = glsl_align(
@ -719,7 +722,7 @@ private:
/* For array of arrays or struct arrays the base location may have
* already been set so don't set it again.
*/
if (ubo_block_index == -1 && current_var->data.location == -1) {
if (buffer_block_index == -1 && current_var->data.location == -1) {
current_var->data.location = id;
}
@ -766,8 +769,8 @@ private:
this->uniforms[id].is_shader_storage =
current_var->is_in_shader_storage_block();
if (this->ubo_block_index != -1) {
this->uniforms[id].block_index = this->ubo_block_index;
if (this->buffer_block_index != -1) {
this->uniforms[id].block_index = this->buffer_block_index;
unsigned alignment = type->std140_base_alignment(row_major);
if (packing == GLSL_INTERFACE_PACKING_STD430)

View file

@ -240,7 +240,16 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
/* Check that all of the qualifiers match between stages.
*/
if (input->data.centroid != output->data.centroid) {
/* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
* should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
* conformance test suite does not verify that the qualifiers must match.
* The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
* OpenGLES 3.0 drivers, so we relax the checking in all cases.
*/
if (false /* always skip the centroid check */ &&
prog->Version < (prog->IsES ? 310 : 430) &&
input->data.centroid != output->data.centroid) {
linker_error(prog,
"%s shader output `%s' %s centroid qualifier, "
"but %s shader input %s centroid qualifier\n",

View file

@ -1171,6 +1171,8 @@ cross_validate_uniforms(struct gl_shader_program *prog)
static bool
interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
{
int *InterfaceBlockStageIndex[MESA_SHADER_STAGES];
unsigned max_num_uniform_blocks = 0;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i])
@ -1180,10 +1182,9 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_shader *sh = prog->_LinkedShaders[i];
prog->InterfaceBlockStageIndex[i] = ralloc_array(prog, int,
max_num_uniform_blocks);
InterfaceBlockStageIndex[i] = new int[max_num_uniform_blocks];
for (unsigned int j = 0; j < max_num_uniform_blocks; j++)
prog->InterfaceBlockStageIndex[i][j] = -1;
InterfaceBlockStageIndex[i][j] = -1;
if (sh == NULL)
continue;
@ -1194,13 +1195,17 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
&prog->NumBufferInterfaceBlocks,
sh->BufferInterfaceBlocks[j]);
if (index == -1) {
linker_error(prog, "uniform block `%s' has mismatching definitions\n",
sh->BufferInterfaceBlocks[j]->Name);
return false;
}
if (index == -1) {
linker_error(prog, "uniform block `%s' has mismatching definitions\n",
sh->BufferInterfaceBlocks[j]->Name);
prog->InterfaceBlockStageIndex[i][index] = j;
for (unsigned k = 0; k <= i; k++) {
delete[] InterfaceBlockStageIndex[k];
}
return false;
}
InterfaceBlockStageIndex[i][index] = j;
}
}
@ -1209,18 +1214,23 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
*/
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
int stage_index =
prog->InterfaceBlockStageIndex[i][j];
int stage_index = InterfaceBlockStageIndex[i][j];
if (stage_index != -1) {
struct gl_shader *sh = prog->_LinkedShaders[i];
prog->BufferInterfaceBlocks[j].stageref |= (1 << i);
sh->BufferInterfaceBlocks[stage_index] =
&prog->BufferInterfaceBlocks[j];
}
}
}
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
delete[] InterfaceBlockStageIndex[i];
}
return true;
}
@ -2890,6 +2900,9 @@ store_fragdepth_layout(struct gl_shader_program *prog)
static void
check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
{
unsigned total_uniform_blocks = 0;
unsigned total_shader_storage_blocks = 0;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_shader *sh = prog->_LinkedShaders[i];
@ -2928,12 +2941,37 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
_mesa_shader_stage_to_string(i));
}
}
total_shader_storage_blocks += sh->NumShaderStorageBlocks;
total_uniform_blocks += sh->NumUniformBlocks;
const unsigned max_uniform_blocks =
ctx->Const.Program[i].MaxUniformBlocks;
if (max_uniform_blocks < sh->NumUniformBlocks) {
linker_error(prog, "Too many %s uniform blocks (%d/%d)\n",
_mesa_shader_stage_to_string(i), sh->NumUniformBlocks,
max_uniform_blocks);
}
const unsigned max_shader_storage_blocks =
ctx->Const.Program[i].MaxShaderStorageBlocks;
if (max_shader_storage_blocks < sh->NumShaderStorageBlocks) {
linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n",
_mesa_shader_stage_to_string(i),
sh->NumShaderStorageBlocks, max_shader_storage_blocks);
}
}
unsigned blocks[MESA_SHADER_STAGES] = {0};
unsigned total_uniform_blocks = 0;
unsigned shader_blocks[MESA_SHADER_STAGES] = {0};
unsigned total_shader_storage_blocks = 0;
if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
total_uniform_blocks, ctx->Const.MaxCombinedUniformBlocks);
}
if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) {
linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n",
total_shader_storage_blocks,
ctx->Const.MaxCombinedShaderStorageBlocks);
}
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
/* Don't check SSBOs for Uniform Block Size */
@ -2952,57 +2990,6 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
prog->BufferInterfaceBlocks[i].UniformBufferSize,
ctx->Const.MaxShaderStorageBlockSize);
}
for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
if (prog->InterfaceBlockStageIndex[j][i] != -1) {
struct gl_shader *sh = prog->_LinkedShaders[j];
int stage_index = prog->InterfaceBlockStageIndex[j][i];
if (sh &&
sh->BufferInterfaceBlocks[stage_index]->IsShaderStorage) {
shader_blocks[j]++;
total_shader_storage_blocks++;
} else {
blocks[j]++;
total_uniform_blocks++;
}
}
}
if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
total_uniform_blocks,
ctx->Const.MaxCombinedUniformBlocks);
} else {
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
const unsigned max_uniform_blocks =
ctx->Const.Program[i].MaxUniformBlocks;
if (blocks[i] > max_uniform_blocks) {
linker_error(prog, "Too many %s uniform blocks (%d/%d)\n",
_mesa_shader_stage_to_string(i),
blocks[i],
max_uniform_blocks);
break;
}
}
}
if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) {
linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n",
total_shader_storage_blocks,
ctx->Const.MaxCombinedShaderStorageBlocks);
} else {
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
const unsigned max_shader_storage_blocks =
ctx->Const.Program[i].MaxShaderStorageBlocks;
if (shader_blocks[i] > max_shader_storage_blocks) {
linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n",
_mesa_shader_stage_to_string(i),
shader_blocks[i],
max_shader_storage_blocks);
break;
}
}
}
}
}
@ -3072,13 +3059,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
ctx->Const.Program[i].MaxImageUniforms);
total_image_units += sh->NumImages;
for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
int stage_index = prog->InterfaceBlockStageIndex[i][j];
if (stage_index != -1 &&
sh->BufferInterfaceBlocks[stage_index]->IsShaderStorage)
total_shader_storage_blocks++;
}
total_shader_storage_blocks += sh->NumShaderStorageBlocks;
if (i == MESA_SHADER_FRAGMENT) {
foreach_in_list(ir_instruction, node, sh->ir) {
@ -3497,19 +3478,50 @@ build_stageref(struct gl_shader_program *shProg, const char *name,
* Create gl_shader_variable from ir_variable class.
*/
static gl_shader_variable *
create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in)
create_shader_variable(struct gl_shader_program *shProg,
const ir_variable *in, bool use_implicit_location,
int location_bias)
{
gl_shader_variable *out = ralloc(shProg, struct gl_shader_variable);
if (!out)
return NULL;
out->type = in->type;
out->name = ralloc_strdup(shProg, in->name);
/* gl_VertexID may be lowered to gl_VertexIDMESA, but applications expect
* to see gl_VertexID in the program resource list, so report that name.
*/
if (in->data.mode == ir_var_system_value &&
in->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
out->name = ralloc_strdup(shProg, "gl_VertexID");
} else {
out->name = ralloc_strdup(shProg, in->name);
}
if (!out->name)
return NULL;
out->location = in->data.location;
/* From the ARB_program_interface_query specification:
*
* "Not all active variables are assigned valid locations; the
* following variables will have an effective location of -1:
*
* * uniforms declared as atomic counters;
*
* * members of a uniform block;
*
* * built-in inputs, outputs, and uniforms (starting with "gl_"); and
*
* * inputs or outputs not declared with a "location" layout qualifier,
* except for vertex shader inputs and fragment shader outputs."
*/
if (in->type->base_type == GLSL_TYPE_ATOMIC_UINT ||
is_gl_identifier(in->name) ||
!(in->data.explicit_location || use_implicit_location)) {
out->location = -1;
} else {
out->location = in->data.location - location_bias;
}
out->type = in->type;
out->index = in->data.index;
out->patch = in->data.patch;
out->mode = in->data.mode;
@ -3519,38 +3531,31 @@ create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in)
static bool
add_interface_variables(struct gl_shader_program *shProg,
exec_list *ir, GLenum programInterface)
unsigned stage, GLenum programInterface)
{
exec_list *ir = shProg->_LinkedShaders[stage]->ir;
foreach_in_list(ir_instruction, node, ir) {
ir_variable *var = node->as_variable();
uint8_t mask = 0;
if (!var)
if (!var || var->data.how_declared == ir_var_hidden)
continue;
int loc_bias;
switch (var->data.mode) {
/* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes):
* "For GetActiveAttrib, all active vertex shader input variables
* are enumerated, including the special built-in inputs gl_VertexID
* and gl_InstanceID."
*/
case ir_var_system_value:
if (var->data.location != SYSTEM_VALUE_VERTEX_ID &&
var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE &&
var->data.location != SYSTEM_VALUE_INSTANCE_ID)
continue;
/* Mark special built-in inputs referenced by the vertex stage so
* that they are considered active by the shader queries.
*/
mask = (1 << (MESA_SHADER_VERTEX));
/* FALLTHROUGH */
case ir_var_shader_in:
if (programInterface != GL_PROGRAM_INPUT)
continue;
loc_bias = (stage == MESA_SHADER_VERTEX) ? int(VERT_ATTRIB_GENERIC0)
: int(VARYING_SLOT_VAR0);
break;
case ir_var_shader_out:
if (programInterface != GL_PROGRAM_OUTPUT)
continue;
loc_bias = (stage == MESA_SHADER_FRAGMENT) ? int(FRAG_RESULT_DATA0)
: int(VARYING_SLOT_VAR0);
break;
default:
continue;
@ -3568,13 +3573,16 @@ add_interface_variables(struct gl_shader_program *shProg,
if (strncmp(var->name, "gl_out_FragData", 15) == 0)
continue;
gl_shader_variable *sha_v = create_shader_variable(shProg, var);
const bool vs_input_or_fs_output =
(stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
(stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out);
gl_shader_variable *sha_v =
create_shader_variable(shProg, var, vs_input_or_fs_output, loc_bias);
if (!sha_v)
return false;
if (!add_program_resource(shProg, programInterface, sha_v,
build_stageref(shProg, sha_v->name,
sha_v->mode) | mask))
if (!add_program_resource(shProg, programInterface, sha_v, 1 << stage))
return false;
}
return true;
@ -3604,7 +3612,8 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type)
}
if (type == iface) {
gl_shader_variable *sha_v = create_shader_variable(shProg, var);
gl_shader_variable *sha_v =
create_shader_variable(shProg, var, false, VARYING_SLOT_VAR0);
if (!sha_v)
return false;
if (!add_program_resource(shProg, iface, sha_v,
@ -3629,7 +3638,8 @@ add_fragdata_arrays(struct gl_shader_program *shProg)
ir_variable *var = node->as_variable();
if (var) {
assert(var->data.mode == ir_var_shader_out);
gl_shader_variable *sha_v = create_shader_variable(shProg, var);
gl_shader_variable *sha_v =
create_shader_variable(shProg, var, true, FRAG_RESULT_DATA0);
if (!sha_v)
return false;
if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, sha_v,
@ -3794,7 +3804,9 @@ calculate_array_size_and_stride(struct gl_shader_program *shProg,
int array_stride = -1;
char *var_name = get_top_level_name(uni->name);
char *interface_name =
get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name);
get_top_level_name(uni->is_shader_storage ?
shProg->ShaderStorageBlocks[block_index]->Name :
shProg->UniformBlocks[block_index]->Name);
if (strcmp(var_name, interface_name) == 0) {
/* Deal with instanced array of SSBOs */
@ -3893,12 +3905,10 @@ build_program_resource_list(struct gl_context *ctx,
return;
/* Add inputs and outputs to the resource list. */
if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
GL_PROGRAM_INPUT))
if (!add_interface_variables(shProg, input_stage, GL_PROGRAM_INPUT))
return;
if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage]->ir,
GL_PROGRAM_OUTPUT))
if (!add_interface_variables(shProg, output_stage, GL_PROGRAM_OUTPUT))
return;
/* Add transform feedback varyings. */
@ -3933,15 +3943,14 @@ build_program_resource_list(struct gl_context *ctx,
ir_var_uniform);
/* Add stagereferences for uniforms in a uniform block. */
bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
int block_index = shProg->UniformStorage[i].block_index;
if (block_index != -1) {
for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
if (shProg->InterfaceBlockStageIndex[j][block_index] != -1)
stageref |= (1 << j);
}
stageref |= is_shader_storage ?
shProg->ShaderStorageBlocks[block_index]->stageref :
shProg->UniformBlocks[block_index]->stageref;
}
bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
if (!should_add_buffer_variable(shProg, type,
shProg->UniformStorage[i].name))
@ -4686,6 +4695,33 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
has_xfb_qualifiers))
goto done;
/* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks
* for gl_shader_program and gl_shader, so that drivers that need separate
* index spaces for each set can have that.
*/
for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] != NULL) {
gl_shader *sh = prog->_LinkedShaders[i];
split_ubos_and_ssbos(sh,
sh->BufferInterfaceBlocks,
NULL,
sh->NumBufferInterfaceBlocks,
&sh->UniformBlocks,
&sh->NumUniformBlocks,
&sh->ShaderStorageBlocks,
&sh->NumShaderStorageBlocks);
}
}
split_ubos_and_ssbos(prog,
NULL,
prog->BufferInterfaceBlocks,
prog->NumBufferInterfaceBlocks,
&prog->UniformBlocks,
&prog->NumUniformBlocks,
&prog->ShaderStorageBlocks,
&prog->NumShaderStorageBlocks);
update_array_sizes(prog);
link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue,
num_explicit_uniform_locs,
@ -4737,33 +4773,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
}
}
/* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks
* for gl_shader_program and gl_shader, so that drivers that need separate
* index spaces for each set can have that.
*/
for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] != NULL) {
gl_shader *sh = prog->_LinkedShaders[i];
split_ubos_and_ssbos(sh,
sh->BufferInterfaceBlocks,
NULL,
sh->NumBufferInterfaceBlocks,
&sh->UniformBlocks,
&sh->NumUniformBlocks,
&sh->ShaderStorageBlocks,
&sh->NumShaderStorageBlocks);
}
}
split_ubos_and_ssbos(prog,
NULL,
prog->BufferInterfaceBlocks,
prog->NumBufferInterfaceBlocks,
&prog->UniformBlocks,
&prog->NumUniformBlocks,
&prog->ShaderStorageBlocks,
&prog->NumShaderStorageBlocks);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;

View file

@ -372,7 +372,8 @@ lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
{
return state->ARB_shader_storage_buffer_object_enable;
return state->ARB_shader_storage_buffer_object_enable ||
state->is_version(430, 310);
}
uint32_t

View file

@ -100,7 +100,7 @@ lower_vertex_id_visitor::visit(ir_dereference_variable *ir)
if (gl_BaseVertex == NULL) {
gl_BaseVertex = new(mem_ctx) ir_variable(int_t, "gl_BaseVertex",
ir_var_system_value);
gl_BaseVertex->data.how_declared = ir_var_declared_implicitly;
gl_BaseVertex->data.how_declared = ir_var_hidden;
gl_BaseVertex->data.read_only = true;
gl_BaseVertex->data.location = SYSTEM_VALUE_BASE_VERTEX;
gl_BaseVertex->data.explicit_location = true;

View file

@ -96,8 +96,6 @@ _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
void
_mesa_clear_shader_program_data(struct gl_shader_program *shProg)
{
unsigned i;
shProg->NumUniformStorage = 0;
shProg->UniformStorage = NULL;
shProg->NumUniformRemapTable = 0;
@ -119,11 +117,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
shProg->ShaderStorageBlocks = NULL;
shProg->NumShaderStorageBlocks = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
ralloc_free(shProg->InterfaceBlockStageIndex[i]);
shProg->InterfaceBlockStageIndex[i] = NULL;
}
ralloc_free(shProg->AtomicBuffers);
shProg->AtomicBuffers = NULL;
shProg->NumAtomicBuffers = 0;

View file

@ -817,7 +817,7 @@ store_aos(struct gallivm_state *gallivm,
#endif
/* Unaligned store due to the vertex header */
lp_set_store_alignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
}
/**
@ -1069,7 +1069,7 @@ store_clip(struct gallivm_state *gallivm,
clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");
/* Unaligned store */
lp_set_store_alignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
}
}

View file

@ -95,4 +95,18 @@ typedef void *LLVMMCJITMemoryManagerRef;
#define LLVMInsertBasicBlock ILLEGAL_LLVM_FUNCTION
#define LLVMCreateBuilder ILLEGAL_LLVM_FUNCTION
/*
* Before LLVM 3.4 LLVMSetAlignment only supported GlobalValue, not
* LoadInst/StoreInst as we need.
*/
#if HAVE_LLVM < 0x0304
# ifdef __cplusplus
extern "C"
# endif
void LLVMSetAlignmentBackport(LLVMValueRef V, unsigned Bytes);
# define LLVMSetAlignment LLVMSetAlignmentBackport
#endif
#endif /* LP_BLD_H */

View file

@ -1492,14 +1492,9 @@ lp_build_abs(struct lp_build_context *bld,
return a;
if(type.floating) {
/* Mask out the sign bit */
LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
unsigned long long absMask = ~(1ULL << (type.width - 1));
LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, ((unsigned long long) absMask));
a = LLVMBuildBitCast(builder, a, int_vec_type, "");
a = LLVMBuildAnd(builder, a, mask, "");
a = LLVMBuildBitCast(builder, a, vec_type, "");
return a;
char intrinsic[32];
lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.fabs", vec_type);
return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
}
if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@ -1675,13 +1670,13 @@ enum lp_build_round_mode
* result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
*/
static inline LLVMValueRef
lp_build_round_sse41(struct lp_build_context *bld,
LLVMValueRef a,
enum lp_build_round_mode mode)
lp_build_nearest_sse41(struct lp_build_context *bld,
LLVMValueRef a)
{
LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef mode = LLVMConstNull(i32t);
const char *intrinsic;
LLVMValueRef res;
@ -1714,7 +1709,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
args[0] = undef;
args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
args[2] = LLVMConstInt(i32t, mode, 0);
args[2] = mode;
res = lp_build_intrinsic(builder, intrinsic,
vec_type, args, Elements(args), 0);
@ -1754,7 +1749,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
res = lp_build_intrinsic_binary(builder, intrinsic,
bld->vec_type, a,
LLVMConstInt(i32t, mode, 0));
mode);
}
return res;
@ -1856,8 +1851,38 @@ lp_build_round_arch(struct lp_build_context *bld,
LLVMValueRef a,
enum lp_build_round_mode mode)
{
if (util_cpu_caps.has_sse4_1)
return lp_build_round_sse41(bld, a, mode);
if (util_cpu_caps.has_sse4_1) {
LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
const char *intrinsic_root;
char intrinsic[32];
assert(type.floating);
assert(lp_check_value(type, a));
(void)type;
switch (mode) {
case LP_BUILD_ROUND_NEAREST:
if (HAVE_LLVM >= 0x0304) {
intrinsic_root = "llvm.round";
} else {
return lp_build_nearest_sse41(bld, a);
}
break;
case LP_BUILD_ROUND_FLOOR:
intrinsic_root = "llvm.floor";
break;
case LP_BUILD_ROUND_CEIL:
intrinsic_root = "llvm.ceil";
break;
case LP_BUILD_ROUND_TRUNCATE:
intrinsic_root = "llvm.trunc";
break;
}
lp_format_intrinsic(intrinsic, sizeof intrinsic, intrinsic_root, bld->vec_type);
return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
}
else /* (util_cpu_caps.has_altivec) */
return lp_build_round_altivec(bld, a, mode);
}
@ -1999,7 +2024,7 @@ lp_build_floor(struct lp_build_context *bld,
if (type.width != 32) {
char intrinsic[32];
util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", type.length, type.width);
lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.floor", vec_type);
return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
}
@ -2074,7 +2099,7 @@ lp_build_ceil(struct lp_build_context *bld,
if (type.width != 32) {
char intrinsic[32];
util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", type.length, type.width);
lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.ceil", vec_type);
return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
}
@ -2411,15 +2436,8 @@ lp_build_sqrt(struct lp_build_context *bld,
assert(lp_check_value(type, a));
/* TODO: optimize the constant case */
assert(type.floating);
if (type.length == 1) {
util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.f%u", type.width);
}
else {
util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width);
}
lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.sqrt", vec_type);
return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
}

View file

@ -74,7 +74,7 @@ lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
ptr = LLVMBuildPointerCast(builder, ptr, LLVMPointerType(src_vec_type, 0), "");
res = LLVMBuildLoad(builder, ptr, "");
lp_set_load_alignment(res, src_type.width / 8);
LLVMSetAlignment(res, src_type.width / 8);
/* Truncate doubles to float */
if (src_type.floating && src_type.width == 64) {

View file

@ -112,7 +112,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
* gallium could not do anything else except 16 no matter what...
*/
if (!aligned) {
lp_set_load_alignment(res, 1);
LLVMSetAlignment(res, 1);
}
assert(src_width <= dst_width);

View file

@ -399,6 +399,20 @@ lp_build_init(void)
util_cpu_detect();
/* For simulating less capable machines */
#ifdef DEBUG
if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) {
assert(util_cpu_caps.has_sse2);
util_cpu_caps.has_sse3 = 0;
util_cpu_caps.has_ssse3 = 0;
util_cpu_caps.has_sse4_1 = 0;
util_cpu_caps.has_sse4_2 = 0;
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
util_cpu_caps.has_f16c = 0;
}
#endif
/* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
* 8-wide vector needs more floating ops than 4-wide (due to padding), it is
* actually more efficient to use 4-wide vectors on this processor.
@ -456,17 +470,6 @@ lp_build_init(void)
gallivm_initialized = TRUE;
#if 0
/* For simulating less capable machines */
util_cpu_caps.has_sse3 = 0;
util_cpu_caps.has_ssse3 = 0;
util_cpu_caps.has_sse4_1 = 0;
util_cpu_caps.has_sse4_2 = 0;
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
util_cpu_caps.has_f16c = 0;
#endif
return TRUE;
}

View file

@ -77,14 +77,6 @@ func_pointer
gallivm_jit_function(struct gallivm_state *gallivm,
LLVMValueRef func);
void
lp_set_load_alignment(LLVMValueRef Inst,
unsigned Align);
void
lp_set_store_alignment(LLVMValueRef Inst,
unsigned Align);
#ifdef __cplusplus
}
#endif

View file

@ -45,6 +45,7 @@
#include "util/u_debug.h"
#include "util/u_string.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
@ -52,6 +53,48 @@
#include "lp_bld_pack.h"
/**
 * Build the type-suffixed name of an overloaded LLVM intrinsic.
 *
 * For a scalar type the result is "<name_root>.<c><width>"; for a vector
 * type it is "<name_root>.v<length><c><width>", where <c> is 'i' for
 * integer and 'f' for float/double element types, <width> is the element
 * width in bits and <length> is the number of vector elements.
 *
 * name       destination buffer
 * size       size of the destination buffer, handed to util_snprintf
 * name_root  intrinsic name without the type suffix (e.g. "llvm.sqrt")
 * type       scalar or vector LLVM type the intrinsic is overloaded on
 *
 * Only integer, float and double element types are handled.  Any other
 * type kind trips an assertion; when assertions are compiled out an empty
 * name is written rather than formatting uninitialized values.
 */
void
lp_format_intrinsic(char *name,
                    size_t size,
                    const char *name_root,
                    LLVMTypeRef type)
{
   unsigned length = 0;
   unsigned width = 0;
   char c = '\0';

   LLVMTypeKind kind = LLVMGetTypeKind(type);
   if (kind == LLVMVectorTypeKind) {
      /* Describe the vector by its element count and element type. */
      length = LLVMGetVectorSize(type);
      type = LLVMGetElementType(type);
      kind = LLVMGetTypeKind(type);
   }

   switch (kind) {
   case LLVMIntegerTypeKind:
      c = 'i';
      width = LLVMGetIntTypeWidth(type);
      break;
   case LLVMFloatTypeKind:
      c = 'f';
      width = 32;
      break;
   case LLVMDoubleTypeKind:
      c = 'f';
      width = 64;
      break;
   default:
      assert(0);
      /* With NDEBUG the assert vanishes: never fall through and format
       * an indeterminate type character and width.
       */
      if (size)
         name[0] = '\0';
      return;
   }

   if (length) {
      util_snprintf(name, size, "%s.v%u%c%u", name_root, length, c, width);
   } else {
      util_snprintf(name, size, "%s.%c%u", name_root, c, width);
   }
}
LLVMValueRef
lp_declare_intrinsic(LLVMModuleRef module,
const char *name,

View file

@ -47,6 +47,12 @@
#define LP_MAX_FUNC_ARGS 32
/**
 * Format the type-suffixed name of an overloaded LLVM intrinsic into name.
 *
 * The result is "<name_root>.<c><width>" for a scalar type and
 * "<name_root>.v<length><c><width>" for a vector type, with <c> being 'i'
 * for integer and 'f' for float/double element types.  At most size bytes
 * are handed to the underlying snprintf-style formatter.
 */
void
lp_format_intrinsic(char *name,
size_t size,
const char *name_root,
LLVMTypeRef type);
LLVMValueRef
lp_declare_intrinsic(LLVMModuleRef module,
const char *name,

View file

@ -39,6 +39,7 @@
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_init.h"
#include "lp_bld_intr.h"
#include "lp_bld_debug.h"
@ -314,35 +315,30 @@ lp_build_select(struct lp_build_context *bld,
mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
res = LLVMBuildSelect(builder, mask, a, b, "");
}
else if (HAVE_LLVM >= 0x0303) {
else if (LLVMIsConstant(mask) ||
LLVMGetInstructionOpcode(mask) == LLVMSExt) {
/* Generate a vector select.
*
* Using vector selects would avoid emitting intrinsics, but they weren't
* properly supported yet for a long time.
*
* LLVM 3.3 appears to reliably support it.
*
* LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
*
* LLVM 3.0 includes experimental support provided the -promote-elements
* options is passed to LLVM's command line (e.g., via
* llvm::cl::ParseCommandLineOptions), but resulting code quality is much
* worse, probably because some optimization passes don't know how to
* handle vector selects.
*
* See also:
* - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
* Using vector selects should avoid emitting intrinsics, and hence avoid
* hindering optimization passes, but vector selects weren't properly
* supported for a long time, and LLVM will generate poor code when
* the mask is not the result of a comparison.
*/
/* Convert the mask to a vector of booleans.
* XXX: There are two ways to do this. Decide what's best.
*
* XXX: In x86 the mask is controlled by the MSB, so if we shifted the
* mask by `type.width - 1`, LLVM should realize the mask is ready. Alas
* what really happens is that LLVM will emit two shifts back to back.
*/
if (1) {
LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
} else {
mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
if (0) {
LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
mask = LLVMBuildLShr(builder, mask, shift, "");
}
LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
res = LLVMBuildSelect(builder, mask, a, b, "");
}
else if (((util_cpu_caps.has_sse4_1 &&

View file

@ -178,30 +178,28 @@ gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
*>(library_info);
}
extern "C"
LLVMValueRef
lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
const char *Name)
{
return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name));
}
#if HAVE_LLVM < 0x0304
extern "C"
void
lp_set_load_alignment(LLVMValueRef Inst,
unsigned Align)
LLVMSetAlignmentBackport(LLVMValueRef V,
unsigned Bytes)
{
llvm::unwrap<llvm::LoadInst>(Inst)->setAlignment(Align);
switch (LLVMGetInstructionOpcode(V)) {
case LLVMLoad:
llvm::unwrap<llvm::LoadInst>(V)->setAlignment(Bytes);
break;
case LLVMStore:
llvm::unwrap<llvm::StoreInst>(V)->setAlignment(Bytes);
break;
default:
assert(0);
break;
}
}
extern "C"
void
lp_set_store_alignment(LLVMValueRef Inst,
unsigned Align)
{
llvm::unwrap<llvm::StoreInst>(Inst)->setAlignment(Align);
}
#endif
#if HAVE_LLVM < 0x0306

View file

@ -55,10 +55,6 @@ extern void
lp_set_target_options(void);
extern LLVMValueRef
lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
const char *Name);
extern int
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
struct lp_generated_code **OutCode,

View file

@ -1939,7 +1939,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld,
LLVMPointerType(vec4_bld.vec_type, 0), "");
border_color = LLVMBuildLoad(builder, border_color_ptr, "");
/* we don't have aligned type in the dynamic state unfortunately */
lp_set_load_alignment(border_color, 4);
LLVMSetAlignment(border_color, 4);
/*
* Instead of having some incredibly complex logic which will try to figure out

View file

@ -157,7 +157,7 @@ lp_build_pointer_get_unaligned(LLVMBuilderRef builder,
assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
res = LLVMBuildLoad(builder, element_ptr, "");
lp_set_load_alignment(res, alignment);
LLVMSetAlignment(res, alignment);
#ifdef DEBUG
lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
#endif
@ -188,5 +188,5 @@ lp_build_pointer_set_unaligned(LLVMBuilderRef builder,
LLVMValueRef instr;
element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
instr = LLVMBuildStore(builder, value, element_ptr);
lp_set_store_alignment(instr, alignment);
LLVMSetAlignment(instr, alignment);
}

View file

@ -146,6 +146,9 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
"NUM_CULLDIST_ENABLED",
"FS_EARLY_DEPTH_STENCIL",
"NEXT_SHADER",
"CS_FIXED_BLOCK_WIDTH",
"CS_FIXED_BLOCK_HEIGHT",
"CS_FIXED_BLOCK_DEPTH"
};
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =

View file

@ -88,6 +88,14 @@ tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex);
boolean
tgsi_is_shadow_target(unsigned target);
/**
 * Report whether a TGSI texture target is a multisample target, i.e.
 * either TGSI_TEXTURE_2D_MSAA or TGSI_TEXTURE_2D_ARRAY_MSAA.
 */
static inline boolean
tgsi_is_msaa_target(unsigned target)
{
   const boolean is_2d_msaa = (target == TGSI_TEXTURE_2D_MSAA);
   const boolean is_2d_array_msaa = (target == TGSI_TEXTURE_2D_ARRAY_MSAA);

   return is_2d_msaa || is_2d_array_msaa;
}
#if defined __cplusplus
}
#endif

View file

@ -436,26 +436,26 @@ pipe_screen::get_compute_param.
``processor-arch-manufacturer-os`` that will be passed on to the compiler.
This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM
or PIPE_SHADER_IR_NATIVE for their preferred IR.
Value type: null-terminated string.
Value type: null-terminated string. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
for grid and block coordinates. Value type: ``uint64_t``.
for grid and block coordinates. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
units. Value type: ``uint64_t []``.
units. Value type: ``uint64_t []``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
units. Value type: ``uint64_t []``.
units. Value type: ``uint64_t []``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that
a single block can contain. Value type: ``uint64_t``.
a single block can contain. Value type: ``uint64_t``. Shader IR type dependent.
This may be less than the product of the components of MAX_BLOCK_SIZE and is
usually limited by the number of threads that can be resident simultaneously
on a compute unit.
* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
resource. Value type: ``uint64_t``.
resource. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
resource. Value type: ``uint64_t``.
resource. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
resource. Value type: ``uint64_t``.
resource. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
resource. Value type: ``uint64_t``.
resource. Value type: ``uint64_t``. Shader IR type dependent.
* ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object
allocation in bytes. Value type: ``uint64_t``.
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU

View file

@ -3220,6 +3220,12 @@ Which shader stage will MOST LIKELY follow after this shader when the shader
is bound. This is only a hint to the driver and doesn't have to be precise.
Only set for VS and TES.
TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH / HEIGHT / DEPTH
"""""""""""""""""""""""""""""""""""""""""""""""""""
Threads per block in each dimension, if known at compile time. If the block size
is known, all three should be at least 1. If it is unknown, they should all be set
to 0 or left unset.
Texture Sampling and Texture Formats
------------------------------------

View file

@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen,
static int
ilo_get_compute_param(struct pipe_screen *screen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret)
{

View file

@ -25,15 +25,13 @@ if not env['embedded']:
env.Prepend(LIBS = [llvmpipe, gallium, mesautil])
tests = [
'arit',
'format',
'blend',
'conv',
'printf',
]
if not env['msvc']:
tests.append('arit')
for test in tests:
testname = 'lp_test_' + test
target = env.Program(

View file

@ -786,7 +786,7 @@ load_unswizzled_block(struct gallivm_state *gallivm,
dst[i] = LLVMBuildLoad(builder, dst_ptr, "");
lp_set_load_alignment(dst[i], dst_alignment);
LLVMSetAlignment(dst[i], dst_alignment);
}
}
@ -830,7 +830,7 @@ store_unswizzled_block(struct gallivm_state *gallivm,
src_ptr = LLVMBuildStore(builder, src[i], src_ptr);
lp_set_store_alignment(src_ptr, src_alignment);
LLVMSetAlignment(src_ptr, src_alignment);
}
}

View file

@ -272,6 +272,7 @@ const float fract_values[] = {
static const struct unary_test_t
unary_tests[] = {
{"abs", &lp_build_abs, &fabsf, exp2_values, Elements(exp2_values), 20.0 },
{"neg", &lp_build_negate, &negf, exp2_values, Elements(exp2_values), 20.0 },
{"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values), 20.0 },
{"log2", &lp_build_log2_safe, &log2f, log2_values, Elements(log2_values), 20.0 },

View file

@ -3,9 +3,9 @@ ENVYAS ?= envyas
all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h
gf100.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@
$(ENVYAS) -a -W -mgf100 -Vgf100 $< -o $@
gk104.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@
$(ENVYAS) -a -W -mgf100 -Vgk104 $< -o $@
gk110.asm.h: %.asm.h: %.asm
$(ENVYAS) -a -W -mgk110 $< -o $@
gm107.asm.h: %.asm.h: %.asm

View file

@ -126,6 +126,7 @@ private:
void emitF2I();
void emitI2F();
void emitI2I();
void emitSEL();
void emitSHFL();
void emitDADD();
@ -893,6 +894,32 @@ CodeEmitterGM107::emitI2I()
emitGPR (0x00, insn->def(0));
}
/**
 * Emit the encoding for a select instruction; emitInstruction() calls this
 * for OP_SELP.
 *
 * The opcode form is chosen by the register file of source 1 (GPR, constant
 * buffer or immediate).  Source 2 is emitted as the predicate, and source 0
 * and the destination are emitted as GPRs.
 */
void
CodeEmitterGM107::emitSEL()
{
/* Pick the opcode variant that matches where src1 lives. */
switch (insn->src(1).getFile()) {
case FILE_GPR:
emitInsn(0x5ca00000);
emitGPR (0x14, insn->src(1));
break;
case FILE_MEMORY_CONST:
emitInsn(0x4ca00000);
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
break;
case FILE_IMMEDIATE:
emitInsn(0x38a00000);
emitIMMD(0x14, 19, insn->src(1));
break;
default:
/* Any other register file for src1 is not handled and trips the assert. */
assert(!"bad src1 file");
break;
}
/* Operands common to all three forms.
 * NOTE(review): the leading hex argument of each emit* call looks like a bit
 * position within the instruction word -- confirm against the helpers.
 */
emitPRED(0x27, insn->src(2));
emitGPR (0x08, insn->src(0));
emitGPR (0x00, insn->def(0));
}
void
CodeEmitterGM107::emitSHFL()
{
@ -2963,6 +2990,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
emitISETP();
}
break;
case OP_SELP:
emitSEL();
break;
case OP_PRESIN:
case OP_PREEX2:
emitRRO();

View file

@ -372,7 +372,8 @@ NV50LegalizeSSA::propagateWriteToOutput(Instruction *st)
return;
for (int s = 0; di->srcExists(s); ++s)
if (di->src(s).getFile() == FILE_IMMEDIATE)
if (di->src(s).getFile() == FILE_IMMEDIATE ||
di->src(s).getFile() == FILE_MEMORY_LOCAL)
return;
if (prog->getType() == Program::TYPE_GEOMETRY) {

View file

@ -23,6 +23,7 @@
#if NOUVEAU_DRIVER == 0xc0
# include "nvc0/nvc0_screen.h"
# include "nvc0/nvc0_3d.xml.h"
# include "nvc0/gm107_texture.xml.h"
#else
# include "nv50/nv50_screen.h"
# include "nv50/nv50_3d.xml.h"
@ -65,6 +66,7 @@
#define SF_A(sz) G80_TIC_0_COMPONENTS_SIZES_##sz
#define SF_B(sz) G200_TIC_0_COMPONENTS_SIZES_##sz
#define SF_C(sz) GF100_TIC_0_COMPONENTS_SIZES_##sz
#define SF_D(sz) GM107_TIC2_0_COMPONENTS_SIZES_##sz
#define SF(c, pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
[PIPE_FORMAT_##pf] = { \
sf, { \
@ -236,6 +238,50 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
F3(C, BPTC_RGB_FLOAT, NONE, R, G, B, xx, FLOAT, BC6H_SF16, t),
F3(C, BPTC_RGB_UFLOAT, NONE, R, G, B, xx, FLOAT, BC6H_UF16, t),
#if NOUVEAU_DRIVER == 0xc0
F3(D, ETC1_RGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
F3(D, ETC2_RGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
F3(D, ETC2_SRGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
C4(D, ETC2_RGB8A1, NONE, R, G, B, A, UNORM, ETC2_RGB_PTA, t),
C4(D, ETC2_SRGB8A1, NONE, R, G, B, A, UNORM, ETC2_RGB_PTA, t),
C4(D, ETC2_RGBA8, NONE, R, G, B, A, UNORM, ETC2_RGBA, t),
C4(D, ETC2_SRGBA8, NONE, R, G, B, A, UNORM, ETC2_RGBA, t),
F1(D, ETC2_R11_UNORM, NONE, R, xx, xx, xx, UNORM, EAC, t),
F1(D, ETC2_R11_SNORM, NONE, R, xx, xx, xx, SNORM, EAC, t),
F2(D, ETC2_RG11_UNORM, NONE, R, G, xx, xx, UNORM, EACX2, t),
F2(D, ETC2_RG11_SNORM, NONE, R, G, xx, xx, SNORM, EACX2, t),
C4(D, ASTC_4x4, NONE, R, G, B, A, UNORM, ASTC_2D_4X4, t),
C4(D, ASTC_5x4, NONE, R, G, B, A, UNORM, ASTC_2D_5X4, t),
C4(D, ASTC_5x5, NONE, R, G, B, A, UNORM, ASTC_2D_5X5, t),
C4(D, ASTC_6x5, NONE, R, G, B, A, UNORM, ASTC_2D_6X5, t),
C4(D, ASTC_6x6, NONE, R, G, B, A, UNORM, ASTC_2D_6X6, t),
C4(D, ASTC_8x5, NONE, R, G, B, A, UNORM, ASTC_2D_8X5, t),
C4(D, ASTC_8x6, NONE, R, G, B, A, UNORM, ASTC_2D_8X6, t),
C4(D, ASTC_8x8, NONE, R, G, B, A, UNORM, ASTC_2D_8X8, t),
C4(D, ASTC_10x5, NONE, R, G, B, A, UNORM, ASTC_2D_10X5, t),
C4(D, ASTC_10x6, NONE, R, G, B, A, UNORM, ASTC_2D_10X6, t),
C4(D, ASTC_10x8, NONE, R, G, B, A, UNORM, ASTC_2D_10X8, t),
C4(D, ASTC_10x10, NONE, R, G, B, A, UNORM, ASTC_2D_10X10, t),
C4(D, ASTC_12x10, NONE, R, G, B, A, UNORM, ASTC_2D_12X10, t),
C4(D, ASTC_12x12, NONE, R, G, B, A, UNORM, ASTC_2D_12X12, t),
C4(D, ASTC_4x4_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_4X4, t),
C4(D, ASTC_5x4_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_5X4, t),
C4(D, ASTC_5x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_5X5, t),
C4(D, ASTC_6x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_6X5, t),
C4(D, ASTC_6x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_6X6, t),
C4(D, ASTC_8x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X5, t),
C4(D, ASTC_8x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X6, t),
C4(D, ASTC_8x8_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X8, t),
C4(D, ASTC_10x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X5, t),
C4(D, ASTC_10x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X6, t),
C4(D, ASTC_10x8_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X8, t),
C4(D, ASTC_10x10_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X10, t),
C4(D, ASTC_12x10_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_12X10, t),
C4(D, ASTC_12x12_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_12X12, t),
#endif
C4(A, R32G32B32A32_FLOAT, RGBA32_FLOAT, R, G, B, A, FLOAT, R32_G32_B32_A32, IB),
C4(A, R32G32B32A32_UNORM, NONE, R, G, B, A, UNORM, R32_G32_B32_A32, T),
C4(A, R32G32B32A32_SNORM, NONE, R, G, B, A, SNORM, R32_G32_B32_A32, T),

View file

@ -368,6 +368,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
static int
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param, void *data)
{
struct nv50_screen *screen = nv50_screen(pscreen);

View file

@ -45,6 +45,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
unsigned sample_count,
unsigned bindings)
{
const struct util_format_description *desc = util_format_description(format);
if (sample_count > 8)
return false;
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
@ -65,6 +67,17 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
sample_count > 1)
return false;
/* Restrict ETC2 and ASTC formats here. These are only supported on GK20A.
*/
if ((desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
desc->layout == UTIL_FORMAT_LAYOUT_ASTC) &&
/* The claim is that this should work on GM107 but it doesn't. Need to
* test further and figure out if it's a nouveau issue or a HW one.
nouveau_screen(pscreen)->class_3d < GM107_3D_CLASS &&
*/
nouveau_screen(pscreen)->class_3d != NVEA_3D_CLASS)
return false;
/* transfers & shared are always supported */
bindings &= ~(PIPE_BIND_TRANSFER_READ |
PIPE_BIND_TRANSFER_WRITE |
@ -395,6 +408,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
static int
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param, void *data)
{
struct nvc0_screen *screen = nvc0_screen(pscreen);

View file

@ -250,6 +250,7 @@ gf100_create_texture_view(struct pipe_context *pipe,
uint32_t swz[4];
uint32_t width, height;
uint32_t depth;
uint32_t tex_fmt;
struct nv50_tic_entry *view;
struct nv50_miptree *mt;
bool tex_int;
@ -275,12 +276,13 @@ gf100_create_texture_view(struct pipe_context *pipe,
fmt = &nvc0_format_table[view->pipe.format];
tex_int = util_format_is_pure_integer(view->pipe.format);
tex_fmt = fmt->tic.format & 0x3f;
swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
(fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
(fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
(fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
@ -288,7 +290,8 @@ gf100_create_texture_view(struct pipe_context *pipe,
(swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
(swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
(swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
(swz[3] << G80_TIC_0_W_SOURCE__SHIFT);
(swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));
address = mt->base.address;

View file

@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
if (shader == PIPE_SHADER_COMPUTE) {
uint64_t max_const_buffer_size;
pscreen->get_compute_param(pscreen,
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_const_buffer_size);
return max_const_buffer_size;

View file

@ -612,6 +612,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
}
static int r600_get_compute_param(struct pipe_screen *screen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret)
{
@ -678,7 +679,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
uint64_t *max_global_size = ret;
uint64_t max_mem_alloc_size;
r600_get_compute_param(screen,
r600_get_compute_param(screen, ir_type,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_mem_alloc_size);

View file

@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
uint64_t max_const_buffer_size;
pscreen->get_compute_param(pscreen,
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_const_buffer_size);
return max_const_buffer_size;

View file

@ -195,7 +195,6 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
}
}
key->tex[i].texture_msaa = view->texture->nr_samples > 1;
if (!svga->curr.sampler[shader][i]->normalized_coords) {
assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
key->tex[i].width_height_idx = idx++;

View file

@ -99,7 +99,6 @@ struct svga_compile_key
unsigned unnormalized:1;
unsigned width_height_idx:5; /**< texture unit */
unsigned is_array:1;
unsigned texture_msaa:1; /**< A multisample texture? */
unsigned sprite_texgen:1;
unsigned swizzle_r:3;
unsigned swizzle_g:3;

View file

@ -5439,7 +5439,7 @@ emit_txf(struct svga_shader_emitter_v10 *emit,
const struct tgsi_full_instruction *inst)
{
const uint unit = inst->Src[1].Register.Index;
const unsigned msaa = emit->key.tex[unit].texture_msaa;
const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture);
int offsets[3];
struct tex_swizzle_info swz_info;

View file

@ -317,8 +317,10 @@ void trace_dump_compute_state(const struct pipe_compute_state *state)
trace_dump_struct_begin("pipe_compute_state");
trace_dump_member(uint, state, ir_type);
trace_dump_member_begin("prog");
if (state->prog) {
if (state->prog && state->ir_type == PIPE_SHADER_IR_TGSI) {
static char str[64 * 1024];
tgsi_dump_str(state->prog, 0, str, sizeof(str));
trace_dump_string(str);

View file

@ -175,6 +175,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
static int
trace_screen_get_compute_param(struct pipe_screen *_screen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param, void *data)
{
struct trace_screen *tr_scr = trace_screen(_screen);
@ -184,10 +185,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen,
trace_dump_call_begin("pipe_screen", "get_compute_param");
trace_dump_arg(ptr, screen);
trace_dump_arg(int, ir_type);
trace_dump_arg(int, param);
trace_dump_arg(ptr, data);
result = screen->get_compute_param(screen, param, data);
result = screen->get_compute_param(screen, ir_type, param, data);
trace_dump_ret(int, result);

View file

@ -368,6 +368,7 @@ enum pipe_flush_flags
#define PIPE_BARRIER_IMAGE (1 << 8)
#define PIPE_BARRIER_FRAMEBUFFER (1 << 9)
#define PIPE_BARRIER_STREAMOUT_BUFFER (1 << 10)
#define PIPE_BARRIER_GLOBAL_BUFFER (1 << 11)
/**
* Resource binding flags -- state tracker must specify in advance all

View file

@ -109,13 +109,16 @@ struct pipe_screen {
/**
* Query a compute-specific capability/parameter/limit.
* \param param one of PIPE_COMPUTE_CAP_x
* \param ret pointer to a preallocated buffer that will be
* initialized to the parameter value, or NULL.
* \return size in bytes of the parameter value that would be
* returned.
* \param ir_type shader IR type for which the param applies, or don't care
* if the param is not shader related
* \param param one of PIPE_COMPUTE_CAP_x
* \param ret pointer to a preallocated buffer that will be
* initialized to the parameter value, or NULL.
* \return size in bytes of the parameter value that would be
* returned.
*/
int (*get_compute_param)(struct pipe_screen *,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret);

View file

@ -276,7 +276,10 @@ union tgsi_immediate_data
#define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16
#define TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL 17
#define TGSI_PROPERTY_NEXT_SHADER 18
#define TGSI_PROPERTY_COUNT 19
#define TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH 19
#define TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT 20
#define TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH 21
#define TGSI_PROPERTY_COUNT 22
struct tgsi_property {
unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */

View file

@ -727,6 +727,7 @@ struct pipe_llvm_program_header
struct pipe_compute_state
{
enum pipe_shader_ir ir_type; /**< IR type contained in prog. */
const void *prog; /**< Compute program to be executed. */
unsigned req_local_mem; /**< Required size of the LOCAL resource. */
unsigned req_private_mem; /**< Required size of the PRIVATE resource. */

View file

@ -30,11 +30,12 @@ using namespace clover;
namespace {
template<typename T>
std::vector<T>
get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) {
int sz = pipe->get_compute_param(pipe, cap, NULL);
get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
pipe_compute_cap cap) {
int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
std::vector<T> v(sz / sizeof(T));
pipe->get_compute_param(pipe, cap, &v.front());
pipe->get_compute_param(pipe, ir_format, cap, &v.front());
return v;
}
}
@ -115,19 +116,19 @@ device::max_samplers() const {
cl_ulong
device::max_mem_global() const {
return get_compute_param<uint64_t>(pipe,
return get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
}
cl_ulong
device::max_mem_local() const {
return get_compute_param<uint64_t>(pipe,
return get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
}
cl_ulong
device::max_mem_input() const {
return get_compute_param<uint64_t>(pipe,
return get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
}
@ -146,30 +147,30 @@ device::max_const_buffers() const {
size_t
device::max_threads_per_block() const {
return get_compute_param<uint64_t>(
pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
}
cl_ulong
device::max_mem_alloc_size() const {
return get_compute_param<uint64_t>(pipe,
return get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
}
cl_uint
device::max_clock_frequency() const {
return get_compute_param<uint32_t>(pipe,
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
}
cl_uint
device::max_compute_units() const {
return get_compute_param<uint32_t>(pipe,
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
}
bool
device::image_support() const {
return get_compute_param<uint32_t>(pipe,
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
}
@ -181,13 +182,15 @@ device::has_doubles() const {
std::vector<size_t>
device::max_block_size() const {
auto v = get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
auto v = get_compute_param<uint64_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
return { v.begin(), v.end() };
}
cl_uint
device::subgroup_size() const {
return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
return get_compute_param<uint32_t>(pipe, ir_format(),
PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
}
std::string
@ -209,7 +212,7 @@ device::ir_format() const {
std::string
device::ir_target() const {
std::vector<char> target = get_compute_param<char>(
pipe, PIPE_COMPUTE_CAP_IR_TARGET);
pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
return { target.data() };
}

View file

@ -89,6 +89,8 @@ kernel::launch(command_queue &q,
exec.sviews.size(), NULL);
q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
exec.samplers.size(), NULL);
q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER);
exec.unbind();
}
@ -223,6 +225,7 @@ kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
if (st)
_q->pipe->delete_compute_state(_q->pipe, st);
cs.ir_type = q->device().ir_format();
cs.prog = &(msec.data[0]);
cs.req_local_mem = mem_local;
cs.req_input_mem = input.size();

View file

@ -58,7 +58,9 @@ struct context {
uint64_t __v[4]; \
int __i, __n; \
\
__n = ctx->screen->get_compute_param(ctx->screen, c, __v); \
__n = ctx->screen->get_compute_param(ctx->screen, \
PIPE_SHADER_IR_TGSI, \
c, __v); \
printf("%s: {", #c); \
\
for (__i = 0; __i < __n / sizeof(*__v); ++__i) \
@ -144,6 +146,7 @@ static void init_prog(struct context *ctx, unsigned local_sz,
struct pipe_context *pipe = ctx->pipe;
struct tgsi_token prog[1024];
struct pipe_compute_state cs = {
.ir_type = PIPE_SHADER_IR_TGSI,
.prog = prog,
.req_local_mem = local_sz,
.req_private_mem = private_sz,

View file

@ -924,6 +924,40 @@
</category>
<category name="GL_EXT_base_instance" number="203">
<function name="DrawArraysInstancedBaseInstanceEXT" es2="3.0"
alias="DrawArraysInstancedBaseInstance">
<param name="mode" type="GLenum"/>
<param name="first" type="GLint"/>
<param name="count" type="GLsizei"/>
<param name="instancecount" type="GLsizei"/>
<param name="baseinstance" type="GLuint"/>
</function>
<function name="DrawElementsInstancedBaseInstanceEXT" es2="3.0"
alias="DrawElementsInstancedBaseInstance">
<param name="mode" type="GLenum"/>
<param name="count" type="GLsizei"/>
<param name="type" type="GLenum"/>
<param name="indices" type="const GLvoid *"/>
<param name="instancecount" type="GLsizei"/>
<param name="baseinstance" type="GLuint"/>
</function>
<function name="DrawElementsInstancedBaseVertexBaseInstanceEXT" es2="3.0"
alias="DrawElementsInstancedBaseVertexBaseInstance">
<param name="mode" type="GLenum"/>
<param name="count" type="GLsizei"/>
<param name="type" type="GLenum"/>
<param name="indices" type="const GLvoid *"/>
<param name="instancecount" type="GLsizei"/>
<param name="basevertex" type="GLint"/>
<param name="baseinstance" type="GLuint"/>
</function>
</category>
<category name="GL_EXT_draw_elements_base_vertex" number="204">
<function name="DrawElementsBaseVertexEXT" alias="DrawElementsBaseVertex"

View file

@ -12741,7 +12741,7 @@
<enum name="POLYGON_OFFSET_CLAMP_EXT" value="0x8E1B">
<size name="Get" mode="get"/>
</enum>
<function name="PolygonOffsetClampEXT">
<function name="PolygonOffsetClampEXT" es1="1.0" es2="2.0">
<param name="factor" type="GLfloat"/>
<param name="units" type="GLfloat"/>
<param name="clamp" type="GLfloat"/>

View file

@ -148,6 +148,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
brw_fs_alloc_reg_sets(compiler);
brw_vec4_alloc_reg_set(compiler);
compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);
compiler->scalar_stage[MESA_SHADER_VERTEX] =
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;

View file

@ -92,6 +92,12 @@ struct brw_compiler {
bool scalar_stage[MESA_SHADER_STAGES];
struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
/**
* Apply workarounds for SIN and COS output range problems.
* This can negatively impact performance.
*/
bool precise_trig;
};
struct brw_compiler *

View file

@ -765,29 +765,27 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
inst->saturate = instr->dest.saturate;
break;
case nir_op_fsin: {
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
if (instr->dest.saturate) {
inst->dst = result;
inst->saturate = true;
case nir_op_fsin:
if (!compiler->precise_trig) {
inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
} else {
bld.MUL(result, tmp, brw_imm_f(0.99997));
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
}
inst->saturate = instr->dest.saturate;
break;
}
case nir_op_fcos: {
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
if (instr->dest.saturate) {
inst->dst = result;
inst->saturate = true;
case nir_op_fcos:
if (!compiler->precise_trig) {
inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
} else {
bld.MUL(result, tmp, brw_imm_f(0.99997));
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
}
inst->saturate = instr->dest.saturate;
break;
}
case nir_op_fddx:
if (fs_key->high_quality_derivatives) {

View file

@ -793,7 +793,8 @@ brw_render_target_supported(struct brw_context *brw,
/* Under some conditions, MSAA is not supported for formats whose width is
* more than 64 bits.
*/
if (rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
if (brw->gen < 8 &&
rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
/* Gen6: MSAA on >64 bit formats is unsupported. */
if (brw->gen <= 6)
return false;

View file

@ -1093,29 +1093,27 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
inst->saturate = instr->dest.saturate;
break;
case nir_op_fsin: {
src_reg tmp = src_reg(this, glsl_type::vec4_type);
inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
if (instr->dest.saturate) {
inst->dst = dst;
inst->saturate = true;
case nir_op_fsin:
if (!compiler->precise_trig) {
inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
} else {
emit(MUL(dst, tmp, brw_imm_f(0.99997)));
src_reg tmp = src_reg(this, glsl_type::vec4_type);
inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
}
inst->saturate = instr->dest.saturate;
break;
}
case nir_op_fcos: {
src_reg tmp = src_reg(this, glsl_type::vec4_type);
inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
if (instr->dest.saturate) {
inst->dst = dst;
inst->saturate = true;
case nir_op_fcos:
if (!compiler->precise_trig) {
inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
} else {
emit(MUL(dst, tmp, brw_imm_f(0.99997)));
src_reg tmp = src_reg(this, glsl_type::vec4_type);
inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
}
inst->saturate = instr->dest.saturate;
break;
}
case nir_op_idiv:
case nir_op_udiv:

View file

@ -27,6 +27,7 @@ EXT(APPLE_texture_max_level , dummy_true
EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002)
EXT(ARB_ES2_compatibility , ARB_ES2_compatibility , GLL, GLC, x , x , 2009)
EXT(ARB_ES3_1_compatibility , ARB_ES3_1_compatibility , x , GLC, x , x , 2014)
EXT(ARB_ES3_compatibility , ARB_ES3_compatibility , GLL, GLC, x , x , 2012)
EXT(ARB_arrays_of_arrays , ARB_arrays_of_arrays , GLL, GLC, x , x , 2012)
EXT(ARB_base_instance , ARB_base_instance , GLL, GLC, x , x , 2011)
@ -176,6 +177,7 @@ EXT(ATI_texture_float , ARB_texture_float
EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006)
EXT(EXT_abgr , dummy_true , GLL, GLC, x , x , 1995)
EXT(EXT_base_instance , ARB_base_instance , x , x , x , 30, 2014)
EXT(EXT_bgra , dummy_true , GLL, x , x , x , 1995)
EXT(EXT_blend_color , EXT_blend_color , GLL, x , x , x , 1995)
EXT(EXT_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
@ -213,7 +215,7 @@ EXT(EXT_packed_pixels , dummy_true
EXT(EXT_pixel_buffer_object , EXT_pixel_buffer_object , GLL, GLC, x , x , 2004)
EXT(EXT_point_parameters , EXT_point_parameters , GLL, x , x , x , 1997)
EXT(EXT_polygon_offset , dummy_true , GLL, x , x , x , 1995)
EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, x , x , 2014)
EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, ES1, ES2, 2014)
EXT(EXT_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009)
EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009)
EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997)

View file

@ -135,6 +135,9 @@ descriptor=[
[ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ],
[ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ],
[ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
# GL_EXT_polygon_offset_clamp
[ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
]},
# Enums in OpenGL and GLES1
@ -532,7 +535,7 @@ descriptor=[
[ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
# GL_ARB_shader_storage_buffer_object / geometry shader
[ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
[ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
# GL_ARB_uniform_buffer_object / geometry shader
[ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
@ -857,9 +860,6 @@ descriptor=[
# GL_ARB_shader_image_load_store
[ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
# GL_EXT_polygon_offset_clamp
[ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
# GL_ARB_shader_storage_buffer_object
[ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
[ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],

View file

@ -2534,6 +2534,9 @@ struct gl_uniform_block
*/
bool IsShaderStorage;
/** Stages that reference this block */
uint8_t stageref;
/**
* Layout specified in the shader
*
@ -2834,16 +2837,6 @@ struct gl_shader_program
unsigned NumShaderStorageBlocks;
struct gl_uniform_block **ShaderStorageBlocks;
/**
* Indices into the BufferInterfaceBlocks[] array for each stage they're
* used in, or -1.
*
* This is used to maintain the Binding values of the stage's
* BufferInterfaceBlocks[] and to answer the
* GL_UNIFORM_BLOCK_REFERENCED_BY_*_SHADER queries.
*/
int *InterfaceBlockStageIndex[MESA_SHADER_STAGES];
/**
* Map of active uniform names to locations
*
@ -3786,6 +3779,7 @@ struct gl_extensions
GLboolean ANGLE_texture_compression_dxt;
GLboolean ARB_ES2_compatibility;
GLboolean ARB_ES3_compatibility;
GLboolean ARB_ES3_1_compatibility;
GLboolean ARB_arrays_of_arrays;
GLboolean ARB_base_instance;
GLboolean ARB_blend_func_extended;

View file

@ -101,31 +101,6 @@ _mesa_BindAttribLocation(GLuint program, GLuint index,
*/
}
/**
 * Decide whether a program-input variable counts as an active vertex
 * attribute.
 *
 * A NULL variable is never active.  A shader input is active when its
 * location is anything other than -1.  A system value is active only when it
 * is one of the vertex-ID / instance-ID built-ins.  Every other variable
 * mode is inactive.
 */
static bool
is_active_attrib(const gl_shader_variable *var)
{
   if (!var)
      return false;

   if (var->mode == ir_var_shader_in)
      return var->location != -1;

   if (var->mode != ir_var_system_value)
      return false;

   /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes):
    * "For GetActiveAttrib, all active vertex shader input variables
    * are enumerated, including the special built-in inputs gl_VertexID
    * and gl_InstanceID."
    */
   if (var->location == SYSTEM_VALUE_VERTEX_ID)
      return true;
   if (var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE)
      return true;
   return var->location == SYSTEM_VALUE_INSTANCE_ID;
}
void GLAPIENTRY
_mesa_GetActiveAttrib(GLuint program, GLuint desired_index,
GLsizei maxLength, GLsizei * length, GLint * size,
@ -166,20 +141,8 @@ _mesa_GetActiveAttrib(GLuint program, GLuint desired_index,
const gl_shader_variable *const var = RESOURCE_VAR(res);
if (!is_active_attrib(var))
return;
const char *var_name = var->name;
/* Since gl_VertexID may be lowered to gl_VertexIDMESA, we need to
* consider gl_VertexIDMESA as gl_VertexID for purposes of checking
* active attributes.
*/
if (var->mode == ir_var_system_value &&
var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
var_name = "gl_VertexID";
}
_mesa_copy_string(name, maxLength, length, var_name);
if (size)
@ -224,19 +187,7 @@ _mesa_GetAttribLocation(GLuint program, const GLchar * name)
if (!res)
return -1;
GLint loc = program_resource_location(shProg, res, name, array_index);
/* The extra check against 0 is made because of builtin-attribute
* locations that have offset applied. Function program_resource_location
* can return built-in attribute locations < 0 and glGetAttribLocation
* cannot be used on "conventional" attributes.
*
* From page 95 of the OpenGL 3.0 spec:
*
* "If name is not an active attribute, if name is a conventional
* attribute, or if an error occurs, -1 will be returned."
*/
return (loc >= 0) ? loc : -1;
return program_resource_location(shProg, res, name, array_index);
}
unsigned
@ -251,8 +202,7 @@ _mesa_count_active_attribs(struct gl_shader_program *shProg)
unsigned count = 0;
for (unsigned j = 0; j < shProg->NumProgramResourceList; j++, res++) {
if (res->Type == GL_PROGRAM_INPUT &&
res->StageReferences & (1 << MESA_SHADER_VERTEX) &&
is_active_attrib(RESOURCE_VAR(res)))
res->StageReferences & (1 << MESA_SHADER_VERTEX))
count++;
}
return count;
@ -410,25 +360,12 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name)
if (!res)
return -1;
GLint loc = program_resource_location(shProg, res, name, array_index);
/* The extra check against 0 is made because of builtin-attribute
* locations that have offset applied. Function program_resource_location
* can return built-in attribute locations < 0 and glGetFragDataLocation
* cannot be used on "conventional" attributes.
*
* From page 95 of the OpenGL 3.0 spec:
*
* "If name is not an active attribute, if name is a conventional
* attribute, or if an error occurs, -1 will be returned."
*/
return (loc >= 0) ? loc : -1;
return program_resource_location(shProg, res, name, array_index);
}
const char*
_mesa_program_resource_name(struct gl_program_resource *res)
{
const gl_shader_variable *var;
switch (res->Type) {
case GL_UNIFORM_BLOCK:
case GL_SHADER_STORAGE_BLOCK:
@ -436,13 +373,6 @@ _mesa_program_resource_name(struct gl_program_resource *res)
case GL_TRANSFORM_FEEDBACK_VARYING:
return RESOURCE_XFV(res)->Name;
case GL_PROGRAM_INPUT:
var = RESOURCE_VAR(res);
/* Special case gl_VertexIDMESA -> gl_VertexID. */
if (var->mode == ir_var_system_value &&
var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
return "gl_VertexID";
}
/* fallthrough */
case GL_PROGRAM_OUTPUT:
return RESOURCE_VAR(res)->name;
case GL_UNIFORM:
@ -850,34 +780,31 @@ program_resource_location(struct gl_shader_program *shProg,
struct gl_program_resource *res, const char *name,
unsigned array_index)
{
/* Built-in locations should report GL_INVALID_INDEX. */
if (is_gl_identifier(name))
return GL_INVALID_INDEX;
/* VERT_ATTRIB_GENERIC0 and FRAG_RESULT_DATA0 are decremented as these
* offsets are used internally to differentiate between built-in attributes
* and user-defined attributes.
*/
switch (res->Type) {
case GL_PROGRAM_INPUT: {
const gl_shader_variable *var = RESOURCE_VAR(res);
if (var->location == -1)
return -1;
/* If the input is an array, fail if the index is out of bounds. */
if (array_index > 0
&& array_index >= var->type->length) {
return -1;
}
return (var->location +
(array_index * var->type->without_array()->matrix_columns) -
VERT_ATTRIB_GENERIC0);
return var->location +
(array_index * var->type->without_array()->matrix_columns);
}
case GL_PROGRAM_OUTPUT:
if (RESOURCE_VAR(res)->location == -1)
return -1;
/* If the output is an array, fail if the index is out of bounds. */
if (array_index > 0
&& array_index >= RESOURCE_VAR(res)->type->length) {
return -1;
}
return RESOURCE_VAR(res)->location + array_index - FRAG_RESULT_DATA0;
return RESOURCE_VAR(res)->location + array_index;
case GL_UNIFORM:
/* If the uniform is built-in, fail. */
if (RESOURCE_UNI(res)->builtin)
@ -999,7 +926,7 @@ is_resource_referenced(struct gl_shader_program *shProg,
return RESOURCE_ATC(res)->StageReferences[stage];
if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK)
return shProg->InterfaceBlockStageIndex[stage][index] != -1;
return shProg->BufferInterfaceBlocks[index].stageref & (1 << stage);
return res->StageReferences & (1 << stage);
}

View file

@ -295,10 +295,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
ralloc_free(shProg->BufferInterfaceBlocks);
shProg->BufferInterfaceBlocks = NULL;
shProg->NumBufferInterfaceBlocks = 0;
for (i = 0; i < MESA_SHADER_STAGES; i++) {
ralloc_free(shProg->InterfaceBlockStageIndex[i]);
shProg->InterfaceBlockStageIndex[i] = NULL;
}
ralloc_free(shProg->AtomicBuffers);
shProg->AtomicBuffers = NULL;

View file

@ -2064,6 +2064,9 @@ const struct function gles11_functions_possible[] = {
{ "glObjectLabelKHR", 11, -1 },
{ "glObjectPtrLabelKHR", 11, -1 },
/* GL_EXT_polygon_offset_clamp */
{ "glPolygonOffsetClampEXT", 11, -1 },
{ NULL, 0, -1 }
};
@ -2300,6 +2303,9 @@ const struct function gles2_functions_possible[] = {
{ "glObjectLabelKHR", 20, -1 },
{ "glObjectPtrLabelKHR", 20, -1 },
/* GL_EXT_polygon_offset_clamp */
{ "glPolygonOffsetClampEXT", 11, -1 },
{ NULL, 0, -1 }
};
@ -2470,6 +2476,11 @@ const struct function gles3_functions_possible[] = {
{ "glDisableiOES", 30, -1 },
{ "glIsEnablediOES", 30, -1 },
/* GL_EXT_base_instance */
{ "glDrawArraysInstancedBaseInstanceEXT", 30, -1 },
{ "glDrawElementsInstancedBaseInstanceEXT", 30, -1 },
{ "glDrawElementsInstancedBaseVertexBaseInstanceEXT", 30, -1 },
{ NULL, 0, -1 }
};

View file

@ -765,6 +765,11 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_UNORM);
break;
case GL_STENCIL_INDEX:
case GL_STENCIL_INDEX8:
RETURN_IF_SUPPORTED(MESA_FORMAT_S_UINT8);
break;
default:
/* For non-generic compressed format we assert two things:
*

View file

@ -351,8 +351,55 @@ compute_version(const struct gl_extensions *extensions,
extensions->ARB_shading_language_packing &&
extensions->ARB_texture_compression_bptc &&
extensions->ARB_transform_feedback_instanced);
const bool ver_4_3 = (ver_4_2 &&
consts->GLSLVersion >= 430 &&
extensions->ARB_ES3_compatibility &&
extensions->ARB_arrays_of_arrays &&
extensions->ARB_compute_shader &&
extensions->ARB_copy_image &&
extensions->ARB_explicit_uniform_location &&
extensions->ARB_fragment_layer_viewport &&
extensions->ARB_framebuffer_no_attachments &&
extensions->ARB_internalformat_query2 &&
/* extensions->ARB_robust_buffer_access_behavior */ 0 &&
extensions->ARB_shader_image_size &&
extensions->ARB_shader_storage_buffer_object &&
extensions->ARB_stencil_texturing &&
extensions->ARB_texture_buffer_range &&
extensions->ARB_texture_query_levels &&
extensions->ARB_texture_view);
const bool ver_4_4 = (ver_4_3 &&
consts->GLSLVersion >= 440 &&
extensions->ARB_buffer_storage &&
extensions->ARB_clear_texture &&
extensions->ARB_enhanced_layouts &&
extensions->ARB_query_buffer_object &&
extensions->ARB_texture_mirror_clamp_to_edge &&
extensions->ARB_texture_stencil8 &&
extensions->ARB_vertex_type_10f_11f_11f_rev);
const bool ver_4_5 = (ver_4_4 &&
consts->GLSLVersion >= 450 &&
extensions->ARB_ES3_1_compatibility &&
extensions->ARB_clip_control &&
extensions->ARB_conditional_render_inverted &&
/* extensions->ARB_cull_distance */ 0 &&
extensions->ARB_derivative_control &&
extensions->ARB_shader_texture_image_samples &&
extensions->NV_texture_barrier);
if (ver_4_2) {
if (ver_4_5) {
major = 4;
minor = 5;
}
else if (ver_4_4) {
major = 4;
minor = 4;
}
else if (ver_4_3) {
major = 4;
minor = 3;
}
else if (ver_4_2) {
major = 4;
minor = 2;
}

View file

@ -248,54 +248,51 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
paramList->Size = 0;
return -1;
}
else {
GLuint i, j;
paramList->NumParameters = oldNum + sz4;
GLuint i, j;
memset(&paramList->Parameters[oldNum], 0,
sz4 * sizeof(struct gl_program_parameter));
paramList->NumParameters = oldNum + sz4;
for (i = 0; i < sz4; i++) {
struct gl_program_parameter *p = paramList->Parameters + oldNum + i;
p->Name = name ? strdup(name) : NULL;
p->Type = type;
p->Size = size;
p->DataType = datatype;
if (values) {
if (size >= 4) {
COPY_4V(paramList->ParameterValues[oldNum + i], values);
memset(&paramList->Parameters[oldNum], 0,
sz4 * sizeof(struct gl_program_parameter));
for (i = 0; i < sz4; i++) {
struct gl_program_parameter *p = paramList->Parameters + oldNum + i;
p->Name = name ? strdup(name) : NULL;
p->Type = type;
p->Size = size;
p->DataType = datatype;
if (values) {
if (size >= 4) {
COPY_4V(paramList->ParameterValues[oldNum + i], values);
} else {
/* copy 1, 2 or 3 values */
GLuint remaining = size % 4;
assert(remaining < 4);
for (j = 0; j < remaining; j++) {
paramList->ParameterValues[oldNum + i][j].f = values[j].f;
}
else {
/* copy 1, 2 or 3 values */
GLuint remaining = size % 4;
assert(remaining < 4);
for (j = 0; j < remaining; j++) {
paramList->ParameterValues[oldNum + i][j].f = values[j].f;
}
/* fill in remaining positions with zeros */
for (; j < 4; j++) {
paramList->ParameterValues[oldNum + i][j].f = 0.0f;
}
/* fill in remaining positions with zeros */
for (; j < 4; j++) {
paramList->ParameterValues[oldNum + i][j].f = 0.0f;
}
values += 4;
p->Initialized = GL_TRUE;
}
else {
/* silence valgrind */
for (j = 0; j < 4; j++)
paramList->ParameterValues[oldNum + i][j].f = 0;
}
size -= 4;
values += 4;
p->Initialized = GL_TRUE;
} else {
/* silence valgrind */
for (j = 0; j < 4; j++)
paramList->ParameterValues[oldNum + i][j].f = 0;
}
if (state) {
for (i = 0; i < STATE_LENGTH; i++)
paramList->Parameters[oldNum].StateIndexes[i] = state[i];
}
return (GLint) oldNum;
size -= 4;
}
if (state) {
for (i = 0; i < STATE_LENGTH; i++)
paramList->Parameters[oldNum].StateIndexes[i] = state[i];
}
return (GLint) oldNum;
}

View file

@ -1124,14 +1124,15 @@ void st_init_extensions(struct pipe_screen *screen,
if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
uint64_t grid_size[3], block_size[3];
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
grid_size);
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
block_size);
screen->get_compute_param(screen,
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size);
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size);
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
&consts->MaxComputeWorkGroupInvocations);
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
&consts->MaxComputeSharedMemorySize);
for (i = 0; i < 3; i++) {

View file

@ -5935,6 +5935,20 @@ find_array(unsigned attr, struct array_decl *arrays, unsigned count,
return false;
}
/**
 * Emit the fixed compute block dimensions as TGSI properties.
 *
 * \param program  program whose LocalSize[0..2] values are emitted; it is
 *                 cast to gl_compute_program, so the caller must only pass
 *                 a program that really is one
 * \param ureg     ureg program that receives the three
 *                 TGSI_PROPERTY_CS_FIXED_BLOCK_* properties
 */
static void
emit_compute_block_size(const struct gl_program *program,
                        struct ureg_program *ureg)
{
   /* Property names in the same order as the LocalSize[] components. */
   static const unsigned block_props[3] = {
      TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
      TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
      TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
   };
   const struct gl_compute_program *compute =
      (const struct gl_compute_program *)program;

   for (unsigned axis = 0; axis < 3; axis++) {
      ureg_property(ureg, block_props[axis], compute->LocalSize[axis]);
   }
}
/**
* Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
* \param program the program to translate
@ -6180,6 +6194,10 @@ st_translate_program(
}
}
if (procType == TGSI_PROCESSOR_COMPUTE) {
emit_compute_block_size(proginfo, ureg);
}
/* Declare address register.
*/
if (program->num_address_regs > 0) {

View file

@ -1463,6 +1463,7 @@ st_translate_compute_program(struct st_context *st,
st_translate_program_common(st, &stcp->Base.Base, stcp->glsl_to_tgsi, ureg,
TGSI_PROCESSOR_COMPUTE, &prog);
stcp->tgsi.ir_type = PIPE_SHADER_IR_TGSI;
stcp->tgsi.prog = prog.tokens;
stcp->tgsi.req_local_mem = stcp->Base.SharedSize;
stcp->tgsi.req_private_mem = 0;

View file

@ -1739,7 +1739,7 @@ vbo_initialize_exec_dispatch(const struct gl_context *ctx,
}
}
if (_mesa_is_desktop_gl(ctx)) {
if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) {
SET_DrawArraysInstancedBaseInstance(exec, vbo_exec_DrawArraysInstancedBaseInstance);
SET_DrawElementsInstancedBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseInstance);
SET_DrawElementsInstancedBaseVertexBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseVertexBaseInstance);