mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-25 20:00:37 +02:00
Merge remote-tracking branch 'public/master' into vulkan
This commit is contained in:
commit
6a04968784
84 changed files with 2249 additions and 1849 deletions
|
|
@ -16,6 +16,13 @@
|
|||
|
||||
<h1>News</h1>
|
||||
|
||||
<h2>April 4, 2016</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.2.0.html">Mesa 11.2.0</a> is released. This is a
|
||||
new development release. See the release notes for more information
|
||||
about the release.
|
||||
</p>
|
||||
|
||||
<h2>February 10, 2016</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.1.2.html">Mesa 11.1.2</a> is released.
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
|
|||
</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="relnotes/11.2.0.html">11.2.0 release notes</a>
|
||||
<li><a href="relnotes/11.1.2.html">11.1.2 release notes</a>
|
||||
<li><a href="relnotes/11.0.9.html">11.0.9 release notes</a>
|
||||
<li><a href="relnotes/11.1.1.html">11.1.1 release notes</a>
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.2.0 Release Notes / TBD</h1>
|
||||
<h1>Mesa 11.2.0 Release Notes / 4 April 2016</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.2.0 is a new development release.
|
||||
|
|
@ -33,7 +33,8 @@ because compatibility contexts are not supported.
|
|||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD.
|
||||
dea3d8143929aad5c24ef0993ddb05807b30c284b488fc62903adfcc1c127887 mesa-11.2.0.tar.gz
|
||||
1c1fed2674abf3f16ed2623e9a5694d6752c293194e18462ebc644a19cfaafb2 mesa-11.2.0.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
|
@ -70,7 +71,217 @@ Note: some of the new features are only available with certain drivers.
|
|||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
TBD.
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27512">Bug 27512</a> - Illegal instruction _mesa_x86_64_transform_points4_general</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75165">Bug 75165</a> - compute.c:464:49: error: function definition is not allowed here</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79783">Bug 79783</a> - Distorted output in obs-studio where other vendors "work"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89330">Bug 89330</a> - piglit glsl-1.50 invariant-qualifier-in-out-block-01 regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89969">Bug 89969</a> - nouveau: add support for chunk decoding in order to support vaapi (st/va)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90348">Bug 90348</a> - Spilling failure of b96 merged value</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91806">Bug 91806</a> - configure does not test whether assembler supports sse4.1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91927">Bug 91927</a> - [SKL] [regression] piglit compressed textures tests fail with kernel upgrade</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92193">Bug 92193</a> - [SKL] ES2-CTS.gtf.GL2ExtensionTests.compressed_astc_texture.compressed_astc_texture fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92229">Bug 92229</a> - [APITRACE] SOMA have serious graphical errors</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92233">Bug 92233</a> - Unigine Heaven 4.0 silhouette run</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92363">Bug 92363</a> - [BSW/BDW] ogles1conform Gets test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92438">Bug 92438</a> - Segfault in pushbuf_kref when running the android emulator (qemu) on nv50</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92589">Bug 92589</a> - [BDW BSW SKL CTS] ES31-CTS.texture_gather.* GPU_HANG</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92595">Bug 92595</a> - [HSW,BDW,SKL][GLES 3.1 CTS] Big difference in the results for the ES31-CTS.shader_bitfield_operation.* tests</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92609">Bug 92609</a> - [BDW, BSW] piglit sampling-2d-array-as-2d-layer fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92687">Bug 92687</a> - Add support for ARB_internalformat_query2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92706">Bug 92706</a> - glBlitFramebuffer refuses to blit RGBA to RGB with MSAA</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92709">Bug 92709</a> - "LLVM triggered Diagnostic Handler: unsupported call to function ldexpf in main" when starting race in stuntrally</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92743">Bug 92743</a> - Centroid shouldn't have to match between the FS and the VS</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92759">Bug 92759</a> - [Regression, bisected] Visuals without alpha bits are not sRGB-capable</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92849">Bug 92849</a> - [IVB HSW BDW] piglit image load/store load-from-cleared-image.shader_test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92909">Bug 92909</a> - Offset/alignment issue with layout std140 and vec3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93004">Bug 93004</a> - Guild Wars 2 crash on nouveau DX11 cards</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93048">Bug 93048</a> - [CTS regression] mesa af2723 breaks GL Conformance for debug extension</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93063">Bug 93063</a> - drm_helper.h:227:1: error: static declaration of ‘pipe_virgl_create_screen’ follows non-static declaration</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93091">Bug 93091</a> - [opencl] segfault when running any opencl programs (like clinfo)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93092">Bug 93092</a> - lp_test_format regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93126">Bug 93126</a> - wrongly claim supporting GL_EXT_texture_rg</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93180">Bug 93180</a> - [regression] arb_separate_shader_objects.active sampler conflict fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93189">Bug 93189</a> - "./util/u_inlines.h", line 83: operands have incompatible types: void ":" int</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93215">Bug 93215</a> - [Regression bisected] Ogles1conform Automatic mipmap generation test is fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93235">Bug 93235</a> - [regression] dispatch sanity broken by GetPointerv</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93257">Bug 93257</a> - [SKL, bisected] ASTC dEQP tests segfault</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93264">Bug 93264</a> - Tonga VM Faults since llvm ScheduleDAGInstrs: Rework schedule graph builder.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93266">Bug 93266</a> - gl_arb_shading_language_420pack does not allow binding of image variables</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93300">Bug 93300</a> - Two Worlds 2 renders water incorrectly</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93312">Bug 93312</a> - [SKL][GLES 3.1 CTS] ES31-CTS.layout_binding* GPU_HANG</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93320">Bug 93320</a> - [HSW,BDW,SKL][GLES 3.1 CTS] ES31-CTS.vertex_attrib_binding.advanced-bindingUpdate fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93322">Bug 93322</a> - [HSW,BDW,SKL][GLES 3.1 CTS] ES31-CTS.compute_shader.resource-ubo fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93323">Bug 93323</a> - [HSW,BDW,SKL][GLES 3.1 CTS]ES31-CTS.shader_image_load_store.basic-allTargets-store-fs fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93325">Bug 93325</a> - [HSW,BDW,SKL]ES31-CTS.explicit_uniform_location.uniform-loc-* 2 tests fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93339">Bug 93339</a> - glLinkProgram() should fail when a varying is never written to in a previous stage</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93348">Bug 93348</a> - [HSW,BDW,SKL][GLES 3.1 CTS] ES31-CTS.compute_shader.* segfault</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93358">Bug 93358</a> - [HSW] Unreal Elemental demo - assertion error in copy_image_with_blitter</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93387">Bug 93387</a> - inverse() shouldn’t be exposed in GLSL 1.20 and 1.30</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93388">Bug 93388</a> - [i965, regression, bisection] MESA_FORMAT_B8G8R8X8_SRGB changes break kwin</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93407">Bug 93407</a> - [SKL][GLES 3.1 CTS]ES31-CTS.compute_shader.resources-texture fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93410">Bug 93410</a> - [BDW,SKL][GLES 3.1 CTS]ES31-CTS.shader_image_load_store.negative-linkErrors fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93418">Bug 93418</a> - Geometry Shaders output wrong vertices on Sandy Bridge</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93426">Bug 93426</a> - [SKL,BDW,BSW,BXT] CTS regression: es2-cts.gtf.gl2fixedtests.buffer_objects.buffer_object,s</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93524">Bug 93524</a> - Clover doesn't build</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93526">Bug 93526</a> - GfxBench 4 tessellation demos misrender</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93532">Bug 93532</a> - [HSW,BDW,SKL][GLES 3.1 CTS] ES31-CTS.compute_shader.*. Regression, bisected.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93540">Bug 93540</a> - [BISECTED, HSW] Rendering issue in Heaven (and other benchmarks)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93560">Bug 93560</a> - opt_combine_constants failing fabsf(reg->f) == table.imm[i].val assertion</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93599">Bug 93599</a> - Strange green flashes with "Metro: Last Light Redux" + "Metro 2033 Redux" with Intel Mesa driver</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93648">Bug 93648</a> - Random lines being rendered when playing Dolphin (geometry shaders related, w/ apitrace)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93650">Bug 93650</a> - GL_ARB_separate_shader_objects is buggy (PCSX2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93667">Bug 93667</a> - Crash in eglCreateImageKHR with huge texture size</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93696">Bug 93696</a> - [HSW,BDW;SKL][GLES 3.1 CTS]ES31-CTS.explicit_uniform_location.uniform-loc-mix-with-implicit-max-* fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93700">Bug 93700</a> - [SKL, regression] deqp-gles2.functional.texture.completeness</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93717">Bug 93717</a> - Meta mipmap generation can corrupt texture state</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93722">Bug 93722</a> - Segfault when compiling shader with a subroutine that takes a parameter</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93725">Bug 93725</a> - [HSW, regression, bisected] ES31-CTS.texture_gather.*depth*</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93731">Bug 93731</a> - glUniformSubroutinesuiv segfaults when subroutine uniform is bound to a specific location</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93761">Bug 93761</a> - A conditional discard in a fragment shader causes no depth writing at all</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93790">Bug 93790</a> - [HSW] Use after free with compute programs</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93792">Bug 93792</a> - [HSW] intel_mipmap_tree.c:1325: intel_miptree_copy_slice: Assertion `src_mt->format == dst_mt->format</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93813">Bug 93813</a> - Incorrect viewport range when GL_CLIP_ORIGIN is GL_UPPER_LEFT</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93840">Bug 93840</a> - [i965] Alien: Isolation fails with GL_ARB_compute_shader enabled</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93862">Bug 93862</a> - [Bisected] "drm/amdgpu: fix amdgpu_bo_pin_restricted VRAM placing v2" is bad</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93878">Bug 93878</a> - [llvmpipe][softpipe] piglit arb_gpu_shader_fp64-double-gettransformfeedbackvarying regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93957">Bug 93957</a> - [HSW] Mishandling of sample count when using an attachment-less framebuffer (assertion error)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93961">Bug 93961</a> - virgl build failure after 2016-02-01 changes - no previous prototype for 'virgl_drm_winsys_create'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93962">Bug 93962</a> - [HSW, regression, bisected, CTS] ES2-CTS.gtf.GL2FixedTests.scissor.scissor - segfault/asserts</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93989">Bug 93989</a> - build: flex-2.5.39 seems to be failing for glsl_lexer.ll</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94016">Bug 94016</a> - make check MesaExtensionsTest.AlphabeticallySorted regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94019">Bug 94019</a> - [bisected] 3D acceleration broken with gallium/radeon: just get num_tile_pipes from the winsys</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94050">Bug 94050</a> - test_vec4_register_coalesce regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94073">Bug 94073</a> - Miscompilation of abs_vec3_vert_xvary_ref.vert in WebGL conformance</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94081">Bug 94081</a> - [HSW] compute shader shared var + atomic op = fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94088">Bug 94088</a> - [llvmpipe] SIGFPE pthread_barrier_destroy.c:40</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94091">Bug 94091</a> - Tonga unreal elemental segfault since radeonsi: put image, fmask, and sampler descriptors into one array</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94100">Bug 94100</a> - [HSW] compute indirect dispatch with 0 work groups causes gpu hang</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94134">Bug 94134</a> - [regression] piglit.spec.arb_texture_view.sampling-2d-array-as-2d-layer assertion</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94139">Bug 94139</a> - [regression, HSW, IVB] piglit.spec.arb_compute_shader.minmax</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94150">Bug 94150</a> - UE4 Suntemple rendering errors</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94186">Bug 94186</a> - Crash when launching glxinfo and World of Warcraft with RV790</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94188">Bug 94188</a> - define (or undef) defined behaves stupidly</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94193">Bug 94193</a> - [llvmpipe] Line antialiasing looks different when GL_LINE_STIPPLE is enabled with pattern 0xffff</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94199">Bug 94199</a> - Shader abort/crash</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94253">Bug 94253</a> - [llvmpipe] piglit gl-1.0-swapbuffers-behavior regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94254">Bug 94254</a> - [llvmpipe] [softpipe] piglit read-front regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94257">Bug 94257</a> - [softpipe] piglit glx-copy-sub-buffer regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94274">Bug 94274</a> - [swrast] piglit arb_occlusion_query2-render regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94284">Bug 94284</a> - [radeonsi] outlast segfault on start</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94388">Bug 94388</a> - r600_blit.c:281: r600_decompress_depth_textures: Assertion `tex->is_depth && !tex->is_flushing_texture' failed.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94412">Bug 94412</a> - Trine 3 misrender</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94481">Bug 94481</a> - softpipe - access violation in img_filter_2d_nearest</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94524">Bug 94524</a> - Wrong gl_TessLevelOuter interpretation for isolines</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94595">Bug 94595</a> - [Mesa AMD&swrast] Texture views attached as framebuffers return their viewed texture's color encoding and render incorrectly</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
|
|
@ -78,7 +289,7 @@ Microsoft Visual Studio 2013 or later is now required for building
|
|||
on Windows.
|
||||
Previously, Visual Studio 2008 and later were supported.
|
||||
|
||||
TBD.
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
|||
|
|
@ -49,9 +49,10 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_shader_image_load_store on radeonsi, softpipe</li>
|
||||
<li>GL_ARB_shader_image_size on radeonsi</li>
|
||||
<li>GL_ATI_fragment_shader on all Gallium drivers</li>
|
||||
<li>GL_EXT_base_instance on all drivers that support GL_ARB_base_instance</li>
|
||||
<li>GL_OES_draw_buffers_indexed and GL_EXT_draw_buffers_indexed on all drivers that support GL_ARB_draw_buffers_blend</li>
|
||||
<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
|
||||
<li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li>
|
||||
<li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
|
||||
</ul>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
|
|
|||
|
|
@ -178,11 +178,17 @@ typedef struct _RGNDATA {
|
|||
#undef WINAPI
|
||||
#endif /* WINAPI*/
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
#define WINAPI __attribute__((ms_abi))
|
||||
#else /* x86_64 */
|
||||
#define WINAPI __attribute__((__stdcall__))
|
||||
#endif /* x86_64 */
|
||||
#ifdef __GNUC__
|
||||
#if (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64)
|
||||
#define WINAPI __attribute__((ms_abi))
|
||||
#elif defined(__i386) || defined(_M_IX86)
|
||||
#define WINAPI __attribute__((__stdcall__))
|
||||
#else /* neither amd64 nor i386 */
|
||||
#define WINAPI
|
||||
#endif
|
||||
#else /* __GNUC__ */
|
||||
#define WINAPI
|
||||
#endif
|
||||
|
||||
/* Implementation caps */
|
||||
#define D3DPRESENT_BACK_BUFFERS_MAX 3
|
||||
|
|
|
|||
|
|
@ -3326,6 +3326,7 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
|
|||
+ qual->flags.q.depth_less
|
||||
+ qual->flags.q.depth_unchanged;
|
||||
if (depth_layout_count > 0
|
||||
&& !state->is_version(420, 0)
|
||||
&& !state->AMD_conservative_depth_enable
|
||||
&& !state->ARB_conservative_depth_enable) {
|
||||
_mesa_glsl_error(loc, state,
|
||||
|
|
@ -3708,7 +3709,8 @@ get_variable_being_redeclared(ir_variable *var, YYLTYPE loc,
|
|||
earlier->data.interpolation = var->data.interpolation;
|
||||
|
||||
/* Layout qualifiers for gl_FragDepth. */
|
||||
} else if ((state->AMD_conservative_depth_enable ||
|
||||
} else if ((state->is_version(420, 0) ||
|
||||
state->AMD_conservative_depth_enable ||
|
||||
state->ARB_conservative_depth_enable)
|
||||
&& strcmp(var->name, "gl_FragDepth") == 0
|
||||
&& earlier->type == var->type
|
||||
|
|
|
|||
|
|
@ -210,6 +210,7 @@ static bool
|
|||
shader_integer_mix(const _mesa_glsl_parse_state *state)
|
||||
{
|
||||
return state->is_version(450, 310) ||
|
||||
state->ARB_ES3_1_compatibility_enable ||
|
||||
(v130(state) && state->EXT_shader_integer_mix_enable);
|
||||
}
|
||||
|
||||
|
|
@ -478,6 +479,7 @@ static bool
|
|||
shader_image_atomic_exchange_float(const _mesa_glsl_parse_state *state)
|
||||
{
|
||||
return (state->is_version(450, 320) ||
|
||||
state->ARB_ES3_1_compatibility_enable ||
|
||||
state->OES_shader_image_atomic_enable);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ static const struct builtin_type_versions {
|
|||
T(sampler2DRect, 140, 999)
|
||||
T(samplerBuffer, 140, 320)
|
||||
T(sampler2DMS, 150, 310)
|
||||
T(sampler2DMSArray, 150, 999)
|
||||
T(sampler2DMSArray, 150, 320)
|
||||
|
||||
T(isampler1D, 130, 999)
|
||||
T(isampler2D, 130, 300)
|
||||
|
|
@ -193,7 +193,7 @@ static const struct builtin_type_versions {
|
|||
T(isampler2DRect, 140, 999)
|
||||
T(isamplerBuffer, 140, 320)
|
||||
T(isampler2DMS, 150, 310)
|
||||
T(isampler2DMSArray, 150, 999)
|
||||
T(isampler2DMSArray, 150, 320)
|
||||
|
||||
T(usampler1D, 130, 999)
|
||||
T(usampler2D, 130, 300)
|
||||
|
|
@ -205,7 +205,7 @@ static const struct builtin_type_versions {
|
|||
T(usampler2DRect, 140, 999)
|
||||
T(usamplerBuffer, 140, 320)
|
||||
T(usampler2DMS, 150, 310)
|
||||
T(usampler2DMSArray, 150, 999)
|
||||
T(usampler2DMSArray, 150, 320)
|
||||
|
||||
T(sampler1DShadow, 110, 999)
|
||||
T(sampler2DShadow, 110, 300)
|
||||
|
|
@ -305,11 +305,13 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
|
|||
add_type(symbols, glsl_type::usamplerCubeArray_type);
|
||||
}
|
||||
|
||||
if (state->ARB_texture_multisample_enable ||
|
||||
state->OES_texture_storage_multisample_2d_array_enable) {
|
||||
if (state->ARB_texture_multisample_enable) {
|
||||
add_type(symbols, glsl_type::sampler2DMS_type);
|
||||
add_type(symbols, glsl_type::isampler2DMS_type);
|
||||
add_type(symbols, glsl_type::usampler2DMS_type);
|
||||
}
|
||||
if (state->ARB_texture_multisample_enable ||
|
||||
state->OES_texture_storage_multisample_2d_array_enable) {
|
||||
add_type(symbols, glsl_type::sampler2DMSArray_type);
|
||||
add_type(symbols, glsl_type::isampler2DMSArray_type);
|
||||
add_type(symbols, glsl_type::usampler2DMSArray_type);
|
||||
|
|
|
|||
|
|
@ -845,11 +845,6 @@ builtin_variable_generator::generate_constants()
|
|||
state->Const.MaxImageSamples);
|
||||
}
|
||||
|
||||
if (state->is_version(450, 310)) {
|
||||
add_const("gl_MaxCombinedShaderOutputResources",
|
||||
state->Const.MaxCombinedShaderOutputResources);
|
||||
}
|
||||
|
||||
if (state->is_version(400, 0) ||
|
||||
state->ARB_tessellation_shader_enable) {
|
||||
add_const("gl_MaxTessControlImageUniforms",
|
||||
|
|
@ -859,6 +854,12 @@ builtin_variable_generator::generate_constants()
|
|||
}
|
||||
}
|
||||
|
||||
if (state->is_version(450, 310) ||
|
||||
state->ARB_ES3_1_compatibility_enable) {
|
||||
add_const("gl_MaxCombinedShaderOutputResources",
|
||||
state->Const.MaxCombinedShaderOutputResources);
|
||||
}
|
||||
|
||||
if (state->is_version(410, 0) ||
|
||||
state->ARB_viewport_array_enable)
|
||||
add_const("gl_MaxViewports", state->Const.MaxViewports);
|
||||
|
|
@ -880,7 +881,8 @@ builtin_variable_generator::generate_constants()
|
|||
}
|
||||
|
||||
if (state->is_version(450, 320) ||
|
||||
state->OES_sample_variables_enable)
|
||||
state->OES_sample_variables_enable ||
|
||||
state->ARB_ES3_1_compatibility_enable)
|
||||
add_const("gl_MaxSamples", state->Const.MaxSamples);
|
||||
}
|
||||
|
||||
|
|
@ -1174,7 +1176,7 @@ builtin_variable_generator::generate_fs_special_vars()
|
|||
var->data.interpolation = INTERP_QUALIFIER_FLAT;
|
||||
}
|
||||
|
||||
if (state->is_version(450, 310)/* || state->ARB_ES3_1_compatibility_enable*/)
|
||||
if (state->is_version(450, 310) || state->ARB_ES3_1_compatibility_enable)
|
||||
add_system_value(SYSTEM_VALUE_HELPER_INVOCATION, bool_t, "gl_HelperInvocation");
|
||||
}
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -226,7 +226,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
|
|||
this->supported_versions[this->num_supported_versions].es = true;
|
||||
this->num_supported_versions++;
|
||||
}
|
||||
if (_mesa_is_gles31(ctx)) {
|
||||
if (_mesa_is_gles31(ctx) || ctx->Extensions.ARB_ES3_1_compatibility) {
|
||||
this->supported_versions[this->num_supported_versions].ver = 310;
|
||||
this->supported_versions[this->num_supported_versions].es = true;
|
||||
this->num_supported_versions++;
|
||||
|
|
@ -565,6 +565,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
|
|||
|
||||
/* ARB extensions go here, sorted alphabetically.
|
||||
*/
|
||||
EXT(ARB_ES3_1_compatibility, true, false, ARB_ES3_1_compatibility),
|
||||
EXT(ARB_arrays_of_arrays, true, false, ARB_arrays_of_arrays),
|
||||
EXT(ARB_compute_shader, true, false, ARB_compute_shader),
|
||||
EXT(ARB_conservative_depth, true, false, ARB_conservative_depth),
|
||||
|
|
|
|||
|
|
@ -510,6 +510,8 @@ struct _mesa_glsl_parse_state {
|
|||
/*@{*/
|
||||
/* ARB extensions go here, sorted alphabetically.
|
||||
*/
|
||||
bool ARB_ES3_1_compatibility_enable;
|
||||
bool ARB_ES3_1_compatibility_warn;
|
||||
bool ARB_arrays_of_arrays_enable;
|
||||
bool ARB_arrays_of_arrays_warn;
|
||||
bool ARB_compute_shader_enable;
|
||||
|
|
|
|||
|
|
@ -173,6 +173,7 @@ void ir_print_visitor::visit(ir_variable *ir)
|
|||
const char *const samp = (ir->data.sample) ? "sample " : "";
|
||||
const char *const patc = (ir->data.patch) ? "patch " : "";
|
||||
const char *const inv = (ir->data.invariant) ? "invariant " : "";
|
||||
const char *const prec = (ir->data.precise) ? "precise " : "";
|
||||
const char *const mode[] = { "", "uniform ", "shader_storage ",
|
||||
"shader_shared ", "shader_in ", "shader_out ",
|
||||
"in ", "out ", "inout ",
|
||||
|
|
@ -182,8 +183,8 @@ void ir_print_visitor::visit(ir_variable *ir)
|
|||
const char *const interp[] = { "", "smooth", "flat", "noperspective" };
|
||||
STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_QUALIFIER_COUNT);
|
||||
|
||||
fprintf(f, "(%s%s%s%s%s%s%s%s) ",
|
||||
loc, cent, samp, patc, inv, mode[ir->data.mode],
|
||||
fprintf(f, "(%s%s%s%s%s%s%s%s%s) ",
|
||||
loc, cent, samp, patc, inv, prec, mode[ir->data.mode],
|
||||
stream[ir->data.stream],
|
||||
interp[ir->data.interpolation]);
|
||||
|
||||
|
|
|
|||
|
|
@ -44,18 +44,6 @@ get_storage(gl_uniform_storage *storage, unsigned num_storage,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
get_uniform_block_index(const gl_shader_program *shProg,
|
||||
const char *uniformBlockName)
|
||||
{
|
||||
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
|
||||
if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName))
|
||||
return i;
|
||||
}
|
||||
|
||||
return GL_INVALID_INDEX;
|
||||
}
|
||||
|
||||
void
|
||||
copy_constant_to_storage(union gl_constant_value *storage,
|
||||
const ir_constant *val,
|
||||
|
|
@ -168,22 +156,14 @@ set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
|
|||
void
|
||||
set_block_binding(gl_shader_program *prog, const char *block_name, int binding)
|
||||
{
|
||||
const unsigned block_index = get_uniform_block_index(prog, block_name);
|
||||
|
||||
if (block_index == GL_INVALID_INDEX) {
|
||||
assert(block_index != GL_INVALID_INDEX);
|
||||
return;
|
||||
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
|
||||
if (!strcmp(prog->BufferInterfaceBlocks[i].Name, block_name)) {
|
||||
prog->BufferInterfaceBlocks[i].Binding = binding;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* This is a field of a UBO. val is the binding index. */
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
int stage_index = prog->InterfaceBlockStageIndex[i][block_index];
|
||||
|
||||
if (stage_index != -1) {
|
||||
struct gl_shader *sh = prog->_LinkedShaders[i];
|
||||
sh->BufferInterfaceBlocks[stage_index]->Binding = binding;
|
||||
}
|
||||
}
|
||||
unreachable("Failed to initialize block binding");
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -282,7 +282,7 @@ public:
|
|||
: num_active_uniforms(0), num_hidden_uniforms(0), num_values(0),
|
||||
num_shader_samplers(0), num_shader_images(0),
|
||||
num_shader_uniform_components(0), num_shader_subroutines(0),
|
||||
is_ubo_var(false), is_shader_storage(false), map(map),
|
||||
is_buffer_block(false), is_shader_storage(false), map(map),
|
||||
hidden_map(hidden_map)
|
||||
{
|
||||
/* empty */
|
||||
|
|
@ -299,7 +299,7 @@ public:
|
|||
void process(ir_variable *var)
|
||||
{
|
||||
this->current_var = var;
|
||||
this->is_ubo_var = var->is_in_buffer_block();
|
||||
this->is_buffer_block = var->is_in_buffer_block();
|
||||
this->is_shader_storage = var->is_in_shader_storage_block();
|
||||
if (var->is_interface_instance())
|
||||
program_resource_visitor::process(var->get_interface_type(),
|
||||
|
|
@ -340,7 +340,7 @@ public:
|
|||
*/
|
||||
unsigned num_shader_subroutines;
|
||||
|
||||
bool is_ubo_var;
|
||||
bool is_buffer_block;
|
||||
bool is_shader_storage;
|
||||
|
||||
struct string_to_uint_map *map;
|
||||
|
|
@ -380,7 +380,7 @@ private:
|
|||
* Note that samplers do not count against this limit because they
|
||||
* don't use any storage on current hardware.
|
||||
*/
|
||||
if (!is_ubo_var && !is_shader_storage)
|
||||
if (!is_buffer_block)
|
||||
this->num_shader_uniform_components += values;
|
||||
}
|
||||
|
||||
|
|
@ -460,30 +460,33 @@ public:
|
|||
field_counter = 0;
|
||||
this->record_next_sampler = new string_to_uint_map;
|
||||
|
||||
ubo_block_index = -1;
|
||||
buffer_block_index = -1;
|
||||
if (var->is_in_buffer_block()) {
|
||||
struct gl_uniform_block **blks = var->is_in_shader_storage_block() ?
|
||||
prog->ShaderStorageBlocks : prog->UniformBlocks;
|
||||
unsigned num_blks = var->is_in_shader_storage_block() ?
|
||||
prog->NumShaderStorageBlocks : prog->NumUniformBlocks;
|
||||
|
||||
if (var->is_interface_instance() && var->type->is_array()) {
|
||||
unsigned l = strlen(var->get_interface_type()->name);
|
||||
|
||||
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
|
||||
if (strncmp(var->get_interface_type()->name,
|
||||
prog->BufferInterfaceBlocks[i].Name,
|
||||
l) == 0
|
||||
&& prog->BufferInterfaceBlocks[i].Name[l] == '[') {
|
||||
ubo_block_index = i;
|
||||
for (unsigned i = 0; i < num_blks; i++) {
|
||||
if (strncmp(var->get_interface_type()->name, blks[i]->Name, l)
|
||||
== 0 && blks[i]->Name[l] == '[') {
|
||||
buffer_block_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
|
||||
if (strcmp(var->get_interface_type()->name,
|
||||
prog->BufferInterfaceBlocks[i].Name) == 0) {
|
||||
ubo_block_index = i;
|
||||
for (unsigned i = 0; i < num_blks; i++) {
|
||||
if (strcmp(var->get_interface_type()->name, blks[i]->Name) ==
|
||||
0) {
|
||||
buffer_block_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(ubo_block_index != -1);
|
||||
assert(buffer_block_index != -1);
|
||||
|
||||
/* Uniform blocks that were specified with an instance name must be
|
||||
* handled a little bit differently. The name of the variable is the
|
||||
|
|
@ -497,7 +500,7 @@ public:
|
|||
var->get_interface_type()->name);
|
||||
} else {
|
||||
const struct gl_uniform_block *const block =
|
||||
&prog->BufferInterfaceBlocks[ubo_block_index];
|
||||
blks[buffer_block_index];
|
||||
|
||||
assert(var->data.location != -1);
|
||||
|
||||
|
|
@ -519,7 +522,7 @@ public:
|
|||
delete this->record_next_sampler;
|
||||
}
|
||||
|
||||
int ubo_block_index;
|
||||
int buffer_block_index;
|
||||
int ubo_byte_offset;
|
||||
gl_shader_stage shader_type;
|
||||
|
||||
|
|
@ -659,7 +662,7 @@ private:
|
|||
virtual void enter_record(const glsl_type *type, const char *,
|
||||
bool row_major, const unsigned packing) {
|
||||
assert(type->is_record());
|
||||
if (this->ubo_block_index == -1)
|
||||
if (this->buffer_block_index == -1)
|
||||
return;
|
||||
if (packing == GLSL_INTERFACE_PACKING_STD430)
|
||||
this->ubo_byte_offset = glsl_align(
|
||||
|
|
@ -672,7 +675,7 @@ private:
|
|||
virtual void leave_record(const glsl_type *type, const char *,
|
||||
bool row_major, const unsigned packing) {
|
||||
assert(type->is_record());
|
||||
if (this->ubo_block_index == -1)
|
||||
if (this->buffer_block_index == -1)
|
||||
return;
|
||||
if (packing == GLSL_INTERFACE_PACKING_STD430)
|
||||
this->ubo_byte_offset = glsl_align(
|
||||
|
|
@ -719,7 +722,7 @@ private:
|
|||
/* For array of arrays or struct arrays the base location may have
|
||||
* already been set so don't set it again.
|
||||
*/
|
||||
if (ubo_block_index == -1 && current_var->data.location == -1) {
|
||||
if (buffer_block_index == -1 && current_var->data.location == -1) {
|
||||
current_var->data.location = id;
|
||||
}
|
||||
|
||||
|
|
@ -766,8 +769,8 @@ private:
|
|||
this->uniforms[id].is_shader_storage =
|
||||
current_var->is_in_shader_storage_block();
|
||||
|
||||
if (this->ubo_block_index != -1) {
|
||||
this->uniforms[id].block_index = this->ubo_block_index;
|
||||
if (this->buffer_block_index != -1) {
|
||||
this->uniforms[id].block_index = this->buffer_block_index;
|
||||
|
||||
unsigned alignment = type->std140_base_alignment(row_major);
|
||||
if (packing == GLSL_INTERFACE_PACKING_STD430)
|
||||
|
|
|
|||
|
|
@ -240,7 +240,16 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
|
|||
|
||||
/* Check that all of the qualifiers match between stages.
|
||||
*/
|
||||
if (input->data.centroid != output->data.centroid) {
|
||||
|
||||
/* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
|
||||
* should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
|
||||
* conformance test suite does not verify that the qualifiers must match.
|
||||
* The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
|
||||
* OpenGLES 3.0 drivers, so we relax the checking in all cases.
|
||||
*/
|
||||
if (false /* always skip the centroid check */ &&
|
||||
prog->Version < (prog->IsES ? 310 : 430) &&
|
||||
input->data.centroid != output->data.centroid) {
|
||||
linker_error(prog,
|
||||
"%s shader output `%s' %s centroid qualifier, "
|
||||
"but %s shader input %s centroid qualifier\n",
|
||||
|
|
|
|||
|
|
@ -1171,6 +1171,8 @@ cross_validate_uniforms(struct gl_shader_program *prog)
|
|||
static bool
|
||||
interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
|
||||
{
|
||||
int *InterfaceBlockStageIndex[MESA_SHADER_STAGES];
|
||||
|
||||
unsigned max_num_uniform_blocks = 0;
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i])
|
||||
|
|
@ -1180,10 +1182,9 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
|
|||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
struct gl_shader *sh = prog->_LinkedShaders[i];
|
||||
|
||||
prog->InterfaceBlockStageIndex[i] = ralloc_array(prog, int,
|
||||
max_num_uniform_blocks);
|
||||
InterfaceBlockStageIndex[i] = new int[max_num_uniform_blocks];
|
||||
for (unsigned int j = 0; j < max_num_uniform_blocks; j++)
|
||||
prog->InterfaceBlockStageIndex[i][j] = -1;
|
||||
InterfaceBlockStageIndex[i][j] = -1;
|
||||
|
||||
if (sh == NULL)
|
||||
continue;
|
||||
|
|
@ -1194,13 +1195,17 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
|
|||
&prog->NumBufferInterfaceBlocks,
|
||||
sh->BufferInterfaceBlocks[j]);
|
||||
|
||||
if (index == -1) {
|
||||
linker_error(prog, "uniform block `%s' has mismatching definitions\n",
|
||||
sh->BufferInterfaceBlocks[j]->Name);
|
||||
return false;
|
||||
}
|
||||
if (index == -1) {
|
||||
linker_error(prog, "uniform block `%s' has mismatching definitions\n",
|
||||
sh->BufferInterfaceBlocks[j]->Name);
|
||||
|
||||
prog->InterfaceBlockStageIndex[i][index] = j;
|
||||
for (unsigned k = 0; k <= i; k++) {
|
||||
delete[] InterfaceBlockStageIndex[k];
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
InterfaceBlockStageIndex[i][index] = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1209,18 +1214,23 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
|
|||
*/
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
|
||||
int stage_index =
|
||||
prog->InterfaceBlockStageIndex[i][j];
|
||||
int stage_index = InterfaceBlockStageIndex[i][j];
|
||||
|
||||
if (stage_index != -1) {
|
||||
struct gl_shader *sh = prog->_LinkedShaders[i];
|
||||
|
||||
prog->BufferInterfaceBlocks[j].stageref |= (1 << i);
|
||||
|
||||
sh->BufferInterfaceBlocks[stage_index] =
|
||||
&prog->BufferInterfaceBlocks[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
delete[] InterfaceBlockStageIndex[i];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2890,6 +2900,9 @@ store_fragdepth_layout(struct gl_shader_program *prog)
|
|||
static void
|
||||
check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
{
|
||||
unsigned total_uniform_blocks = 0;
|
||||
unsigned total_shader_storage_blocks = 0;
|
||||
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
struct gl_shader *sh = prog->_LinkedShaders[i];
|
||||
|
||||
|
|
@ -2928,12 +2941,37 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
_mesa_shader_stage_to_string(i));
|
||||
}
|
||||
}
|
||||
|
||||
total_shader_storage_blocks += sh->NumShaderStorageBlocks;
|
||||
total_uniform_blocks += sh->NumUniformBlocks;
|
||||
|
||||
const unsigned max_uniform_blocks =
|
||||
ctx->Const.Program[i].MaxUniformBlocks;
|
||||
if (max_uniform_blocks < sh->NumUniformBlocks) {
|
||||
linker_error(prog, "Too many %s uniform blocks (%d/%d)\n",
|
||||
_mesa_shader_stage_to_string(i), sh->NumUniformBlocks,
|
||||
max_uniform_blocks);
|
||||
}
|
||||
|
||||
const unsigned max_shader_storage_blocks =
|
||||
ctx->Const.Program[i].MaxShaderStorageBlocks;
|
||||
if (max_shader_storage_blocks < sh->NumShaderStorageBlocks) {
|
||||
linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n",
|
||||
_mesa_shader_stage_to_string(i),
|
||||
sh->NumShaderStorageBlocks, max_shader_storage_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned blocks[MESA_SHADER_STAGES] = {0};
|
||||
unsigned total_uniform_blocks = 0;
|
||||
unsigned shader_blocks[MESA_SHADER_STAGES] = {0};
|
||||
unsigned total_shader_storage_blocks = 0;
|
||||
if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
|
||||
linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
|
||||
total_uniform_blocks, ctx->Const.MaxCombinedUniformBlocks);
|
||||
}
|
||||
|
||||
if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) {
|
||||
linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n",
|
||||
total_shader_storage_blocks,
|
||||
ctx->Const.MaxCombinedShaderStorageBlocks);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) {
|
||||
/* Don't check SSBOs for Uniform Block Size */
|
||||
|
|
@ -2952,57 +2990,6 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
prog->BufferInterfaceBlocks[i].UniformBufferSize,
|
||||
ctx->Const.MaxShaderStorageBlockSize);
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
|
||||
if (prog->InterfaceBlockStageIndex[j][i] != -1) {
|
||||
struct gl_shader *sh = prog->_LinkedShaders[j];
|
||||
int stage_index = prog->InterfaceBlockStageIndex[j][i];
|
||||
if (sh &&
|
||||
sh->BufferInterfaceBlocks[stage_index]->IsShaderStorage) {
|
||||
shader_blocks[j]++;
|
||||
total_shader_storage_blocks++;
|
||||
} else {
|
||||
blocks[j]++;
|
||||
total_uniform_blocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
|
||||
linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
|
||||
total_uniform_blocks,
|
||||
ctx->Const.MaxCombinedUniformBlocks);
|
||||
} else {
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
const unsigned max_uniform_blocks =
|
||||
ctx->Const.Program[i].MaxUniformBlocks;
|
||||
if (blocks[i] > max_uniform_blocks) {
|
||||
linker_error(prog, "Too many %s uniform blocks (%d/%d)\n",
|
||||
_mesa_shader_stage_to_string(i),
|
||||
blocks[i],
|
||||
max_uniform_blocks);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) {
|
||||
linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n",
|
||||
total_shader_storage_blocks,
|
||||
ctx->Const.MaxCombinedShaderStorageBlocks);
|
||||
} else {
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
const unsigned max_shader_storage_blocks =
|
||||
ctx->Const.Program[i].MaxShaderStorageBlocks;
|
||||
if (shader_blocks[i] > max_shader_storage_blocks) {
|
||||
linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n",
|
||||
_mesa_shader_stage_to_string(i),
|
||||
shader_blocks[i],
|
||||
max_shader_storage_blocks);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3072,13 +3059,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
ctx->Const.Program[i].MaxImageUniforms);
|
||||
|
||||
total_image_units += sh->NumImages;
|
||||
|
||||
for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
|
||||
int stage_index = prog->InterfaceBlockStageIndex[i][j];
|
||||
if (stage_index != -1 &&
|
||||
sh->BufferInterfaceBlocks[stage_index]->IsShaderStorage)
|
||||
total_shader_storage_blocks++;
|
||||
}
|
||||
total_shader_storage_blocks += sh->NumShaderStorageBlocks;
|
||||
|
||||
if (i == MESA_SHADER_FRAGMENT) {
|
||||
foreach_in_list(ir_instruction, node, sh->ir) {
|
||||
|
|
@ -3497,19 +3478,50 @@ build_stageref(struct gl_shader_program *shProg, const char *name,
|
|||
* Create gl_shader_variable from ir_variable class.
|
||||
*/
|
||||
static gl_shader_variable *
|
||||
create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in)
|
||||
create_shader_variable(struct gl_shader_program *shProg,
|
||||
const ir_variable *in, bool use_implicit_location,
|
||||
int location_bias)
|
||||
{
|
||||
gl_shader_variable *out = ralloc(shProg, struct gl_shader_variable);
|
||||
if (!out)
|
||||
return NULL;
|
||||
|
||||
out->type = in->type;
|
||||
out->name = ralloc_strdup(shProg, in->name);
|
||||
/* Since gl_VertexID may be lowered to gl_VertexIDMESA, but applications
|
||||
* expect to see gl_VertexID in the program resource list. Pretend.
|
||||
*/
|
||||
if (in->data.mode == ir_var_system_value &&
|
||||
in->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
|
||||
out->name = ralloc_strdup(shProg, "gl_VertexID");
|
||||
} else {
|
||||
out->name = ralloc_strdup(shProg, in->name);
|
||||
}
|
||||
|
||||
if (!out->name)
|
||||
return NULL;
|
||||
|
||||
out->location = in->data.location;
|
||||
/* From the ARB_program_interface_query specification:
|
||||
*
|
||||
* "Not all active variables are assigned valid locations; the
|
||||
* following variables will have an effective location of -1:
|
||||
*
|
||||
* * uniforms declared as atomic counters;
|
||||
*
|
||||
* * members of a uniform block;
|
||||
*
|
||||
* * built-in inputs, outputs, and uniforms (starting with "gl_"); and
|
||||
*
|
||||
* * inputs or outputs not declared with a "location" layout qualifier,
|
||||
* except for vertex shader inputs and fragment shader outputs."
|
||||
*/
|
||||
if (in->type->base_type == GLSL_TYPE_ATOMIC_UINT ||
|
||||
is_gl_identifier(in->name) ||
|
||||
!(in->data.explicit_location || use_implicit_location)) {
|
||||
out->location = -1;
|
||||
} else {
|
||||
out->location = in->data.location - location_bias;
|
||||
}
|
||||
|
||||
out->type = in->type;
|
||||
out->index = in->data.index;
|
||||
out->patch = in->data.patch;
|
||||
out->mode = in->data.mode;
|
||||
|
|
@ -3519,38 +3531,31 @@ create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in)
|
|||
|
||||
static bool
|
||||
add_interface_variables(struct gl_shader_program *shProg,
|
||||
exec_list *ir, GLenum programInterface)
|
||||
unsigned stage, GLenum programInterface)
|
||||
{
|
||||
exec_list *ir = shProg->_LinkedShaders[stage]->ir;
|
||||
|
||||
foreach_in_list(ir_instruction, node, ir) {
|
||||
ir_variable *var = node->as_variable();
|
||||
uint8_t mask = 0;
|
||||
|
||||
if (!var)
|
||||
if (!var || var->data.how_declared == ir_var_hidden)
|
||||
continue;
|
||||
|
||||
int loc_bias;
|
||||
|
||||
switch (var->data.mode) {
|
||||
/* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes):
|
||||
* "For GetActiveAttrib, all active vertex shader input variables
|
||||
* are enumerated, including the special built-in inputs gl_VertexID
|
||||
* and gl_InstanceID."
|
||||
*/
|
||||
case ir_var_system_value:
|
||||
if (var->data.location != SYSTEM_VALUE_VERTEX_ID &&
|
||||
var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE &&
|
||||
var->data.location != SYSTEM_VALUE_INSTANCE_ID)
|
||||
continue;
|
||||
/* Mark special built-in inputs referenced by the vertex stage so
|
||||
* that they are considered active by the shader queries.
|
||||
*/
|
||||
mask = (1 << (MESA_SHADER_VERTEX));
|
||||
/* FALLTHROUGH */
|
||||
case ir_var_shader_in:
|
||||
if (programInterface != GL_PROGRAM_INPUT)
|
||||
continue;
|
||||
loc_bias = (stage == MESA_SHADER_VERTEX) ? int(VERT_ATTRIB_GENERIC0)
|
||||
: int(VARYING_SLOT_VAR0);
|
||||
break;
|
||||
case ir_var_shader_out:
|
||||
if (programInterface != GL_PROGRAM_OUTPUT)
|
||||
continue;
|
||||
loc_bias = (stage == MESA_SHADER_FRAGMENT) ? int(FRAG_RESULT_DATA0)
|
||||
: int(VARYING_SLOT_VAR0);
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
|
|
@ -3568,13 +3573,16 @@ add_interface_variables(struct gl_shader_program *shProg,
|
|||
if (strncmp(var->name, "gl_out_FragData", 15) == 0)
|
||||
continue;
|
||||
|
||||
gl_shader_variable *sha_v = create_shader_variable(shProg, var);
|
||||
const bool vs_input_or_fs_output =
|
||||
(stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
|
||||
(stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out);
|
||||
|
||||
gl_shader_variable *sha_v =
|
||||
create_shader_variable(shProg, var, vs_input_or_fs_output, loc_bias);
|
||||
if (!sha_v)
|
||||
return false;
|
||||
|
||||
if (!add_program_resource(shProg, programInterface, sha_v,
|
||||
build_stageref(shProg, sha_v->name,
|
||||
sha_v->mode) | mask))
|
||||
if (!add_program_resource(shProg, programInterface, sha_v, 1 << stage))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
|
@ -3604,7 +3612,8 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type)
|
|||
}
|
||||
|
||||
if (type == iface) {
|
||||
gl_shader_variable *sha_v = create_shader_variable(shProg, var);
|
||||
gl_shader_variable *sha_v =
|
||||
create_shader_variable(shProg, var, false, VARYING_SLOT_VAR0);
|
||||
if (!sha_v)
|
||||
return false;
|
||||
if (!add_program_resource(shProg, iface, sha_v,
|
||||
|
|
@ -3629,7 +3638,8 @@ add_fragdata_arrays(struct gl_shader_program *shProg)
|
|||
ir_variable *var = node->as_variable();
|
||||
if (var) {
|
||||
assert(var->data.mode == ir_var_shader_out);
|
||||
gl_shader_variable *sha_v = create_shader_variable(shProg, var);
|
||||
gl_shader_variable *sha_v =
|
||||
create_shader_variable(shProg, var, true, FRAG_RESULT_DATA0);
|
||||
if (!sha_v)
|
||||
return false;
|
||||
if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, sha_v,
|
||||
|
|
@ -3794,7 +3804,9 @@ calculate_array_size_and_stride(struct gl_shader_program *shProg,
|
|||
int array_stride = -1;
|
||||
char *var_name = get_top_level_name(uni->name);
|
||||
char *interface_name =
|
||||
get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name);
|
||||
get_top_level_name(uni->is_shader_storage ?
|
||||
shProg->ShaderStorageBlocks[block_index]->Name :
|
||||
shProg->UniformBlocks[block_index]->Name);
|
||||
|
||||
if (strcmp(var_name, interface_name) == 0) {
|
||||
/* Deal with instanced array of SSBOs */
|
||||
|
|
@ -3893,12 +3905,10 @@ build_program_resource_list(struct gl_context *ctx,
|
|||
return;
|
||||
|
||||
/* Add inputs and outputs to the resource list. */
|
||||
if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
|
||||
GL_PROGRAM_INPUT))
|
||||
if (!add_interface_variables(shProg, input_stage, GL_PROGRAM_INPUT))
|
||||
return;
|
||||
|
||||
if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage]->ir,
|
||||
GL_PROGRAM_OUTPUT))
|
||||
if (!add_interface_variables(shProg, output_stage, GL_PROGRAM_OUTPUT))
|
||||
return;
|
||||
|
||||
/* Add transform feedback varyings. */
|
||||
|
|
@ -3933,15 +3943,14 @@ build_program_resource_list(struct gl_context *ctx,
|
|||
ir_var_uniform);
|
||||
|
||||
/* Add stagereferences for uniforms in a uniform block. */
|
||||
bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
|
||||
int block_index = shProg->UniformStorage[i].block_index;
|
||||
if (block_index != -1) {
|
||||
for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
|
||||
if (shProg->InterfaceBlockStageIndex[j][block_index] != -1)
|
||||
stageref |= (1 << j);
|
||||
}
|
||||
stageref |= is_shader_storage ?
|
||||
shProg->ShaderStorageBlocks[block_index]->stageref :
|
||||
shProg->UniformBlocks[block_index]->stageref;
|
||||
}
|
||||
|
||||
bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
|
||||
GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
|
||||
if (!should_add_buffer_variable(shProg, type,
|
||||
shProg->UniformStorage[i].name))
|
||||
|
|
@ -4686,6 +4695,33 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
has_xfb_qualifiers))
|
||||
goto done;
|
||||
|
||||
/* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks
|
||||
* for gl_shader_program and gl_shader, so that drivers that need separate
|
||||
* index spaces for each set can have that.
|
||||
*/
|
||||
for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i] != NULL) {
|
||||
gl_shader *sh = prog->_LinkedShaders[i];
|
||||
split_ubos_and_ssbos(sh,
|
||||
sh->BufferInterfaceBlocks,
|
||||
NULL,
|
||||
sh->NumBufferInterfaceBlocks,
|
||||
&sh->UniformBlocks,
|
||||
&sh->NumUniformBlocks,
|
||||
&sh->ShaderStorageBlocks,
|
||||
&sh->NumShaderStorageBlocks);
|
||||
}
|
||||
}
|
||||
|
||||
split_ubos_and_ssbos(prog,
|
||||
NULL,
|
||||
prog->BufferInterfaceBlocks,
|
||||
prog->NumBufferInterfaceBlocks,
|
||||
&prog->UniformBlocks,
|
||||
&prog->NumUniformBlocks,
|
||||
&prog->ShaderStorageBlocks,
|
||||
&prog->NumShaderStorageBlocks);
|
||||
|
||||
update_array_sizes(prog);
|
||||
link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue,
|
||||
num_explicit_uniform_locs,
|
||||
|
|
@ -4737,33 +4773,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
}
|
||||
}
|
||||
|
||||
/* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks
|
||||
* for gl_shader_program and gl_shader, so that drivers that need separate
|
||||
* index spaces for each set can have that.
|
||||
*/
|
||||
for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i] != NULL) {
|
||||
gl_shader *sh = prog->_LinkedShaders[i];
|
||||
split_ubos_and_ssbos(sh,
|
||||
sh->BufferInterfaceBlocks,
|
||||
NULL,
|
||||
sh->NumBufferInterfaceBlocks,
|
||||
&sh->UniformBlocks,
|
||||
&sh->NumUniformBlocks,
|
||||
&sh->ShaderStorageBlocks,
|
||||
&sh->NumShaderStorageBlocks);
|
||||
}
|
||||
}
|
||||
|
||||
split_ubos_and_ssbos(prog,
|
||||
NULL,
|
||||
prog->BufferInterfaceBlocks,
|
||||
prog->NumBufferInterfaceBlocks,
|
||||
&prog->UniformBlocks,
|
||||
&prog->NumUniformBlocks,
|
||||
&prog->ShaderStorageBlocks,
|
||||
&prog->NumShaderStorageBlocks);
|
||||
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
if (prog->_LinkedShaders[i] == NULL)
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -372,7 +372,8 @@ lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
|
|||
static bool
|
||||
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
|
||||
{
|
||||
return state->ARB_shader_storage_buffer_object_enable;
|
||||
return state->ARB_shader_storage_buffer_object_enable ||
|
||||
state->is_version(430, 310);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ lower_vertex_id_visitor::visit(ir_dereference_variable *ir)
|
|||
if (gl_BaseVertex == NULL) {
|
||||
gl_BaseVertex = new(mem_ctx) ir_variable(int_t, "gl_BaseVertex",
|
||||
ir_var_system_value);
|
||||
gl_BaseVertex->data.how_declared = ir_var_declared_implicitly;
|
||||
gl_BaseVertex->data.how_declared = ir_var_hidden;
|
||||
gl_BaseVertex->data.read_only = true;
|
||||
gl_BaseVertex->data.location = SYSTEM_VALUE_BASE_VERTEX;
|
||||
gl_BaseVertex->data.explicit_location = true;
|
||||
|
|
|
|||
|
|
@ -96,8 +96,6 @@ _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh)
|
|||
void
|
||||
_mesa_clear_shader_program_data(struct gl_shader_program *shProg)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
shProg->NumUniformStorage = 0;
|
||||
shProg->UniformStorage = NULL;
|
||||
shProg->NumUniformRemapTable = 0;
|
||||
|
|
@ -119,11 +117,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
|
|||
shProg->ShaderStorageBlocks = NULL;
|
||||
shProg->NumShaderStorageBlocks = 0;
|
||||
|
||||
for (i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
ralloc_free(shProg->InterfaceBlockStageIndex[i]);
|
||||
shProg->InterfaceBlockStageIndex[i] = NULL;
|
||||
}
|
||||
|
||||
ralloc_free(shProg->AtomicBuffers);
|
||||
shProg->AtomicBuffers = NULL;
|
||||
shProg->NumAtomicBuffers = 0;
|
||||
|
|
|
|||
|
|
@ -817,7 +817,7 @@ store_aos(struct gallivm_state *gallivm,
|
|||
#endif
|
||||
|
||||
/* Unaligned store due to the vertex header */
|
||||
lp_set_store_alignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
|
||||
LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1069,7 +1069,7 @@ store_clip(struct gallivm_state *gallivm,
|
|||
clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");
|
||||
|
||||
/* Unaligned store */
|
||||
lp_set_store_alignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
|
||||
LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -95,4 +95,18 @@ typedef void *LLVMMCJITMemoryManagerRef;
|
|||
#define LLVMInsertBasicBlock ILLEGAL_LLVM_FUNCTION
|
||||
#define LLVMCreateBuilder ILLEGAL_LLVM_FUNCTION
|
||||
|
||||
|
||||
/*
|
||||
* Before LLVM 3.4 LLVMSetAlignment only supported GlobalValue, not
|
||||
* LoadInst/StoreInst as we need.
|
||||
*/
|
||||
#if HAVE_LLVM < 0x0304
|
||||
# ifdef __cplusplus
|
||||
extern "C"
|
||||
# endif
|
||||
void LLVMSetAlignmentBackport(LLVMValueRef V, unsigned Bytes);
|
||||
# define LLVMSetAlignment LLVMSetAlignmentBackport
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* LP_BLD_H */
|
||||
|
|
|
|||
|
|
@ -1492,14 +1492,9 @@ lp_build_abs(struct lp_build_context *bld,
|
|||
return a;
|
||||
|
||||
if(type.floating) {
|
||||
/* Mask out the sign bit */
|
||||
LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
|
||||
unsigned long long absMask = ~(1ULL << (type.width - 1));
|
||||
LLVMValueRef mask = lp_build_const_int_vec(bld->gallivm, type, ((unsigned long long) absMask));
|
||||
a = LLVMBuildBitCast(builder, a, int_vec_type, "");
|
||||
a = LLVMBuildAnd(builder, a, mask, "");
|
||||
a = LLVMBuildBitCast(builder, a, vec_type, "");
|
||||
return a;
|
||||
char intrinsic[32];
|
||||
lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.fabs", vec_type);
|
||||
return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
|
||||
}
|
||||
|
||||
if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
|
||||
|
|
@ -1675,13 +1670,13 @@ enum lp_build_round_mode
|
|||
* result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
|
||||
*/
|
||||
static inline LLVMValueRef
|
||||
lp_build_round_sse41(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
enum lp_build_round_mode mode)
|
||||
lp_build_nearest_sse41(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
LLVMBuilderRef builder = bld->gallivm->builder;
|
||||
const struct lp_type type = bld->type;
|
||||
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
|
||||
LLVMValueRef mode = LLVMConstNull(i32t);
|
||||
const char *intrinsic;
|
||||
LLVMValueRef res;
|
||||
|
||||
|
|
@ -1714,7 +1709,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
|
|||
|
||||
args[0] = undef;
|
||||
args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
|
||||
args[2] = LLVMConstInt(i32t, mode, 0);
|
||||
args[2] = mode;
|
||||
|
||||
res = lp_build_intrinsic(builder, intrinsic,
|
||||
vec_type, args, Elements(args), 0);
|
||||
|
|
@ -1754,7 +1749,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
|
|||
|
||||
res = lp_build_intrinsic_binary(builder, intrinsic,
|
||||
bld->vec_type, a,
|
||||
LLVMConstInt(i32t, mode, 0));
|
||||
mode);
|
||||
}
|
||||
|
||||
return res;
|
||||
|
|
@ -1856,8 +1851,38 @@ lp_build_round_arch(struct lp_build_context *bld,
|
|||
LLVMValueRef a,
|
||||
enum lp_build_round_mode mode)
|
||||
{
|
||||
if (util_cpu_caps.has_sse4_1)
|
||||
return lp_build_round_sse41(bld, a, mode);
|
||||
if (util_cpu_caps.has_sse4_1) {
|
||||
LLVMBuilderRef builder = bld->gallivm->builder;
|
||||
const struct lp_type type = bld->type;
|
||||
const char *intrinsic_root;
|
||||
char intrinsic[32];
|
||||
|
||||
assert(type.floating);
|
||||
assert(lp_check_value(type, a));
|
||||
(void)type;
|
||||
|
||||
switch (mode) {
|
||||
case LP_BUILD_ROUND_NEAREST:
|
||||
if (HAVE_LLVM >= 0x0304) {
|
||||
intrinsic_root = "llvm.round";
|
||||
} else {
|
||||
return lp_build_nearest_sse41(bld, a);
|
||||
}
|
||||
break;
|
||||
case LP_BUILD_ROUND_FLOOR:
|
||||
intrinsic_root = "llvm.floor";
|
||||
break;
|
||||
case LP_BUILD_ROUND_CEIL:
|
||||
intrinsic_root = "llvm.ceil";
|
||||
break;
|
||||
case LP_BUILD_ROUND_TRUNCATE:
|
||||
intrinsic_root = "llvm.trunc";
|
||||
break;
|
||||
}
|
||||
|
||||
lp_format_intrinsic(intrinsic, sizeof intrinsic, intrinsic_root, bld->vec_type);
|
||||
return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
|
||||
}
|
||||
else /* (util_cpu_caps.has_altivec) */
|
||||
return lp_build_round_altivec(bld, a, mode);
|
||||
}
|
||||
|
|
@ -1999,7 +2024,7 @@ lp_build_floor(struct lp_build_context *bld,
|
|||
|
||||
if (type.width != 32) {
|
||||
char intrinsic[32];
|
||||
util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", type.length, type.width);
|
||||
lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.floor", vec_type);
|
||||
return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
|
||||
}
|
||||
|
||||
|
|
@ -2074,7 +2099,7 @@ lp_build_ceil(struct lp_build_context *bld,
|
|||
|
||||
if (type.width != 32) {
|
||||
char intrinsic[32];
|
||||
util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", type.length, type.width);
|
||||
lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.ceil", vec_type);
|
||||
return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
|
||||
}
|
||||
|
||||
|
|
@ -2411,15 +2436,8 @@ lp_build_sqrt(struct lp_build_context *bld,
|
|||
|
||||
assert(lp_check_value(type, a));
|
||||
|
||||
/* TODO: optimize the constant case */
|
||||
|
||||
assert(type.floating);
|
||||
if (type.length == 1) {
|
||||
util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.f%u", type.width);
|
||||
}
|
||||
else {
|
||||
util_snprintf(intrinsic, sizeof intrinsic, "llvm.sqrt.v%uf%u", type.length, type.width);
|
||||
}
|
||||
lp_format_intrinsic(intrinsic, sizeof intrinsic, "llvm.sqrt", vec_type);
|
||||
|
||||
return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
|
|||
ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
|
||||
ptr = LLVMBuildPointerCast(builder, ptr, LLVMPointerType(src_vec_type, 0), "");
|
||||
res = LLVMBuildLoad(builder, ptr, "");
|
||||
lp_set_load_alignment(res, src_type.width / 8);
|
||||
LLVMSetAlignment(res, src_type.width / 8);
|
||||
|
||||
/* Truncate doubles to float */
|
||||
if (src_type.floating && src_type.width == 64) {
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
|
|||
* gallium could not do anything else except 16 no matter what...
|
||||
*/
|
||||
if (!aligned) {
|
||||
lp_set_load_alignment(res, 1);
|
||||
LLVMSetAlignment(res, 1);
|
||||
}
|
||||
|
||||
assert(src_width <= dst_width);
|
||||
|
|
|
|||
|
|
@ -399,6 +399,20 @@ lp_build_init(void)
|
|||
|
||||
util_cpu_detect();
|
||||
|
||||
/* For simulating less capable machines */
|
||||
#ifdef DEBUG
|
||||
if (debug_get_bool_option("LP_FORCE_SSE2", FALSE)) {
|
||||
assert(util_cpu_caps.has_sse2);
|
||||
util_cpu_caps.has_sse3 = 0;
|
||||
util_cpu_caps.has_ssse3 = 0;
|
||||
util_cpu_caps.has_sse4_1 = 0;
|
||||
util_cpu_caps.has_sse4_2 = 0;
|
||||
util_cpu_caps.has_avx = 0;
|
||||
util_cpu_caps.has_avx2 = 0;
|
||||
util_cpu_caps.has_f16c = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
|
||||
* 8-wide vector needs more floating ops than 4-wide (due to padding), it is
|
||||
* actually more efficient to use 4-wide vectors on this processor.
|
||||
|
|
@ -456,17 +470,6 @@ lp_build_init(void)
|
|||
|
||||
gallivm_initialized = TRUE;
|
||||
|
||||
#if 0
|
||||
/* For simulating less capable machines */
|
||||
util_cpu_caps.has_sse3 = 0;
|
||||
util_cpu_caps.has_ssse3 = 0;
|
||||
util_cpu_caps.has_sse4_1 = 0;
|
||||
util_cpu_caps.has_sse4_2 = 0;
|
||||
util_cpu_caps.has_avx = 0;
|
||||
util_cpu_caps.has_avx2 = 0;
|
||||
util_cpu_caps.has_f16c = 0;
|
||||
#endif
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -77,14 +77,6 @@ func_pointer
|
|||
gallivm_jit_function(struct gallivm_state *gallivm,
|
||||
LLVMValueRef func);
|
||||
|
||||
void
|
||||
lp_set_load_alignment(LLVMValueRef Inst,
|
||||
unsigned Align);
|
||||
|
||||
void
|
||||
lp_set_store_alignment(LLVMValueRef Inst,
|
||||
unsigned Align);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@
|
|||
|
||||
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_string.h"
|
||||
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_intr.h"
|
||||
|
|
@ -52,6 +53,48 @@
|
|||
#include "lp_bld_pack.h"
|
||||
|
||||
|
||||
void
|
||||
lp_format_intrinsic(char *name,
|
||||
size_t size,
|
||||
const char *name_root,
|
||||
LLVMTypeRef type)
|
||||
{
|
||||
unsigned length = 0;
|
||||
unsigned width;
|
||||
char c;
|
||||
|
||||
LLVMTypeKind kind = LLVMGetTypeKind(type);
|
||||
if (kind == LLVMVectorTypeKind) {
|
||||
length = LLVMGetVectorSize(type);
|
||||
type = LLVMGetElementType(type);
|
||||
kind = LLVMGetTypeKind(type);
|
||||
}
|
||||
|
||||
switch (kind) {
|
||||
case LLVMIntegerTypeKind:
|
||||
c = 'i';
|
||||
width = LLVMGetIntTypeWidth(type);
|
||||
break;
|
||||
case LLVMFloatTypeKind:
|
||||
c = 'f';
|
||||
width = 32;
|
||||
break;
|
||||
case LLVMDoubleTypeKind:
|
||||
c = 'f';
|
||||
width = 64;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (length) {
|
||||
util_snprintf(name, size, "%s.v%u%c%u", name_root, length, c, width);
|
||||
} else {
|
||||
util_snprintf(name, size, "%s.%c%u", name_root, c, width);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_declare_intrinsic(LLVMModuleRef module,
|
||||
const char *name,
|
||||
|
|
|
|||
|
|
@ -47,6 +47,12 @@
|
|||
#define LP_MAX_FUNC_ARGS 32
|
||||
|
||||
|
||||
/**
 * Format the type-suffixed name of an intrinsic into a caller buffer.
 *
 * Writes "<name_root>.v<length><c><width>" for vector types or
 * "<name_root>.<c><width>" for scalar types into \p name (buffer of
 * \p size bytes), where <c> is 'i' for integer and 'f' for float/double
 * element types and <width> is the element width in bits.
 */
void
|
||||
lp_format_intrinsic(char *name,
|
||||
size_t size,
|
||||
const char *name_root,
|
||||
LLVMTypeRef type);
|
||||
|
||||
LLVMValueRef
|
||||
lp_declare_intrinsic(LLVMModuleRef module,
|
||||
const char *name,
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@
|
|||
|
||||
#include "lp_bld_type.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_swizzle.h"
|
||||
#include "lp_bld_init.h"
|
||||
#include "lp_bld_intr.h"
|
||||
#include "lp_bld_debug.h"
|
||||
|
|
@ -314,35 +315,30 @@ lp_build_select(struct lp_build_context *bld,
|
|||
mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
|
||||
res = LLVMBuildSelect(builder, mask, a, b, "");
|
||||
}
|
||||
else if (HAVE_LLVM >= 0x0303) {
|
||||
else if (LLVMIsConstant(mask) ||
|
||||
LLVMGetInstructionOpcode(mask) == LLVMSExt) {
|
||||
/* Generate a vector select.
|
||||
*
|
||||
* Using vector selects would avoid emitting intrinsics, but they weren't
|
||||
* properly supported yet for a long time.
|
||||
*
|
||||
* LLVM 3.3 appears to reliably support it.
|
||||
*
|
||||
* LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
|
||||
*
|
||||
* LLVM 3.0 includes experimental support provided the -promote-elements
|
||||
* options is passed to LLVM's command line (e.g., via
|
||||
* llvm::cl::ParseCommandLineOptions), but resulting code quality is much
|
||||
* worse, probably because some optimization passes don't know how to
|
||||
* handle vector selects.
|
||||
*
|
||||
* See also:
|
||||
* - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
|
||||
* Using vector selects should avoid emitting intrinsics hence avoid
|
||||
* hindering optimization passes, but vector selects weren't properly
|
||||
* supported for a long time, and LLVM will generate poor code when
|
||||
* the mask is not the result of a comparison.
|
||||
*/
|
||||
|
||||
/* Convert the mask to a vector of booleans.
|
||||
* XXX: There are two ways to do this. Decide what's best.
|
||||
*
|
||||
* XXX: In x86 the mask is controlled by the MSB, so if we shifted the
|
||||
* mask by `type.width - 1`, LLVM should realize the mask is ready. Alas
|
||||
* what really happens is that LLVM will emit two shifts back to back.
|
||||
*/
|
||||
if (1) {
|
||||
LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
|
||||
mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
|
||||
} else {
|
||||
mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
|
||||
if (0) {
|
||||
LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
|
||||
shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
|
||||
mask = LLVMBuildLShr(builder, mask, shift, "");
|
||||
}
|
||||
LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
|
||||
mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
|
||||
|
||||
res = LLVMBuildSelect(builder, mask, a, b, "");
|
||||
}
|
||||
else if (((util_cpu_caps.has_sse4_1 &&
|
||||
|
|
|
|||
|
|
@ -178,30 +178,28 @@ gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
|
|||
*>(library_info);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
LLVMValueRef
|
||||
lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
|
||||
const char *Name)
|
||||
{
|
||||
return llvm::wrap(llvm::unwrap(B)->CreateLoad(llvm::unwrap(PointerVal), true, Name));
|
||||
}
|
||||
|
||||
#if HAVE_LLVM < 0x0304
|
||||
|
||||
extern "C"
|
||||
void
|
||||
lp_set_load_alignment(LLVMValueRef Inst,
|
||||
unsigned Align)
|
||||
LLVMSetAlignmentBackport(LLVMValueRef V,
|
||||
unsigned Bytes)
|
||||
{
|
||||
llvm::unwrap<llvm::LoadInst>(Inst)->setAlignment(Align);
|
||||
switch (LLVMGetInstructionOpcode(V)) {
|
||||
case LLVMLoad:
|
||||
llvm::unwrap<llvm::LoadInst>(V)->setAlignment(Bytes);
|
||||
break;
|
||||
case LLVMStore:
|
||||
llvm::unwrap<llvm::StoreInst>(V)->setAlignment(Bytes);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C"
|
||||
void
|
||||
lp_set_store_alignment(LLVMValueRef Inst,
|
||||
unsigned Align)
|
||||
{
|
||||
llvm::unwrap<llvm::StoreInst>(Inst)->setAlignment(Align);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_LLVM < 0x0306
|
||||
|
|
|
|||
|
|
@ -55,10 +55,6 @@ extern void
|
|||
lp_set_target_options(void);
|
||||
|
||||
|
||||
extern LLVMValueRef
|
||||
lp_build_load_volatile(LLVMBuilderRef B, LLVMValueRef PointerVal,
|
||||
const char *Name);
|
||||
|
||||
extern int
|
||||
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
|
||||
struct lp_generated_code **OutCode,
|
||||
|
|
|
|||
|
|
@ -1939,7 +1939,7 @@ lp_build_clamp_border_color(struct lp_build_sample_context *bld,
|
|||
LLVMPointerType(vec4_bld.vec_type, 0), "");
|
||||
border_color = LLVMBuildLoad(builder, border_color_ptr, "");
|
||||
/* we don't have aligned type in the dynamic state unfortunately */
|
||||
lp_set_load_alignment(border_color, 4);
|
||||
LLVMSetAlignment(border_color, 4);
|
||||
|
||||
/*
|
||||
* Instead of having some incredibly complex logic which will try to figure out
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ lp_build_pointer_get_unaligned(LLVMBuilderRef builder,
|
|||
assert(LLVMGetTypeKind(LLVMTypeOf(ptr)) == LLVMPointerTypeKind);
|
||||
element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
|
||||
res = LLVMBuildLoad(builder, element_ptr, "");
|
||||
lp_set_load_alignment(res, alignment);
|
||||
LLVMSetAlignment(res, alignment);
|
||||
#ifdef DEBUG
|
||||
lp_build_name(res, "%s[%s]", LLVMGetValueName(ptr), LLVMGetValueName(index));
|
||||
#endif
|
||||
|
|
@ -188,5 +188,5 @@ lp_build_pointer_set_unaligned(LLVMBuilderRef builder,
|
|||
LLVMValueRef instr;
|
||||
element_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
|
||||
instr = LLVMBuildStore(builder, value, element_ptr);
|
||||
lp_set_store_alignment(instr, alignment);
|
||||
LLVMSetAlignment(instr, alignment);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -146,6 +146,9 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
|
|||
"NUM_CULLDIST_ENABLED",
|
||||
"FS_EARLY_DEPTH_STENCIL",
|
||||
"NEXT_SHADER",
|
||||
"CS_FIXED_BLOCK_WIDTH",
|
||||
"CS_FIXED_BLOCK_HEIGHT",
|
||||
"CS_FIXED_BLOCK_DEPTH"
|
||||
};
|
||||
|
||||
const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] =
|
||||
|
|
|
|||
|
|
@ -88,6 +88,14 @@ tgsi_util_get_shadow_ref_src_index(unsigned tgsi_tex);
|
|||
boolean
|
||||
tgsi_is_shadow_target(unsigned target);
|
||||
|
||||
|
||||
static inline boolean
|
||||
tgsi_is_msaa_target(unsigned target)
|
||||
{
|
||||
return (target == TGSI_TEXTURE_2D_MSAA ||
|
||||
target == TGSI_TEXTURE_2D_ARRAY_MSAA);
|
||||
}
|
||||
|
||||
#if defined __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -436,26 +436,26 @@ pipe_screen::get_compute_param.
|
|||
``processor-arch-manufacturer-os`` that will be passed on to the compiler.
|
||||
This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM
|
||||
or PIPE_SHADER_IR_NATIVE for their preferred IR.
|
||||
Value type: null-terminated string.
|
||||
Value type: null-terminated string. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions
|
||||
for grid and block coordinates. Value type: ``uint64_t``.
|
||||
for grid and block coordinates. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block
|
||||
units. Value type: ``uint64_t []``.
|
||||
units. Value type: ``uint64_t []``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread
|
||||
units. Value type: ``uint64_t []``.
|
||||
units. Value type: ``uint64_t []``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that
|
||||
a single block can contain. Value type: ``uint64_t``.
|
||||
a single block can contain. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
This may be less than the product of the components of MAX_BLOCK_SIZE and is
|
||||
usually limited by the number of threads that can be resident simultaneously
|
||||
on a compute unit.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL
|
||||
resource. Value type: ``uint64_t``.
|
||||
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL
|
||||
resource. Value type: ``uint64_t``.
|
||||
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE
|
||||
resource. Value type: ``uint64_t``.
|
||||
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT
|
||||
resource. Value type: ``uint64_t``.
|
||||
resource. Value type: ``uint64_t``. Shader IR type dependent.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object
|
||||
allocation in bytes. Value type: ``uint64_t``.
|
||||
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU
|
||||
|
|
|
|||
|
|
@ -3220,6 +3220,12 @@ Which shader stage will MOST LIKELY follow after this shader when the shader
|
|||
is bound. This is only a hint to the driver and doesn't have to be precise.
|
||||
Only set for VS and TES.
|
||||
|
||||
TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH / HEIGHT / DEPTH
|
||||
"""""""""""""""""""""""""""""""""""""""""""""""""""
|
||||
|
||||
Threads per block in each dimension, if known at compile time. If the block size
|
||||
is known all three should be at least 1. If it is unknown they should all be set
|
||||
to 0 or not set.
|
||||
|
||||
Texture Sampling and Texture Formats
|
||||
------------------------------------
|
||||
|
|
|
|||
|
|
@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen,
|
|||
|
||||
static int
|
||||
ilo_get_compute_param(struct pipe_screen *screen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param,
|
||||
void *ret)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -25,15 +25,13 @@ if not env['embedded']:
|
|||
env.Prepend(LIBS = [llvmpipe, gallium, mesautil])
|
||||
|
||||
tests = [
|
||||
'arit',
|
||||
'format',
|
||||
'blend',
|
||||
'conv',
|
||||
'printf',
|
||||
]
|
||||
|
||||
if not env['msvc']:
|
||||
tests.append('arit')
|
||||
|
||||
for test in tests:
|
||||
testname = 'lp_test_' + test
|
||||
target = env.Program(
|
||||
|
|
|
|||
|
|
@ -786,7 +786,7 @@ load_unswizzled_block(struct gallivm_state *gallivm,
|
|||
|
||||
dst[i] = LLVMBuildLoad(builder, dst_ptr, "");
|
||||
|
||||
lp_set_load_alignment(dst[i], dst_alignment);
|
||||
LLVMSetAlignment(dst[i], dst_alignment);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -830,7 +830,7 @@ store_unswizzled_block(struct gallivm_state *gallivm,
|
|||
|
||||
src_ptr = LLVMBuildStore(builder, src[i], src_ptr);
|
||||
|
||||
lp_set_store_alignment(src_ptr, src_alignment);
|
||||
LLVMSetAlignment(src_ptr, src_alignment);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -272,6 +272,7 @@ const float fract_values[] = {
|
|||
|
||||
static const struct unary_test_t
|
||||
unary_tests[] = {
|
||||
{"abs", &lp_build_abs, &fabsf, exp2_values, Elements(exp2_values), 20.0 },
|
||||
{"neg", &lp_build_negate, &negf, exp2_values, Elements(exp2_values), 20.0 },
|
||||
{"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values), 20.0 },
|
||||
{"log2", &lp_build_log2_safe, &log2f, log2_values, Elements(log2_values), 20.0 },
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@ ENVYAS ?= envyas
|
|||
all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h
|
||||
|
||||
gf100.asm.h: %.asm.h: %.asm
|
||||
$(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@
|
||||
$(ENVYAS) -a -W -mgf100 -Vgf100 $< -o $@
|
||||
gk104.asm.h: %.asm.h: %.asm
|
||||
$(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@
|
||||
$(ENVYAS) -a -W -mgf100 -Vgk104 $< -o $@
|
||||
gk110.asm.h: %.asm.h: %.asm
|
||||
$(ENVYAS) -a -W -mgk110 $< -o $@
|
||||
gm107.asm.h: %.asm.h: %.asm
|
||||
|
|
|
|||
|
|
@ -126,6 +126,7 @@ private:
|
|||
void emitF2I();
|
||||
void emitI2F();
|
||||
void emitI2I();
|
||||
void emitSEL();
|
||||
void emitSHFL();
|
||||
|
||||
void emitDADD();
|
||||
|
|
@ -893,6 +894,32 @@ CodeEmitterGM107::emitI2I()
|
|||
emitGPR (0x00, insn->def(0));
|
||||
}
|
||||
|
||||
/*
 * Emit the encoding for a select instruction (called for OP_SELP from
 * emitInstruction()).
 *
 * The opcode word is chosen by the register file of source 1: general
 * purpose register, constant buffer, or immediate.  Any other file is
 * rejected with an assertion.  After the opcode and source 1, source 2 is
 * emitted as a predicate, and source 0 and the destination as GPRs.
 *
 * NOTE(review): the leading numeric arguments of the emit* helpers (0x14,
 * 0x22, 0x27, 0x08, 0x00) look like bit positions within the instruction
 * word -- confirm against the helper definitions.
 */
void
|
||||
CodeEmitterGM107::emitSEL()
|
||||
{
|
||||
/* Pick the opcode form matching where source 1 lives. */
switch (insn->src(1).getFile()) {
|
||||
case FILE_GPR:
|
||||
emitInsn(0x5ca00000);
|
||||
emitGPR (0x14, insn->src(1));
|
||||
break;
|
||||
case FILE_MEMORY_CONST:
|
||||
emitInsn(0x4ca00000);
|
||||
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
|
||||
break;
|
||||
case FILE_IMMEDIATE:
|
||||
emitInsn(0x38a00000);
|
||||
emitIMMD(0x14, 19, insn->src(1));
|
||||
break;
|
||||
default:
|
||||
assert(!"bad src1 file");
|
||||
break;
|
||||
}
|
||||
|
||||
/* Operands common to every form: selecting predicate, source 0, result. */
emitPRED(0x27, insn->src(2));
|
||||
emitGPR (0x08, insn->src(0));
|
||||
emitGPR (0x00, insn->def(0));
|
||||
}
|
||||
|
||||
void
|
||||
CodeEmitterGM107::emitSHFL()
|
||||
{
|
||||
|
|
@ -2963,6 +2990,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
|
|||
emitISETP();
|
||||
}
|
||||
break;
|
||||
case OP_SELP:
|
||||
emitSEL();
|
||||
break;
|
||||
case OP_PRESIN:
|
||||
case OP_PREEX2:
|
||||
emitRRO();
|
||||
|
|
|
|||
|
|
@ -372,7 +372,8 @@ NV50LegalizeSSA::propagateWriteToOutput(Instruction *st)
|
|||
return;
|
||||
|
||||
for (int s = 0; di->srcExists(s); ++s)
|
||||
if (di->src(s).getFile() == FILE_IMMEDIATE)
|
||||
if (di->src(s).getFile() == FILE_IMMEDIATE ||
|
||||
di->src(s).getFile() == FILE_MEMORY_LOCAL)
|
||||
return;
|
||||
|
||||
if (prog->getType() == Program::TYPE_GEOMETRY) {
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#if NOUVEAU_DRIVER == 0xc0
|
||||
# include "nvc0/nvc0_screen.h"
|
||||
# include "nvc0/nvc0_3d.xml.h"
|
||||
# include "nvc0/gm107_texture.xml.h"
|
||||
#else
|
||||
# include "nv50/nv50_screen.h"
|
||||
# include "nv50/nv50_3d.xml.h"
|
||||
|
|
@ -65,6 +66,7 @@
|
|||
#define SF_A(sz) G80_TIC_0_COMPONENTS_SIZES_##sz
|
||||
#define SF_B(sz) G200_TIC_0_COMPONENTS_SIZES_##sz
|
||||
#define SF_C(sz) GF100_TIC_0_COMPONENTS_SIZES_##sz
|
||||
#define SF_D(sz) GM107_TIC2_0_COMPONENTS_SIZES_##sz
|
||||
#define SF(c, pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \
|
||||
[PIPE_FORMAT_##pf] = { \
|
||||
sf, { \
|
||||
|
|
@ -236,6 +238,50 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
|
|||
F3(C, BPTC_RGB_FLOAT, NONE, R, G, B, xx, FLOAT, BC6H_SF16, t),
|
||||
F3(C, BPTC_RGB_UFLOAT, NONE, R, G, B, xx, FLOAT, BC6H_UF16, t),
|
||||
|
||||
#if NOUVEAU_DRIVER == 0xc0
|
||||
F3(D, ETC1_RGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
|
||||
F3(D, ETC2_RGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
|
||||
F3(D, ETC2_SRGB8, NONE, R, G, B, xx, UNORM, ETC2_RGB, t),
|
||||
C4(D, ETC2_RGB8A1, NONE, R, G, B, A, UNORM, ETC2_RGB_PTA, t),
|
||||
C4(D, ETC2_SRGB8A1, NONE, R, G, B, A, UNORM, ETC2_RGB_PTA, t),
|
||||
C4(D, ETC2_RGBA8, NONE, R, G, B, A, UNORM, ETC2_RGBA, t),
|
||||
C4(D, ETC2_SRGBA8, NONE, R, G, B, A, UNORM, ETC2_RGBA, t),
|
||||
F1(D, ETC2_R11_UNORM, NONE, R, xx, xx, xx, UNORM, EAC, t),
|
||||
F1(D, ETC2_R11_SNORM, NONE, R, xx, xx, xx, SNORM, EAC, t),
|
||||
F2(D, ETC2_RG11_UNORM, NONE, R, G, xx, xx, UNORM, EACX2, t),
|
||||
F2(D, ETC2_RG11_SNORM, NONE, R, G, xx, xx, SNORM, EACX2, t),
|
||||
|
||||
C4(D, ASTC_4x4, NONE, R, G, B, A, UNORM, ASTC_2D_4X4, t),
|
||||
C4(D, ASTC_5x4, NONE, R, G, B, A, UNORM, ASTC_2D_5X4, t),
|
||||
C4(D, ASTC_5x5, NONE, R, G, B, A, UNORM, ASTC_2D_5X5, t),
|
||||
C4(D, ASTC_6x5, NONE, R, G, B, A, UNORM, ASTC_2D_6X5, t),
|
||||
C4(D, ASTC_6x6, NONE, R, G, B, A, UNORM, ASTC_2D_6X6, t),
|
||||
C4(D, ASTC_8x5, NONE, R, G, B, A, UNORM, ASTC_2D_8X5, t),
|
||||
C4(D, ASTC_8x6, NONE, R, G, B, A, UNORM, ASTC_2D_8X6, t),
|
||||
C4(D, ASTC_8x8, NONE, R, G, B, A, UNORM, ASTC_2D_8X8, t),
|
||||
C4(D, ASTC_10x5, NONE, R, G, B, A, UNORM, ASTC_2D_10X5, t),
|
||||
C4(D, ASTC_10x6, NONE, R, G, B, A, UNORM, ASTC_2D_10X6, t),
|
||||
C4(D, ASTC_10x8, NONE, R, G, B, A, UNORM, ASTC_2D_10X8, t),
|
||||
C4(D, ASTC_10x10, NONE, R, G, B, A, UNORM, ASTC_2D_10X10, t),
|
||||
C4(D, ASTC_12x10, NONE, R, G, B, A, UNORM, ASTC_2D_12X10, t),
|
||||
C4(D, ASTC_12x12, NONE, R, G, B, A, UNORM, ASTC_2D_12X12, t),
|
||||
|
||||
C4(D, ASTC_4x4_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_4X4, t),
|
||||
C4(D, ASTC_5x4_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_5X4, t),
|
||||
C4(D, ASTC_5x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_5X5, t),
|
||||
C4(D, ASTC_6x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_6X5, t),
|
||||
C4(D, ASTC_6x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_6X6, t),
|
||||
C4(D, ASTC_8x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X5, t),
|
||||
C4(D, ASTC_8x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X6, t),
|
||||
C4(D, ASTC_8x8_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_8X8, t),
|
||||
C4(D, ASTC_10x5_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X5, t),
|
||||
C4(D, ASTC_10x6_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X6, t),
|
||||
C4(D, ASTC_10x8_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X8, t),
|
||||
C4(D, ASTC_10x10_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_10X10, t),
|
||||
C4(D, ASTC_12x10_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_12X10, t),
|
||||
C4(D, ASTC_12x12_SRGB, NONE, R, G, B, A, UNORM, ASTC_2D_12X12, t),
|
||||
#endif
|
||||
|
||||
C4(A, R32G32B32A32_FLOAT, RGBA32_FLOAT, R, G, B, A, FLOAT, R32_G32_B32_A32, IB),
|
||||
C4(A, R32G32B32A32_UNORM, NONE, R, G, B, A, UNORM, R32_G32_B32_A32, T),
|
||||
C4(A, R32G32B32A32_SNORM, NONE, R, G, B, A, SNORM, R32_G32_B32_A32, T),
|
||||
|
|
|
|||
|
|
@ -368,6 +368,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
|||
|
||||
static int
|
||||
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
struct nv50_screen *screen = nv50_screen(pscreen);
|
||||
|
|
|
|||
|
|
@ -45,6 +45,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
|
|||
unsigned sample_count,
|
||||
unsigned bindings)
|
||||
{
|
||||
const struct util_format_description *desc = util_format_description(format);
|
||||
|
||||
if (sample_count > 8)
|
||||
return false;
|
||||
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
|
||||
|
|
@ -65,6 +67,17 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
|
|||
sample_count > 1)
|
||||
return false;
|
||||
|
||||
/* Restrict ETC2 and ASTC formats here. These are only supported on GK20A.
|
||||
*/
|
||||
if ((desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
|
||||
desc->layout == UTIL_FORMAT_LAYOUT_ASTC) &&
|
||||
/* The claim is that this should work on GM107 but it doesn't. Need to
|
||||
* test further and figure out if it's a nouveau issue or a HW one.
|
||||
nouveau_screen(pscreen)->class_3d < GM107_3D_CLASS &&
|
||||
*/
|
||||
nouveau_screen(pscreen)->class_3d != NVEA_3D_CLASS)
|
||||
return false;
|
||||
|
||||
/* transfers & shared are always supported */
|
||||
bindings &= ~(PIPE_BIND_TRANSFER_READ |
|
||||
PIPE_BIND_TRANSFER_WRITE |
|
||||
|
|
@ -395,6 +408,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
|||
|
||||
static int
|
||||
nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
struct nvc0_screen *screen = nvc0_screen(pscreen);
|
||||
|
|
|
|||
|
|
@ -250,6 +250,7 @@ gf100_create_texture_view(struct pipe_context *pipe,
|
|||
uint32_t swz[4];
|
||||
uint32_t width, height;
|
||||
uint32_t depth;
|
||||
uint32_t tex_fmt;
|
||||
struct nv50_tic_entry *view;
|
||||
struct nv50_miptree *mt;
|
||||
bool tex_int;
|
||||
|
|
@ -275,12 +276,13 @@ gf100_create_texture_view(struct pipe_context *pipe,
|
|||
fmt = &nvc0_format_table[view->pipe.format];
|
||||
|
||||
tex_int = util_format_is_pure_integer(view->pipe.format);
|
||||
tex_fmt = fmt->tic.format & 0x3f;
|
||||
|
||||
swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
|
||||
swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
|
||||
swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
|
||||
swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
|
||||
tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
|
||||
tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
|
||||
(fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
|
||||
(fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
|
||||
(fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
|
||||
|
|
@ -288,7 +290,8 @@ gf100_create_texture_view(struct pipe_context *pipe,
|
|||
(swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
|
||||
(swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
|
||||
(swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
|
||||
(swz[3] << G80_TIC_0_W_SOURCE__SHIFT);
|
||||
(swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
|
||||
((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));
|
||||
|
||||
address = mt->base.address;
|
||||
|
||||
|
|
|
|||
|
|
@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
|||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
if (shader == PIPE_SHADER_COMPUTE) {
|
||||
uint64_t max_const_buffer_size;
|
||||
pscreen->get_compute_param(pscreen,
|
||||
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||
&max_const_buffer_size);
|
||||
return max_const_buffer_size;
|
||||
|
|
|
|||
|
|
@ -612,6 +612,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
|
|||
}
|
||||
|
||||
static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param,
|
||||
void *ret)
|
||||
{
|
||||
|
|
@ -678,7 +679,7 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
|||
uint64_t *max_global_size = ret;
|
||||
uint64_t max_mem_alloc_size;
|
||||
|
||||
r600_get_compute_param(screen,
|
||||
r600_get_compute_param(screen, ir_type,
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||
&max_mem_alloc_size);
|
||||
|
||||
|
|
|
|||
|
|
@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
|||
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
|
||||
uint64_t max_const_buffer_size;
|
||||
pscreen->get_compute_param(pscreen,
|
||||
pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
|
||||
&max_const_buffer_size);
|
||||
return max_const_buffer_size;
|
||||
|
|
|
|||
|
|
@ -195,7 +195,6 @@ svga_init_shader_key_common(const struct svga_context *svga, unsigned shader,
|
|||
}
|
||||
}
|
||||
|
||||
key->tex[i].texture_msaa = view->texture->nr_samples > 1;
|
||||
if (!svga->curr.sampler[shader][i]->normalized_coords) {
|
||||
assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
|
||||
key->tex[i].width_height_idx = idx++;
|
||||
|
|
|
|||
|
|
@ -99,7 +99,6 @@ struct svga_compile_key
|
|||
unsigned unnormalized:1;
|
||||
unsigned width_height_idx:5; /**< texture unit */
|
||||
unsigned is_array:1;
|
||||
unsigned texture_msaa:1; /**< A multisample texture? */
|
||||
unsigned sprite_texgen:1;
|
||||
unsigned swizzle_r:3;
|
||||
unsigned swizzle_g:3;
|
||||
|
|
|
|||
|
|
@ -5439,7 +5439,7 @@ emit_txf(struct svga_shader_emitter_v10 *emit,
|
|||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
const uint unit = inst->Src[1].Register.Index;
|
||||
const unsigned msaa = emit->key.tex[unit].texture_msaa;
|
||||
const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture);
|
||||
int offsets[3];
|
||||
struct tex_swizzle_info swz_info;
|
||||
|
||||
|
|
|
|||
|
|
@ -317,8 +317,10 @@ void trace_dump_compute_state(const struct pipe_compute_state *state)
|
|||
|
||||
trace_dump_struct_begin("pipe_compute_state");
|
||||
|
||||
trace_dump_member(uint, state, ir_type);
|
||||
|
||||
trace_dump_member_begin("prog");
|
||||
if (state->prog) {
|
||||
if (state->prog && state->ir_type == PIPE_SHADER_IR_TGSI) {
|
||||
static char str[64 * 1024];
|
||||
tgsi_dump_str(state->prog, 0, str, sizeof(str));
|
||||
trace_dump_string(str);
|
||||
|
|
|
|||
|
|
@ -175,6 +175,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen,
|
|||
|
||||
static int
|
||||
trace_screen_get_compute_param(struct pipe_screen *_screen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param, void *data)
|
||||
{
|
||||
struct trace_screen *tr_scr = trace_screen(_screen);
|
||||
|
|
@ -184,10 +185,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen,
|
|||
trace_dump_call_begin("pipe_screen", "get_compute_param");
|
||||
|
||||
trace_dump_arg(ptr, screen);
|
||||
trace_dump_arg(int, ir_type);
|
||||
trace_dump_arg(int, param);
|
||||
trace_dump_arg(ptr, data);
|
||||
|
||||
result = screen->get_compute_param(screen, param, data);
|
||||
result = screen->get_compute_param(screen, ir_type, param, data);
|
||||
|
||||
trace_dump_ret(int, result);
|
||||
|
||||
|
|
|
|||
|
|
@ -368,6 +368,7 @@ enum pipe_flush_flags
|
|||
#define PIPE_BARRIER_IMAGE (1 << 8)
|
||||
#define PIPE_BARRIER_FRAMEBUFFER (1 << 9)
|
||||
#define PIPE_BARRIER_STREAMOUT_BUFFER (1 << 10)
|
||||
#define PIPE_BARRIER_GLOBAL_BUFFER (1 << 11)
|
||||
|
||||
/**
|
||||
* Resource binding flags -- state tracker must specify in advance all
|
||||
|
|
|
|||
|
|
@ -109,13 +109,16 @@ struct pipe_screen {
|
|||
|
||||
/**
|
||||
* Query a compute-specific capability/parameter/limit.
|
||||
* \param param one of PIPE_COMPUTE_CAP_x
|
||||
* \param ret pointer to a preallocated buffer that will be
|
||||
* initialized to the parameter value, or NULL.
|
||||
* \return size in bytes of the parameter value that would be
|
||||
* returned.
|
||||
* \param ir_type shader IR type for which the param applies, or don't care
|
||||
* if the param is not shader related
|
||||
* \param param one of PIPE_COMPUTE_CAP_x
|
||||
* \param ret pointer to a preallocated buffer that will be
|
||||
* initialized to the parameter value, or NULL.
|
||||
* \return size in bytes of the parameter value that would be
|
||||
* returned.
|
||||
*/
|
||||
int (*get_compute_param)(struct pipe_screen *,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param,
|
||||
void *ret);
|
||||
|
||||
|
|
|
|||
|
|
@ -276,7 +276,10 @@ union tgsi_immediate_data
|
|||
#define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16
|
||||
#define TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL 17
|
||||
#define TGSI_PROPERTY_NEXT_SHADER 18
|
||||
#define TGSI_PROPERTY_COUNT 19
|
||||
#define TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH 19
|
||||
#define TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT 20
|
||||
#define TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH 21
|
||||
#define TGSI_PROPERTY_COUNT 22
|
||||
|
||||
struct tgsi_property {
|
||||
unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */
|
||||
|
|
|
|||
|
|
@ -727,6 +727,7 @@ struct pipe_llvm_program_header
|
|||
|
||||
struct pipe_compute_state
|
||||
{
|
||||
enum pipe_shader_ir ir_type; /**< IR type contained in prog. */
|
||||
const void *prog; /**< Compute program to be executed. */
|
||||
unsigned req_local_mem; /**< Required size of the LOCAL resource. */
|
||||
unsigned req_private_mem; /**< Required size of the PRIVATE resource. */
|
||||
|
|
|
|||
|
|
@ -30,11 +30,12 @@ using namespace clover;
|
|||
namespace {
|
||||
template<typename T>
|
||||
std::vector<T>
|
||||
get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) {
|
||||
int sz = pipe->get_compute_param(pipe, cap, NULL);
|
||||
get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,
|
||||
pipe_compute_cap cap) {
|
||||
int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);
|
||||
std::vector<T> v(sz / sizeof(T));
|
||||
|
||||
pipe->get_compute_param(pipe, cap, &v.front());
|
||||
pipe->get_compute_param(pipe, ir_format, cap, &v.front());
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
|
@ -115,19 +116,19 @@ device::max_samplers() const {
|
|||
|
||||
cl_ulong
|
||||
device::max_mem_global() const {
|
||||
return get_compute_param<uint64_t>(pipe,
|
||||
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];
|
||||
}
|
||||
|
||||
cl_ulong
|
||||
device::max_mem_local() const {
|
||||
return get_compute_param<uint64_t>(pipe,
|
||||
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];
|
||||
}
|
||||
|
||||
cl_ulong
|
||||
device::max_mem_input() const {
|
||||
return get_compute_param<uint64_t>(pipe,
|
||||
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];
|
||||
}
|
||||
|
||||
|
|
@ -146,30 +147,30 @@ device::max_const_buffers() const {
|
|||
size_t
|
||||
device::max_threads_per_block() const {
|
||||
return get_compute_param<uint64_t>(
|
||||
pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
|
||||
pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];
|
||||
}
|
||||
|
||||
cl_ulong
|
||||
device::max_mem_alloc_size() const {
|
||||
return get_compute_param<uint64_t>(pipe,
|
||||
return get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];
|
||||
}
|
||||
|
||||
cl_uint
|
||||
device::max_clock_frequency() const {
|
||||
return get_compute_param<uint32_t>(pipe,
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];
|
||||
}
|
||||
|
||||
cl_uint
|
||||
device::max_compute_units() const {
|
||||
return get_compute_param<uint32_t>(pipe,
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];
|
||||
}
|
||||
|
||||
bool
|
||||
device::image_support() const {
|
||||
return get_compute_param<uint32_t>(pipe,
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
|
||||
}
|
||||
|
||||
|
|
@ -181,13 +182,15 @@ device::has_doubles() const {
|
|||
|
||||
std::vector<size_t>
|
||||
device::max_block_size() const {
|
||||
auto v = get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
|
||||
auto v = get_compute_param<uint64_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
|
||||
return { v.begin(), v.end() };
|
||||
}
|
||||
|
||||
cl_uint
|
||||
device::subgroup_size() const {
|
||||
return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
|
||||
return get_compute_param<uint32_t>(pipe, ir_format(),
|
||||
PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
|
||||
}
|
||||
|
||||
std::string
|
||||
|
|
@ -209,7 +212,7 @@ device::ir_format() const {
|
|||
std::string
|
||||
device::ir_target() const {
|
||||
std::vector<char> target = get_compute_param<char>(
|
||||
pipe, PIPE_COMPUTE_CAP_IR_TARGET);
|
||||
pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);
|
||||
return { target.data() };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -89,6 +89,8 @@ kernel::launch(command_queue &q,
|
|||
exec.sviews.size(), NULL);
|
||||
q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
|
||||
exec.samplers.size(), NULL);
|
||||
|
||||
q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER);
|
||||
exec.unbind();
|
||||
}
|
||||
|
||||
|
|
@ -223,6 +225,7 @@ kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
|
|||
if (st)
|
||||
_q->pipe->delete_compute_state(_q->pipe, st);
|
||||
|
||||
cs.ir_type = q->device().ir_format();
|
||||
cs.prog = &(msec.data[0]);
|
||||
cs.req_local_mem = mem_local;
|
||||
cs.req_input_mem = input.size();
|
||||
|
|
|
|||
|
|
@ -58,7 +58,9 @@ struct context {
|
|||
uint64_t __v[4]; \
|
||||
int __i, __n; \
|
||||
\
|
||||
__n = ctx->screen->get_compute_param(ctx->screen, c, __v); \
|
||||
__n = ctx->screen->get_compute_param(ctx->screen, \
|
||||
PIPE_SHADER_IR_TGSI, \
|
||||
c, __v); \
|
||||
printf("%s: {", #c); \
|
||||
\
|
||||
for (__i = 0; __i < __n / sizeof(*__v); ++__i) \
|
||||
|
|
@ -144,6 +146,7 @@ static void init_prog(struct context *ctx, unsigned local_sz,
|
|||
struct pipe_context *pipe = ctx->pipe;
|
||||
struct tgsi_token prog[1024];
|
||||
struct pipe_compute_state cs = {
|
||||
.ir_type = PIPE_SHADER_IR_TGSI,
|
||||
.prog = prog,
|
||||
.req_local_mem = local_sz,
|
||||
.req_private_mem = private_sz,
|
||||
|
|
|
|||
|
|
@ -924,6 +924,40 @@
|
|||
|
||||
</category>
|
||||
|
||||
<category name="GL_EXT_base_instance" number="203">
|
||||
|
||||
<function name="DrawArraysInstancedBaseInstanceEXT" es2="3.0"
|
||||
alias="DrawArraysInstancedBaseInstance">
|
||||
<param name="mode" type="GLenum"/>
|
||||
<param name="first" type="GLint"/>
|
||||
<param name="count" type="GLsizei"/>
|
||||
<param name="instancecount" type="GLsizei"/>
|
||||
<param name="baseinstance" type="GLuint"/>
|
||||
</function>
|
||||
|
||||
<function name="DrawElementsInstancedBaseInstanceEXT" es2="3.0"
|
||||
alias="DrawElementsInstancedBaseInstance">
|
||||
<param name="mode" type="GLenum"/>
|
||||
<param name="count" type="GLsizei"/>
|
||||
<param name="type" type="GLenum"/>
|
||||
<param name="indices" type="const GLvoid *"/>
|
||||
<param name="instancecount" type="GLsizei"/>
|
||||
<param name="baseinstance" type="GLuint"/>
|
||||
</function>
|
||||
|
||||
<function name="DrawElementsInstancedBaseVertexBaseInstanceEXT" es2="3.0"
|
||||
alias="DrawElementsInstancedBaseVertexBaseInstance">
|
||||
<param name="mode" type="GLenum"/>
|
||||
<param name="count" type="GLsizei"/>
|
||||
<param name="type" type="GLenum"/>
|
||||
<param name="indices" type="const GLvoid *"/>
|
||||
<param name="instancecount" type="GLsizei"/>
|
||||
<param name="basevertex" type="GLint"/>
|
||||
<param name="baseinstance" type="GLuint"/>
|
||||
</function>
|
||||
|
||||
</category>
|
||||
|
||||
<category name="GL_EXT_draw_elements_base_vertex" number="204">
|
||||
|
||||
<function name="DrawElementsBaseVertexEXT" alias="DrawElementsBaseVertex"
|
||||
|
|
|
|||
|
|
@ -12741,7 +12741,7 @@
|
|||
<enum name="POLYGON_OFFSET_CLAMP_EXT" value="0x8E1B">
|
||||
<size name="Get" mode="get"/>
|
||||
</enum>
|
||||
<function name="PolygonOffsetClampEXT">
|
||||
<function name="PolygonOffsetClampEXT" es1="1.0" es2="2.0">
|
||||
<param name="factor" type="GLfloat"/>
|
||||
<param name="units" type="GLfloat"/>
|
||||
<param name="clamp" type="GLfloat"/>
|
||||
|
|
|
|||
|
|
@ -148,6 +148,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
|
|||
brw_fs_alloc_reg_sets(compiler);
|
||||
brw_vec4_alloc_reg_set(compiler);
|
||||
|
||||
compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false);
|
||||
|
||||
compiler->scalar_stage[MESA_SHADER_VERTEX] =
|
||||
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
|
||||
compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false;
|
||||
|
|
|
|||
|
|
@ -92,6 +92,12 @@ struct brw_compiler {
|
|||
|
||||
bool scalar_stage[MESA_SHADER_STAGES];
|
||||
struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
|
||||
|
||||
/**
|
||||
* Apply workarounds for SIN and COS output range problems.
|
||||
* This can negatively impact performance.
|
||||
*/
|
||||
bool precise_trig;
|
||||
};
|
||||
|
||||
struct brw_compiler *
|
||||
|
|
|
|||
|
|
@ -765,29 +765,27 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
|||
inst->saturate = instr->dest.saturate;
|
||||
break;
|
||||
|
||||
case nir_op_fsin: {
|
||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
|
||||
inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
|
||||
if (instr->dest.saturate) {
|
||||
inst->dst = result;
|
||||
inst->saturate = true;
|
||||
case nir_op_fsin:
|
||||
if (!compiler->precise_trig) {
|
||||
inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]);
|
||||
} else {
|
||||
bld.MUL(result, tmp, brw_imm_f(0.99997));
|
||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
|
||||
inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]);
|
||||
inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
|
||||
}
|
||||
inst->saturate = instr->dest.saturate;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_fcos: {
|
||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
|
||||
inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
|
||||
if (instr->dest.saturate) {
|
||||
inst->dst = result;
|
||||
inst->saturate = true;
|
||||
case nir_op_fcos:
|
||||
if (!compiler->precise_trig) {
|
||||
inst = bld.emit(SHADER_OPCODE_COS, result, op[0]);
|
||||
} else {
|
||||
bld.MUL(result, tmp, brw_imm_f(0.99997));
|
||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F);
|
||||
inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]);
|
||||
inst = bld.MUL(result, tmp, brw_imm_f(0.99997));
|
||||
}
|
||||
inst->saturate = instr->dest.saturate;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_fddx:
|
||||
if (fs_key->high_quality_derivatives) {
|
||||
|
|
|
|||
|
|
@ -793,7 +793,8 @@ brw_render_target_supported(struct brw_context *brw,
|
|||
/* Under some conditions, MSAA is not supported for formats whose width is
|
||||
* more than 64 bits.
|
||||
*/
|
||||
if (rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
|
||||
if (brw->gen < 8 &&
|
||||
rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
|
||||
/* Gen6: MSAA on >64 bit formats is unsupported. */
|
||||
if (brw->gen <= 6)
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -1093,29 +1093,27 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
|||
inst->saturate = instr->dest.saturate;
|
||||
break;
|
||||
|
||||
case nir_op_fsin: {
|
||||
src_reg tmp = src_reg(this, glsl_type::vec4_type);
|
||||
inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
|
||||
if (instr->dest.saturate) {
|
||||
inst->dst = dst;
|
||||
inst->saturate = true;
|
||||
case nir_op_fsin:
|
||||
if (!compiler->precise_trig) {
|
||||
inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
|
||||
} else {
|
||||
emit(MUL(dst, tmp, brw_imm_f(0.99997)));
|
||||
src_reg tmp = src_reg(this, glsl_type::vec4_type);
|
||||
inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
|
||||
inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
|
||||
}
|
||||
inst->saturate = instr->dest.saturate;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_fcos: {
|
||||
src_reg tmp = src_reg(this, glsl_type::vec4_type);
|
||||
inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
|
||||
if (instr->dest.saturate) {
|
||||
inst->dst = dst;
|
||||
inst->saturate = true;
|
||||
case nir_op_fcos:
|
||||
if (!compiler->precise_trig) {
|
||||
inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
|
||||
} else {
|
||||
emit(MUL(dst, tmp, brw_imm_f(0.99997)));
|
||||
src_reg tmp = src_reg(this, glsl_type::vec4_type);
|
||||
inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
|
||||
inst = emit(MUL(dst, tmp, brw_imm_f(0.99997)));
|
||||
}
|
||||
inst->saturate = instr->dest.saturate;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_idiv:
|
||||
case nir_op_udiv:
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ EXT(APPLE_texture_max_level , dummy_true
|
|||
EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002)
|
||||
|
||||
EXT(ARB_ES2_compatibility , ARB_ES2_compatibility , GLL, GLC, x , x , 2009)
|
||||
EXT(ARB_ES3_1_compatibility , ARB_ES3_1_compatibility , x , GLC, x , x , 2014)
|
||||
EXT(ARB_ES3_compatibility , ARB_ES3_compatibility , GLL, GLC, x , x , 2012)
|
||||
EXT(ARB_arrays_of_arrays , ARB_arrays_of_arrays , GLL, GLC, x , x , 2012)
|
||||
EXT(ARB_base_instance , ARB_base_instance , GLL, GLC, x , x , 2011)
|
||||
|
|
@ -176,6 +177,7 @@ EXT(ATI_texture_float , ARB_texture_float
|
|||
EXT(ATI_texture_mirror_once , ATI_texture_mirror_once , GLL, GLC, x , x , 2006)
|
||||
|
||||
EXT(EXT_abgr , dummy_true , GLL, GLC, x , x , 1995)
|
||||
EXT(EXT_base_instance , ARB_base_instance , x , x , x , 30, 2014)
|
||||
EXT(EXT_bgra , dummy_true , GLL, x , x , x , 1995)
|
||||
EXT(EXT_blend_color , EXT_blend_color , GLL, x , x , x , 1995)
|
||||
EXT(EXT_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
|
||||
|
|
@ -213,7 +215,7 @@ EXT(EXT_packed_pixels , dummy_true
|
|||
EXT(EXT_pixel_buffer_object , EXT_pixel_buffer_object , GLL, GLC, x , x , 2004)
|
||||
EXT(EXT_point_parameters , EXT_point_parameters , GLL, x , x , x , 1997)
|
||||
EXT(EXT_polygon_offset , dummy_true , GLL, x , x , x , 1995)
|
||||
EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, x , x , 2014)
|
||||
EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, ES1, ES2, 2014)
|
||||
EXT(EXT_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009)
|
||||
EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009)
|
||||
EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997)
|
||||
|
|
|
|||
|
|
@ -135,6 +135,9 @@ descriptor=[
|
|||
[ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ],
|
||||
[ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ],
|
||||
[ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],
|
||||
|
||||
# GL_EXT_polygon_offset_clamp
|
||||
[ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
|
||||
]},
|
||||
|
||||
# Enums in OpenGL and GLES1
|
||||
|
|
@ -532,7 +535,7 @@ descriptor=[
|
|||
[ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
|
||||
|
||||
# GL_ARB_shader_storage_buffer_object / geometry shader
|
||||
[ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
|
||||
[ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
|
||||
|
||||
# GL_ARB_uniform_buffer_object / geometry shader
|
||||
[ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
|
||||
|
|
@ -857,9 +860,6 @@ descriptor=[
|
|||
# GL_ARB_shader_image_load_store
|
||||
[ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
|
||||
|
||||
# GL_EXT_polygon_offset_clamp
|
||||
[ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
|
||||
|
||||
# GL_ARB_shader_storage_buffer_object
|
||||
[ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
|
||||
[ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
|
||||
|
|
|
|||
|
|
@ -2534,6 +2534,9 @@ struct gl_uniform_block
|
|||
*/
|
||||
bool IsShaderStorage;
|
||||
|
||||
/** Stages that reference this block */
|
||||
uint8_t stageref;
|
||||
|
||||
/**
|
||||
* Layout specified in the shader
|
||||
*
|
||||
|
|
@ -2834,16 +2837,6 @@ struct gl_shader_program
|
|||
unsigned NumShaderStorageBlocks;
|
||||
struct gl_uniform_block **ShaderStorageBlocks;
|
||||
|
||||
/**
|
||||
* Indices into the BufferInterfaceBlocks[] array for each stage they're
|
||||
* used in, or -1.
|
||||
*
|
||||
* This is used to maintain the Binding values of the stage's
|
||||
* BufferInterfaceBlocks[] and to answer the
|
||||
* GL_UNIFORM_BLOCK_REFERENCED_BY_*_SHADER queries.
|
||||
*/
|
||||
int *InterfaceBlockStageIndex[MESA_SHADER_STAGES];
|
||||
|
||||
/**
|
||||
* Map of active uniform names to locations
|
||||
*
|
||||
|
|
@ -3786,6 +3779,7 @@ struct gl_extensions
|
|||
GLboolean ANGLE_texture_compression_dxt;
|
||||
GLboolean ARB_ES2_compatibility;
|
||||
GLboolean ARB_ES3_compatibility;
|
||||
GLboolean ARB_ES3_1_compatibility;
|
||||
GLboolean ARB_arrays_of_arrays;
|
||||
GLboolean ARB_base_instance;
|
||||
GLboolean ARB_blend_func_extended;
|
||||
|
|
|
|||
|
|
@ -101,31 +101,6 @@ _mesa_BindAttribLocation(GLuint program, GLuint index,
|
|||
*/
|
||||
}
|
||||
|
||||
static bool
|
||||
is_active_attrib(const gl_shader_variable *var)
|
||||
{
|
||||
if (!var)
|
||||
return false;
|
||||
|
||||
switch (var->mode) {
|
||||
case ir_var_shader_in:
|
||||
return var->location != -1;
|
||||
|
||||
case ir_var_system_value:
|
||||
/* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes):
|
||||
* "For GetActiveAttrib, all active vertex shader input variables
|
||||
* are enumerated, including the special built-in inputs gl_VertexID
|
||||
* and gl_InstanceID."
|
||||
*/
|
||||
return var->location == SYSTEM_VALUE_VERTEX_ID ||
|
||||
var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE ||
|
||||
var->location == SYSTEM_VALUE_INSTANCE_ID;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void GLAPIENTRY
|
||||
_mesa_GetActiveAttrib(GLuint program, GLuint desired_index,
|
||||
GLsizei maxLength, GLsizei * length, GLint * size,
|
||||
|
|
@ -166,20 +141,8 @@ _mesa_GetActiveAttrib(GLuint program, GLuint desired_index,
|
|||
|
||||
const gl_shader_variable *const var = RESOURCE_VAR(res);
|
||||
|
||||
if (!is_active_attrib(var))
|
||||
return;
|
||||
|
||||
const char *var_name = var->name;
|
||||
|
||||
/* Since gl_VertexID may be lowered to gl_VertexIDMESA, we need to
|
||||
* consider gl_VertexIDMESA as gl_VertexID for purposes of checking
|
||||
* active attributes.
|
||||
*/
|
||||
if (var->mode == ir_var_system_value &&
|
||||
var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
|
||||
var_name = "gl_VertexID";
|
||||
}
|
||||
|
||||
_mesa_copy_string(name, maxLength, length, var_name);
|
||||
|
||||
if (size)
|
||||
|
|
@ -224,19 +187,7 @@ _mesa_GetAttribLocation(GLuint program, const GLchar * name)
|
|||
if (!res)
|
||||
return -1;
|
||||
|
||||
GLint loc = program_resource_location(shProg, res, name, array_index);
|
||||
|
||||
/* The extra check against against 0 is made because of builtin-attribute
|
||||
* locations that have offset applied. Function program_resource_location
|
||||
* can return built-in attribute locations < 0 and glGetAttribLocation
|
||||
* cannot be used on "conventional" attributes.
|
||||
*
|
||||
* From page 95 of the OpenGL 3.0 spec:
|
||||
*
|
||||
* "If name is not an active attribute, if name is a conventional
|
||||
* attribute, or if an error occurs, -1 will be returned."
|
||||
*/
|
||||
return (loc >= 0) ? loc : -1;
|
||||
return program_resource_location(shProg, res, name, array_index);
|
||||
}
|
||||
|
||||
unsigned
|
||||
|
|
@ -251,8 +202,7 @@ _mesa_count_active_attribs(struct gl_shader_program *shProg)
|
|||
unsigned count = 0;
|
||||
for (unsigned j = 0; j < shProg->NumProgramResourceList; j++, res++) {
|
||||
if (res->Type == GL_PROGRAM_INPUT &&
|
||||
res->StageReferences & (1 << MESA_SHADER_VERTEX) &&
|
||||
is_active_attrib(RESOURCE_VAR(res)))
|
||||
res->StageReferences & (1 << MESA_SHADER_VERTEX))
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
|
|
@ -410,25 +360,12 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name)
|
|||
if (!res)
|
||||
return -1;
|
||||
|
||||
GLint loc = program_resource_location(shProg, res, name, array_index);
|
||||
|
||||
/* The extra check against against 0 is made because of builtin-attribute
|
||||
* locations that have offset applied. Function program_resource_location
|
||||
* can return built-in attribute locations < 0 and glGetFragDataLocation
|
||||
* cannot be used on "conventional" attributes.
|
||||
*
|
||||
* From page 95 of the OpenGL 3.0 spec:
|
||||
*
|
||||
* "If name is not an active attribute, if name is a conventional
|
||||
* attribute, or if an error occurs, -1 will be returned."
|
||||
*/
|
||||
return (loc >= 0) ? loc : -1;
|
||||
return program_resource_location(shProg, res, name, array_index);
|
||||
}
|
||||
|
||||
const char*
|
||||
_mesa_program_resource_name(struct gl_program_resource *res)
|
||||
{
|
||||
const gl_shader_variable *var;
|
||||
switch (res->Type) {
|
||||
case GL_UNIFORM_BLOCK:
|
||||
case GL_SHADER_STORAGE_BLOCK:
|
||||
|
|
@ -436,13 +373,6 @@ _mesa_program_resource_name(struct gl_program_resource *res)
|
|||
case GL_TRANSFORM_FEEDBACK_VARYING:
|
||||
return RESOURCE_XFV(res)->Name;
|
||||
case GL_PROGRAM_INPUT:
|
||||
var = RESOURCE_VAR(res);
|
||||
/* Special case gl_VertexIDMESA -> gl_VertexID. */
|
||||
if (var->mode == ir_var_system_value &&
|
||||
var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
|
||||
return "gl_VertexID";
|
||||
}
|
||||
/* fallthrough */
|
||||
case GL_PROGRAM_OUTPUT:
|
||||
return RESOURCE_VAR(res)->name;
|
||||
case GL_UNIFORM:
|
||||
|
|
@ -850,34 +780,31 @@ program_resource_location(struct gl_shader_program *shProg,
|
|||
struct gl_program_resource *res, const char *name,
|
||||
unsigned array_index)
|
||||
{
|
||||
/* Built-in locations should report GL_INVALID_INDEX. */
|
||||
if (is_gl_identifier(name))
|
||||
return GL_INVALID_INDEX;
|
||||
|
||||
/* VERT_ATTRIB_GENERIC0 and FRAG_RESULT_DATA0 are decremented as these
|
||||
* offsets are used internally to differentiate between built-in attributes
|
||||
* and user-defined attributes.
|
||||
*/
|
||||
switch (res->Type) {
|
||||
case GL_PROGRAM_INPUT: {
|
||||
const gl_shader_variable *var = RESOURCE_VAR(res);
|
||||
|
||||
if (var->location == -1)
|
||||
return -1;
|
||||
|
||||
/* If the input is an array, fail if the index is out of bounds. */
|
||||
if (array_index > 0
|
||||
&& array_index >= var->type->length) {
|
||||
return -1;
|
||||
}
|
||||
return (var->location +
|
||||
(array_index * var->type->without_array()->matrix_columns) -
|
||||
VERT_ATTRIB_GENERIC0);
|
||||
return var->location +
|
||||
(array_index * var->type->without_array()->matrix_columns);
|
||||
}
|
||||
case GL_PROGRAM_OUTPUT:
|
||||
if (RESOURCE_VAR(res)->location == -1)
|
||||
return -1;
|
||||
|
||||
/* If the output is an array, fail if the index is out of bounds. */
|
||||
if (array_index > 0
|
||||
&& array_index >= RESOURCE_VAR(res)->type->length) {
|
||||
return -1;
|
||||
}
|
||||
return RESOURCE_VAR(res)->location + array_index - FRAG_RESULT_DATA0;
|
||||
return RESOURCE_VAR(res)->location + array_index;
|
||||
case GL_UNIFORM:
|
||||
/* If the uniform is built-in, fail. */
|
||||
if (RESOURCE_UNI(res)->builtin)
|
||||
|
|
@ -999,7 +926,7 @@ is_resource_referenced(struct gl_shader_program *shProg,
|
|||
return RESOURCE_ATC(res)->StageReferences[stage];
|
||||
|
||||
if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK)
|
||||
return shProg->InterfaceBlockStageIndex[stage][index] != -1;
|
||||
return shProg->BufferInterfaceBlocks[index].stageref & (1 << stage);
|
||||
|
||||
return res->StageReferences & (1 << stage);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -295,10 +295,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
|
|||
ralloc_free(shProg->BufferInterfaceBlocks);
|
||||
shProg->BufferInterfaceBlocks = NULL;
|
||||
shProg->NumBufferInterfaceBlocks = 0;
|
||||
for (i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
ralloc_free(shProg->InterfaceBlockStageIndex[i]);
|
||||
shProg->InterfaceBlockStageIndex[i] = NULL;
|
||||
}
|
||||
|
||||
ralloc_free(shProg->AtomicBuffers);
|
||||
shProg->AtomicBuffers = NULL;
|
||||
|
|
|
|||
|
|
@ -2064,6 +2064,9 @@ const struct function gles11_functions_possible[] = {
|
|||
{ "glObjectLabelKHR", 11, -1 },
|
||||
{ "glObjectPtrLabelKHR", 11, -1 },
|
||||
|
||||
/* GL_EXT_polygon_offset_clamp */
|
||||
{ "glPolygonOffsetClampEXT", 11, -1 },
|
||||
|
||||
{ NULL, 0, -1 }
|
||||
};
|
||||
|
||||
|
|
@ -2300,6 +2303,9 @@ const struct function gles2_functions_possible[] = {
|
|||
{ "glObjectLabelKHR", 20, -1 },
|
||||
{ "glObjectPtrLabelKHR", 20, -1 },
|
||||
|
||||
/* GL_EXT_polygon_offset_clamp */
|
||||
{ "glPolygonOffsetClampEXT", 11, -1 },
|
||||
|
||||
{ NULL, 0, -1 }
|
||||
};
|
||||
|
||||
|
|
@ -2470,6 +2476,11 @@ const struct function gles3_functions_possible[] = {
|
|||
{ "glDisableiOES", 30, -1 },
|
||||
{ "glIsEnablediOES", 30, -1 },
|
||||
|
||||
/* GL_EXT_base_instance */
|
||||
{ "glDrawArraysInstancedBaseInstanceEXT", 30, -1 },
|
||||
{ "glDrawElementsInstancedBaseInstanceEXT", 30, -1 },
|
||||
{ "glDrawElementsInstancedBaseVertexBaseInstanceEXT", 30, -1 },
|
||||
|
||||
{ NULL, 0, -1 }
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -765,6 +765,11 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
|
|||
RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_UNORM);
|
||||
break;
|
||||
|
||||
case GL_STENCIL_INDEX:
|
||||
case GL_STENCIL_INDEX8:
|
||||
RETURN_IF_SUPPORTED(MESA_FORMAT_S_UINT8);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* For non-generic compressed format we assert two things:
|
||||
*
|
||||
|
|
|
|||
|
|
@ -351,8 +351,55 @@ compute_version(const struct gl_extensions *extensions,
|
|||
extensions->ARB_shading_language_packing &&
|
||||
extensions->ARB_texture_compression_bptc &&
|
||||
extensions->ARB_transform_feedback_instanced);
|
||||
const bool ver_4_3 = (ver_4_2 &&
|
||||
consts->GLSLVersion >= 430 &&
|
||||
extensions->ARB_ES3_compatibility &&
|
||||
extensions->ARB_arrays_of_arrays &&
|
||||
extensions->ARB_compute_shader &&
|
||||
extensions->ARB_copy_image &&
|
||||
extensions->ARB_explicit_uniform_location &&
|
||||
extensions->ARB_fragment_layer_viewport &&
|
||||
extensions->ARB_framebuffer_no_attachments &&
|
||||
extensions->ARB_internalformat_query2 &&
|
||||
/* extensions->ARB_robust_buffer_access_behavior */ 0 &&
|
||||
extensions->ARB_shader_image_size &&
|
||||
extensions->ARB_shader_storage_buffer_object &&
|
||||
extensions->ARB_stencil_texturing &&
|
||||
extensions->ARB_texture_buffer_range &&
|
||||
extensions->ARB_texture_query_levels &&
|
||||
extensions->ARB_texture_view);
|
||||
const bool ver_4_4 = (ver_4_3 &&
|
||||
consts->GLSLVersion >= 440 &&
|
||||
extensions->ARB_buffer_storage &&
|
||||
extensions->ARB_clear_texture &&
|
||||
extensions->ARB_enhanced_layouts &&
|
||||
extensions->ARB_query_buffer_object &&
|
||||
extensions->ARB_texture_mirror_clamp_to_edge &&
|
||||
extensions->ARB_texture_stencil8 &&
|
||||
extensions->ARB_vertex_type_10f_11f_11f_rev);
|
||||
const bool ver_4_5 = (ver_4_4 &&
|
||||
consts->GLSLVersion >= 450 &&
|
||||
extensions->ARB_ES3_1_compatibility &&
|
||||
extensions->ARB_clip_control &&
|
||||
extensions->ARB_conditional_render_inverted &&
|
||||
/* extensions->ARB_cull_distance */ 0 &&
|
||||
extensions->ARB_derivative_control &&
|
||||
extensions->ARB_shader_texture_image_samples &&
|
||||
extensions->NV_texture_barrier);
|
||||
|
||||
if (ver_4_2) {
|
||||
if (ver_4_5) {
|
||||
major = 4;
|
||||
minor = 5;
|
||||
}
|
||||
else if (ver_4_4) {
|
||||
major = 4;
|
||||
minor = 4;
|
||||
}
|
||||
else if (ver_4_3) {
|
||||
major = 4;
|
||||
minor = 3;
|
||||
}
|
||||
else if (ver_4_2) {
|
||||
major = 4;
|
||||
minor = 2;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -248,54 +248,51 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
|
|||
paramList->Size = 0;
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
GLuint i, j;
|
||||
|
||||
paramList->NumParameters = oldNum + sz4;
|
||||
GLuint i, j;
|
||||
|
||||
memset(&paramList->Parameters[oldNum], 0,
|
||||
sz4 * sizeof(struct gl_program_parameter));
|
||||
paramList->NumParameters = oldNum + sz4;
|
||||
|
||||
for (i = 0; i < sz4; i++) {
|
||||
struct gl_program_parameter *p = paramList->Parameters + oldNum + i;
|
||||
p->Name = name ? strdup(name) : NULL;
|
||||
p->Type = type;
|
||||
p->Size = size;
|
||||
p->DataType = datatype;
|
||||
if (values) {
|
||||
if (size >= 4) {
|
||||
COPY_4V(paramList->ParameterValues[oldNum + i], values);
|
||||
memset(&paramList->Parameters[oldNum], 0,
|
||||
sz4 * sizeof(struct gl_program_parameter));
|
||||
|
||||
for (i = 0; i < sz4; i++) {
|
||||
struct gl_program_parameter *p = paramList->Parameters + oldNum + i;
|
||||
p->Name = name ? strdup(name) : NULL;
|
||||
p->Type = type;
|
||||
p->Size = size;
|
||||
p->DataType = datatype;
|
||||
if (values) {
|
||||
if (size >= 4) {
|
||||
COPY_4V(paramList->ParameterValues[oldNum + i], values);
|
||||
} else {
|
||||
/* copy 1, 2 or 3 values */
|
||||
GLuint remaining = size % 4;
|
||||
assert(remaining < 4);
|
||||
for (j = 0; j < remaining; j++) {
|
||||
paramList->ParameterValues[oldNum + i][j].f = values[j].f;
|
||||
}
|
||||
else {
|
||||
/* copy 1, 2 or 3 values */
|
||||
GLuint remaining = size % 4;
|
||||
assert(remaining < 4);
|
||||
for (j = 0; j < remaining; j++) {
|
||||
paramList->ParameterValues[oldNum + i][j].f = values[j].f;
|
||||
}
|
||||
/* fill in remaining positions with zeros */
|
||||
for (; j < 4; j++) {
|
||||
paramList->ParameterValues[oldNum + i][j].f = 0.0f;
|
||||
}
|
||||
/* fill in remaining positions with zeros */
|
||||
for (; j < 4; j++) {
|
||||
paramList->ParameterValues[oldNum + i][j].f = 0.0f;
|
||||
}
|
||||
values += 4;
|
||||
p->Initialized = GL_TRUE;
|
||||
}
|
||||
else {
|
||||
/* silence valgrind */
|
||||
for (j = 0; j < 4; j++)
|
||||
paramList->ParameterValues[oldNum + i][j].f = 0;
|
||||
}
|
||||
size -= 4;
|
||||
values += 4;
|
||||
p->Initialized = GL_TRUE;
|
||||
} else {
|
||||
/* silence valgrind */
|
||||
for (j = 0; j < 4; j++)
|
||||
paramList->ParameterValues[oldNum + i][j].f = 0;
|
||||
}
|
||||
|
||||
if (state) {
|
||||
for (i = 0; i < STATE_LENGTH; i++)
|
||||
paramList->Parameters[oldNum].StateIndexes[i] = state[i];
|
||||
}
|
||||
|
||||
return (GLint) oldNum;
|
||||
size -= 4;
|
||||
}
|
||||
|
||||
if (state) {
|
||||
for (i = 0; i < STATE_LENGTH; i++)
|
||||
paramList->Parameters[oldNum].StateIndexes[i] = state[i];
|
||||
}
|
||||
|
||||
return (GLint) oldNum;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1124,14 +1124,15 @@ void st_init_extensions(struct pipe_screen *screen,
|
|||
if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
|
||||
uint64_t grid_size[3], block_size[3];
|
||||
|
||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE,
|
||||
grid_size);
|
||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE,
|
||||
block_size);
|
||||
screen->get_compute_param(screen,
|
||||
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size);
|
||||
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size);
|
||||
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
|
||||
&consts->MaxComputeWorkGroupInvocations);
|
||||
screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
|
||||
screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI,
|
||||
PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE,
|
||||
&consts->MaxComputeSharedMemorySize);
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
|
|
|
|||
|
|
@ -5935,6 +5935,20 @@ find_array(unsigned attr, struct array_decl *arrays, unsigned count,
|
|||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_compute_block_size(const struct gl_program *program,
|
||||
struct ureg_program *ureg) {
|
||||
const struct gl_compute_program *cp =
|
||||
(const struct gl_compute_program *)program;
|
||||
|
||||
ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
|
||||
cp->LocalSize[0]);
|
||||
ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
|
||||
cp->LocalSize[1]);
|
||||
ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
|
||||
cp->LocalSize[2]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
|
||||
* \param program the program to translate
|
||||
|
|
@ -6180,6 +6194,10 @@ st_translate_program(
|
|||
}
|
||||
}
|
||||
|
||||
if (procType == TGSI_PROCESSOR_COMPUTE) {
|
||||
emit_compute_block_size(proginfo, ureg);
|
||||
}
|
||||
|
||||
/* Declare address register.
|
||||
*/
|
||||
if (program->num_address_regs > 0) {
|
||||
|
|
|
|||
|
|
@ -1463,6 +1463,7 @@ st_translate_compute_program(struct st_context *st,
|
|||
st_translate_program_common(st, &stcp->Base.Base, stcp->glsl_to_tgsi, ureg,
|
||||
TGSI_PROCESSOR_COMPUTE, &prog);
|
||||
|
||||
stcp->tgsi.ir_type = PIPE_SHADER_IR_TGSI;
|
||||
stcp->tgsi.prog = prog.tokens;
|
||||
stcp->tgsi.req_local_mem = stcp->Base.SharedSize;
|
||||
stcp->tgsi.req_private_mem = 0;
|
||||
|
|
|
|||
|
|
@ -1739,7 +1739,7 @@ vbo_initialize_exec_dispatch(const struct gl_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
if (_mesa_is_desktop_gl(ctx)) {
|
||||
if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) {
|
||||
SET_DrawArraysInstancedBaseInstance(exec, vbo_exec_DrawArraysInstancedBaseInstance);
|
||||
SET_DrawElementsInstancedBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseInstance);
|
||||
SET_DrawElementsInstancedBaseVertexBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseVertexBaseInstance);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue