mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
Merge remote-tracking branch 'mesa-public/master' into vulkan
This pulls in tessellation and the store_var changes that go with it.
This commit is contained in:
commit
ea77b384e8
223 changed files with 5978 additions and 1468 deletions
25
configure.ac
25
configure.ac
|
|
@ -72,8 +72,8 @@ LIBDRM_REQUIRED=2.4.60
|
|||
LIBDRM_RADEON_REQUIRED=2.4.56
|
||||
LIBDRM_AMDGPU_REQUIRED=2.4.63
|
||||
LIBDRM_INTEL_REQUIRED=2.4.61
|
||||
LIBDRM_NVVIEUX_REQUIRED=2.4.33
|
||||
LIBDRM_NOUVEAU_REQUIRED=2.4.62
|
||||
LIBDRM_NVVIEUX_REQUIRED=2.4.66
|
||||
LIBDRM_NOUVEAU_REQUIRED=2.4.66
|
||||
LIBDRM_FREEDRENO_REQUIRED=2.4.65
|
||||
DRI2PROTO_REQUIRED=2.6
|
||||
DRI3PROTO_REQUIRED=1.0
|
||||
|
|
@ -98,8 +98,7 @@ AC_PROG_CXX
|
|||
AM_PROG_CC_C_O
|
||||
AM_PROG_AS
|
||||
AX_CHECK_GNU_MAKE
|
||||
AC_CHECK_PROGS([PYTHON2], [python2 python])
|
||||
AC_CHECK_PROGS([PYTHON3], [python3])
|
||||
AC_CHECK_PROGS([PYTHON2], [python2.7 python2 python])
|
||||
AC_PROG_SED
|
||||
AC_PROG_MKDIR_P
|
||||
|
||||
|
|
@ -384,10 +383,11 @@ save_CFLAGS="$CFLAGS"
|
|||
CFLAGS="$SSE41_CFLAGS $CFLAGS"
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
#include <smmintrin.h>
|
||||
int param;
|
||||
int main () {
|
||||
__m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
|
||||
__m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
|
||||
c = _mm_max_epu32(a, b);
|
||||
return 0;
|
||||
return _mm_cvtsi128_si32(c);
|
||||
}]])], SSE41_SUPPORTED=1)
|
||||
CFLAGS="$save_CFLAGS"
|
||||
if test "x$SSE41_SUPPORTED" = x1; then
|
||||
|
|
@ -1715,7 +1715,15 @@ AC_ARG_WITH([clang-libdir],
|
|||
[CLANG_LIBDIR=''])
|
||||
|
||||
PKG_CHECK_EXISTS([libclc], [have_libclc=yes], [have_libclc=no])
|
||||
AC_CHECK_LIB([elf], [elf_memory], [have_libelf=yes;ELF_LIB=-lelf])
|
||||
PKG_CHECK_MODULES([LIBELF], [libelf], [have_libelf=yes], [have_libelf=no])
|
||||
|
||||
if test "x$have_libelf" = xno; then
|
||||
LIBELF_LIBS=''
|
||||
LIBELF_CFLAGS=''
|
||||
AC_CHECK_LIB([elf], [elf_memory], [have_libelf=yes;LIBELF_LIBS=-lelf], [have_libelf=no])
|
||||
AC_SUBST([LIBELF_LIBS])
|
||||
AC_SUBST([LIBELF_CFLAGS])
|
||||
fi
|
||||
|
||||
if test "x$enable_opencl" = xyes; then
|
||||
if test -z "$with_gallium_drivers"; then
|
||||
|
|
@ -2302,8 +2310,6 @@ if test "x$USE_VC4_SIMULATOR" = xyes -a "x$HAVE_GALLIUM_ILO" = xyes; then
|
|||
AC_MSG_ERROR([VC4 simulator on x86 replaces i965 driver build, so ilo must be disabled.])
|
||||
fi
|
||||
|
||||
AC_SUBST([ELF_LIB])
|
||||
|
||||
AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
|
||||
AM_CONDITIONAL(HAVE_X11_DRIVER, test "x$enable_xlib_glx" = xyes)
|
||||
AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
|
||||
|
|
@ -2584,7 +2590,6 @@ if test "x$MESA_LLVM" = x1; then
|
|||
echo ""
|
||||
fi
|
||||
echo " PYTHON2: $PYTHON2"
|
||||
echo " PYTHON3: $PYTHON3"
|
||||
|
||||
echo ""
|
||||
echo " Run '${MAKE-make}' to build Mesa"
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, r600, radeonsi
|
|||
GL_ARB_gpu_shader_fp64 DONE (llvmpipe, softpipe)
|
||||
GL_ARB_sample_shading DONE (i965, nv50)
|
||||
GL_ARB_shader_subroutine DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_tessellation_shader DONE ()
|
||||
GL_ARB_tessellation_shader DONE (i965/gen8+)
|
||||
GL_ARB_texture_buffer_object_rgb32 DONE (i965, llvmpipe, softpipe)
|
||||
GL_ARB_texture_cube_map_array DONE (i965, nv50, llvmpipe, softpipe)
|
||||
GL_ARB_texture_gather DONE (i965, nv50, llvmpipe, softpipe)
|
||||
|
|
@ -184,7 +184,7 @@ GL 4.4, GLSL 4.40:
|
|||
- forced alignment within blocks in progress
|
||||
- specified vec4-slot component numbers in progress
|
||||
- specified transform/feedback layout in progress
|
||||
- input/output block locations in progress
|
||||
- input/output block locations DONE
|
||||
GL_ARB_multi_bind DONE (all drivers)
|
||||
GL_ARB_query_buffer_object not started
|
||||
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
|
||||
|
|
@ -233,7 +233,7 @@ GLES3.1, GLSL ES 3.1
|
|||
glMemoryBarrierByRegion DONE
|
||||
glGetTexLevelParameter[fi]v - needs updates DONE
|
||||
glGetBooleani_v - restrict to GLES enums
|
||||
gl_HelperInvocation support
|
||||
gl_HelperInvocation support DONE (i965, nvc0, r600)
|
||||
|
||||
GLES3.2, GLSL ES 3.2
|
||||
GL_EXT_color_buffer_float DONE (all drivers)
|
||||
|
|
|
|||
|
|
@ -96,8 +96,7 @@
|
|||
<br>
|
||||
<blockquote>
|
||||
<a href="http://sourceforge.net"
|
||||
target="_parent"><img src="http://sourceforge.net/sflogo.php?group_id=3&type=1"
|
||||
width="88" height="31" align="bottom" alt="Sourceforge.net" border="0"></a>
|
||||
target="_parent">sourceforge.net</a>
|
||||
</blockquote>
|
||||
|
||||
</body>
|
||||
|
|
|
|||
|
|
@ -16,6 +16,19 @@
|
|||
|
||||
<h1>News</h1>
|
||||
|
||||
<h2>December 21, 2015</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.0.8.html">Mesa 11.0.8</a> is released.
|
||||
This is a bug-fix release.
|
||||
</p>
|
||||
|
||||
<h2>December 15, 2015</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.1.0.html">Mesa 11.1.0</a> is released. This is a new
|
||||
development release. See the release notes for more information about
|
||||
the release.
|
||||
</p>
|
||||
|
||||
<h2>December 9, 2015</h2>
|
||||
<p>
|
||||
<a href="relnotes/11.0.7.html">Mesa 11.0.7</a> is released.
|
||||
|
|
|
|||
|
|
@ -21,6 +21,8 @@ The release notes summarize what's new or changed in each Mesa release.
|
|||
</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="relnotes/11.0.8.html">11.0.8 release notes</a>
|
||||
<li><a href="relnotes/11.1.0.html">11.1.0 release notes</a>
|
||||
<li><a href="relnotes/11.0.7.html">11.0.7 release notes</a>
|
||||
<li><a href="relnotes/11.0.6.html">11.0.6 release notes</a>
|
||||
<li><a href="relnotes/11.0.5.html">11.0.5 release notes</a>
|
||||
|
|
|
|||
200
docs/relnotes/11.0.8.html
Normal file
200
docs/relnotes/11.0.8.html
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.8 Release Notes / December 21, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.8 is a bug fix release which fixes bugs found since the 11.0.7 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.8 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
ab9db87b54d7525e4b611b82577ea9a9eae55927558df57b190059d5ecd9406f mesa-11.0.8.tar.gz
|
||||
5696e4730518b6805d2ed5def393c4293f425a2c2c01bd5ed4bdd7ad62f7ad75 mesa-11.0.8.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91806">Bug 91806</a> - configure does not test whether assembler supports sse4.1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92849">Bug 92849</a> - [IVB HSW BDW] piglit image load/store load-from-cleared-image.shader_test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92909">Bug 92909</a> - Offset/alignment issue with layout std140 and vec3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93004">Bug 93004</a> - Guild Wars 2 crash on nouveau DX11 cards</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93215">Bug 93215</a> - [Regression bisected] Ogles1conform Automatic mipmap generation test is fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93266">Bug 93266</a> - gl_arb_shading_language_420pack does not allow binding of image variables</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Boyuan Zhang (1):</p>
|
||||
<ul>
|
||||
<li>radeon/uvd: uv pitch separation for stoney</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (9):</p>
|
||||
<ul>
|
||||
<li>r600: do SQ flush ES ring rolling workaround</li>
|
||||
<li>r600: SMX returns CONTEXT_DONE early workaround</li>
|
||||
<li>r600/shader: split address get out to a function.</li>
|
||||
<li>r600/shader: add utility functions to do single slot arithmatic</li>
|
||||
<li>r600g: fix geom shader input indirect indexing.</li>
|
||||
<li>r600: handle geometry dynamic input array index</li>
|
||||
<li>radeonsi: handle doubles in lds load path.</li>
|
||||
<li>mesa/varray: set double arrays to non-normalised.</li>
|
||||
<li>mesa/shader: return correct attribute location for double matrix arrays</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (8):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.7</li>
|
||||
<li>cherry-ignore: don't pick a specific i965 formats patch</li>
|
||||
<li>Revert "i965/nir: Remove unused indirect handling"</li>
|
||||
<li>Revert "i965/state: Get rid of dword_pitch arguments to buffer functions"</li>
|
||||
<li>Revert "i965/vec4: Use a stride of 1 and byte offsets for UBOs"</li>
|
||||
<li>Revert "i965/fs: Use a stride of 1 and byte offsets for UBOs"</li>
|
||||
<li>Revert "i965/vec4: Use byte offsets for UBO pulls on Sandy Bridge"</li>
|
||||
<li>Update version to 11.0.8</li>
|
||||
</ul>
|
||||
|
||||
<p>Francisco Jerez (1):</p>
|
||||
<ul>
|
||||
<li>i965: Resolve color and flush for all active shader images in intel_update_state().</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (1):</p>
|
||||
<ul>
|
||||
<li>meta/generate_mipmap: Work-around GLES 1.x problem with GL_DRAW_FRAMEBUFFER</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (17):</p>
|
||||
<ul>
|
||||
<li>freedreno/a4xx: support lod_bias</li>
|
||||
<li>freedreno/a4xx: fix 5_5_5_1 texture sampler format</li>
|
||||
<li>freedreno/a4xx: point regid to "red" even for alpha-only rb formats</li>
|
||||
<li>nvc0/ir: fold postfactor into immediate</li>
|
||||
<li>nv50/ir: deal with loops with no breaks</li>
|
||||
<li>nv50/ir: the mad source might not have a defining instruction</li>
|
||||
<li>nv50/ir: fix instruction permutation logic</li>
|
||||
<li>nv50/ir: don't forget to mark flagsDef on cvt in txb lowering</li>
|
||||
<li>nv50/ir: fix DCE to not generate 96-bit loads</li>
|
||||
<li>nv50/ir: avoid looking at uninitialized srcMods entries</li>
|
||||
<li>gk110/ir: fix imul hi emission with limm arg</li>
|
||||
<li>gk104/ir: sampler doesn't matter for txf</li>
|
||||
<li>gk110/ir: fix imad sat/hi flag emission for immediate args</li>
|
||||
<li>nv50/ir: fix cutoff for using r63 vs r127 when replacing zero</li>
|
||||
<li>nv50/ir: can't have predication and immediates</li>
|
||||
<li>glsl: assign varying locations to tess shaders when doing SSO</li>
|
||||
<li>ttn: add TEX2 support</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (5):</p>
|
||||
<ul>
|
||||
<li>i965/vec4: Use byte offsets for UBO pulls on Sandy Bridge</li>
|
||||
<li>i965/fs: Use a stride of 1 and byte offsets for UBOs</li>
|
||||
<li>i965/vec4: Use a stride of 1 and byte offsets for UBOs</li>
|
||||
<li>i965/state: Get rid of dword_pitch arguments to buffer functions</li>
|
||||
<li>i965/nir: Remove unused indirect handling</li>
|
||||
</ul>
|
||||
|
||||
<p>Jonathan Gray (2):</p>
|
||||
<ul>
|
||||
<li>configure.ac: use pkg-config for libelf</li>
|
||||
<li>configure: check for python2.7 for PYTHON2</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (2):</p>
|
||||
<ul>
|
||||
<li>i965: Fix fragment shader struct inputs.</li>
|
||||
<li>i965: Fix scalar vertex shader struct outputs.</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (8):</p>
|
||||
<ul>
|
||||
<li>radeonsi: fix occlusion queries on Fiji</li>
|
||||
<li>radeonsi: fix a hang due to uninitialized border color registers</li>
|
||||
<li>radeonsi: fix Fiji for LLVM <= 3.7</li>
|
||||
<li>radeonsi: don't call of u_prims_for_vertices for patches and rectangles</li>
|
||||
<li>radeonsi: apply the streamout workaround to Fiji as well</li>
|
||||
<li>gallium/radeon: fix Hyper-Z hangs by programming PA_SC_MODE_CNTL_1 correctly</li>
|
||||
<li>tgsi/scan: add flag colors_written</li>
|
||||
<li>r600g: write all MRTs only if there is exactly one output (fixes a hang)</li>
|
||||
</ul>
|
||||
|
||||
<p>Matt Turner (1):</p>
|
||||
<ul>
|
||||
<li>glsl: Allow binding of image variables with 420pack.</li>
|
||||
</ul>
|
||||
|
||||
<p>Neil Roberts (2):</p>
|
||||
<ul>
|
||||
<li>i965: Add MESA_FORMAT_B8G8R8X8_SRGB to brw_format_for_mesa_format</li>
|
||||
<li>i965: Add B8G8R8X8_SRGB to the alpha format override</li>
|
||||
</ul>
|
||||
|
||||
<p>Oded Gabbay (1):</p>
|
||||
<ul>
|
||||
<li>configura.ac: fix test for SSE4.1 assembler support</li>
|
||||
</ul>
|
||||
|
||||
<p>Patrick Rudolph (2):</p>
|
||||
<ul>
|
||||
<li>nv50,nvc0: fix use-after-free when vertex buffers are unbound</li>
|
||||
<li>gallium/util: return correct number of bound vertex buffers</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (1):</p>
|
||||
<ul>
|
||||
<li>nvc0: free memory allocated by the prog which reads MP perf counters</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (1):</p>
|
||||
<ul>
|
||||
<li>i965: use _Shader to get fragment program when updating surface state</li>
|
||||
</ul>
|
||||
|
||||
<p>Tom Stellard (2):</p>
|
||||
<ul>
|
||||
<li>radeonsi: Rename si_shader::ls_rsrc{1,2} to si_shader::rsrc{1,2}</li>
|
||||
<li>radeonsi/compute: Use the compiler's COMPUTE_PGM_RSRC* register values</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.1.0 Release Notes / TBD</h1>
|
||||
<h1>Mesa 11.1.0 Release Notes / 15 December 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.1.0 is a new development release.
|
||||
|
|
@ -33,7 +33,8 @@ because compatibility contexts are not supported.
|
|||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD.
|
||||
e3bc44be4df5e4dc728dfda7b55b1aaeadfce36eca6a367b76cc07598070cb2d mesa-11.1.0.tar.gz
|
||||
9befe03b04223eb1ede177fa8cac001e2850292c8c12a3ec9929106afad9cf1f mesa-11.1.0.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
|
@ -84,11 +85,196 @@ Note: some of the new features are only available with certain drivers.
|
|||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
TBD.
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28130">Bug 28130</a> - vbo: premature flushing breaks GL_LINE_LOOP</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=49779">Bug 49779</a> - Extra line segments in GL_LINE_LOOP</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71789">Bug 71789</a> - [r300g] Visuals not found in (default) depth = 24</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79783">Bug 79783</a> - Distorted output in obs-studio where other vendors "work"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80821">Bug 80821</a> - When LIBGL_ALWAYS_SOFTWARE is set, KHR_create_context is not supported</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81174">Bug 81174</a> - Gallium: GL_LINE_LOOP broken with more than 512 points</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83508">Bug 83508</a> - [UBO] Assertion for array of blocks</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw@entry=0x7fffd4097a08, fb=fb@entry=0x7fffd40fa900, buffers=buffers@entry=2, partial_clear=partial_clear@entry=false)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86469">Bug 86469</a> - Unreal Engine demo doesn't run</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86720">Bug 86720</a> - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89014">Bug 89014</a> - PIPE_QUERY_GPU_FINISHED is not acting as expected on SI</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90175">Bug 90175</a> - [hsw bisected][PATCH] atomic counters doesn't work for a binding point different to zero</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90348">Bug 90348</a> - Spilling failure of b96 merged value</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90631">Bug 90631</a> - Compilation failure for fragment shader with many branches on Sandy Bridge</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is > 32k</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91044">Bug 91044</a> - piglit spec/egl_khr_create_context/valid debug flag gles* fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91718">Bug 91718</a> - piglit.spec.arb_shader_image_load_store.invalid causes intermittent GPU HANG</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91780">Bug 91780</a> - Rendering issues with geometry shader</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91785">Bug 91785</a> - make check DispatchSanity_test.GLES31 regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91788">Bug 91788</a> - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image & DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91898">Bug 91898</a> - src/util/mesa-sha1.c:250:25: fatal error: openssl/sha.h: No such file or directory</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91927">Bug 91927</a> - [SKL] [regression] piglit compressed textures tests fail with kernel upgrade</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91930">Bug 91930</a> - Program with GtkGLArea widget does not redraw</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91985">Bug 91985</a> - [regression, bisected] FTBFS with commit f9caabe8f1: R600_UCP_CONST_BUFFER is undefined</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91993">Bug 91993</a> - Graphical glitch in Astromenace (open-source game).</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92033">Bug 92033</a> - [SNB,regression,dEQP,bisected] functional.shaders.random tests regressed</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92052">Bug 92052</a> - nir/nir_builder.h:79: error: expected primary-expression before ‘.’ token</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92054">Bug 92054</a> - make check gbm-symbols-check regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92066">Bug 92066</a> - [ILK,G45,regression] New assertion on BRW_MAX_MRF breaks ilk and g45</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92095">Bug 92095</a> - [Regression, bisected] arb_shader_atomic_counters.compiler.builtins.frag</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92122">Bug 92122</a> - [bisected, cts] Regression with Assault Android Cactus</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92124">Bug 92124</a> - shader_query.cpp:841:34: error: ‘strndup’ was not declared in this scope</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92183">Bug 92183</a> - linker.cpp:3187:46: error: ‘strtok_r’ was not declared in this scope</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92193">Bug 92193</a> - [SKL] ES2-CTS.gtf.GL2ExtensionTests.compressed_astc_texture.compressed_astc_texture fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92214">Bug 92214</a> - Flightgear crashes during splashboot with R600 driver, LLVM 3.7.0 and mesa 11.0.2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92221">Bug 92221</a> - Unintended code changes in _mesa_base_tex_format commit</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92265">Bug 92265</a> - Black windows in weston after update mesa to 11.0.2-1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92304">Bug 92304</a> - [cts] cts.shaders.negative conformance tests fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92363">Bug 92363</a> - [BSW/BDW] ogles1conform Gets test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92437">Bug 92437</a> - osmesa: Expose GL entry points for Windows build, via .def file</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92438">Bug 92438</a> - Segfault in pushbuf_kref when running the android emulator (qemu) on nv50</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92476">Bug 92476</a> - [cts] ES2-CTS.gtf.GL2ExtensionTests.egl_image.egl_image fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92588">Bug 92588</a> - [HSW,BDW,BSW,SKL-Y][GLES 3.1 CTS] ES31-CTS.arrays_of_arrays.InteractionFunctionCalls2 - assert</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92621">Bug 92621</a> - [G965 ILK G45] Regression: 24 piglit regressions in glsl-1.10</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92623">Bug 92623</a> - Differences in prog_data ignored when caching fragment programs (causes hangs)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92634">Bug 92634</a> - gallium's vl_mpeg12_decoder does not work with st/va</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92639">Bug 92639</a> - [Regression bisected] Ogles1conform mustpass.c fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92641">Bug 92641</a> - [SKL BSW] [Regression] Ogles1conform userclip.c fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92645">Bug 92645</a> - kodi vdpau interop fails since mesa,meta: move gl_texture_object::TargetIndex initializations</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92705">Bug 92705</a> - [clover] fail to build with llvm-svn/clang-svn 3.8</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92709">Bug 92709</a> - "LLVM triggered Diagnostic Handler: unsupported call to function ldexpf in main" when starting race in stuntrally</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92738">Bug 92738</a> - Randon R7 240 doesn't work on 16KiB page size platform</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92744">Bug 92744</a> - [g965 Regression bisected] Performance regression and piglit assertions due to liveness analysis</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92770">Bug 92770</a> - [SNB, regression, dEQP] deqp-gles3.functional.shaders.discard.dynamic_loop_texture</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92824">Bug 92824</a> - [regression, bisected] `make check` dispatch-sanity broken by GL_EXT_buffer_storage</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92849">Bug 92849</a> - [IVB HSW BDW] piglit image load/store load-from-cleared-image.shader_test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92859">Bug 92859</a> - [regression, bisected] validate_intrinsic_instr: Assertion triggered</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92860">Bug 92860</a> - [radeonsi][bisected] st/mesa: implement ARB_copy_image - Corruption in ARK Survival Evolved</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92900">Bug 92900</a> - [regression bisected] About 700 piglit regressions is what could go wrong</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92909">Bug 92909</a> - Offset/alignment issue with layout std140 and vec3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92985">Bug 92985</a> - Mac OS X build error "ar: no archive members specified"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93015">Bug 93015</a> - Tonga Elemental segfault + VM faults since radeon: implement r600_query_hw_get_result via function pointers</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93048">Bug 93048</a> - [CTS regression] mesa af2723 breaks GL Conformance for debug extension</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93063">Bug 93063</a> - drm_helper.h:227:1: error: static declaration of ‘pipe_virgl_create_screen’ follows non-static declaration</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93091">Bug 93091</a> - [opencl] segfault when running any opencl programs (like clinfo)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93126">Bug 93126</a> - wrongly claim supporting GL_EXT_texture_rg</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93180">Bug 93180</a> - [regression] arb_separate_shader_objects.active sampler conflict fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93235">Bug 93235</a> - [regression] dispatch sanity broken by GetPointerv</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93266">Bug 93266</a> - gl_arb_shading_language_420pack does not allow binding of image variables</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
TBD.
|
||||
<ul><li>MPEG4 decoding has been disabled by default in the VAAPI driver</li></ul>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_base_instance on freedreno/a4xx</li>
|
||||
<li>GL_ARB_compute_shader on i965</li>
|
||||
<li>GL_ARB_copy_image on r600</li>
|
||||
<li>GL_ARB_tessellation_shader on r600 (evergreen/cayman only)</li>
|
||||
<li>GL_ARB_tessellation_shader on i965/gen8+ and r600 (evergreen/cayman only)</li>
|
||||
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
|
||||
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
|
||||
<li>GL_ARB_texture_query_lod on freedreno/a4xx</li>
|
||||
|
|
@ -56,6 +56,8 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx</li>
|
||||
<li>GL_KHR_texture_compression_astc_ldr on freedreno/a4xx</li>
|
||||
<li>GL_AMD_performance_monitor on radeonsi (CIK+ only)</li>
|
||||
<li>New OSMesaCreateContextAttribs() function (for creating core profile
|
||||
contexts)</li>
|
||||
</ul>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
|
|
|||
|
|
@ -42,9 +42,7 @@ Tungsten Graphics, Inc. have supported the ongoing development of Mesa.
|
|||
<li>The
|
||||
<a href="http://www.mesa3d.org">Mesa</a>
|
||||
website is hosted by
|
||||
<a href="http://sourceforge.net">
|
||||
<img src="http://sourceforge.net/sflogo.php?group_id=3&type=1"
|
||||
width="88" height="31" align="bottom" alt="Sourceforge.net" border="0"></a>
|
||||
<a href="http://sourceforge.net">sourceforge.net</a>.
|
||||
<br>
|
||||
<br>
|
||||
|
||||
|
|
|
|||
|
|
@ -58,8 +58,8 @@ extern "C" {
|
|||
#include <GL/gl.h>
|
||||
|
||||
|
||||
#define OSMESA_MAJOR_VERSION 10
|
||||
#define OSMESA_MINOR_VERSION 0
|
||||
#define OSMESA_MAJOR_VERSION 11
|
||||
#define OSMESA_MINOR_VERSION 2
|
||||
#define OSMESA_PATCH_VERSION 0
|
||||
|
||||
|
||||
|
|
@ -95,6 +95,18 @@ extern "C" {
|
|||
#define OSMESA_MAX_WIDTH 0x24 /* new in 4.0 */
|
||||
#define OSMESA_MAX_HEIGHT 0x25 /* new in 4.0 */
|
||||
|
||||
/*
|
||||
* Accepted in the attribute list of OSMesaCreateContextAttribs().
|
||||
*/
|
||||
#define OSMESA_DEPTH_BITS 0x30
|
||||
#define OSMESA_STENCIL_BITS 0x31
|
||||
#define OSMESA_ACCUM_BITS 0x32
|
||||
#define OSMESA_PROFILE 0x33
|
||||
#define OSMESA_CORE_PROFILE 0x34
|
||||
#define OSMESA_COMPAT_PROFILE 0x35
|
||||
#define OSMESA_CONTEXT_MAJOR_VERSION 0x36
|
||||
#define OSMESA_CONTEXT_MINOR_VERSION 0x37
|
||||
|
||||
|
||||
typedef struct osmesa_context *OSMesaContext;
|
||||
|
||||
|
|
@ -127,6 +139,35 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits,
|
|||
GLint accumBits, OSMesaContext sharelist);
|
||||
|
||||
|
||||
/*
|
||||
* Create an Off-Screen Mesa rendering context with attribute list.
|
||||
* The list is composed of (attribute, value) pairs and terminated with
|
||||
* attribute==0. Supported Attributes:
|
||||
*
|
||||
* Attributes Values
|
||||
* --------------------------------------------------------------------------
|
||||
* OSMESA_FORMAT OSMESA_RGBA*, OSMESA_BGRA, OSMESA_ARGB, etc.
|
||||
* OSMESA_DEPTH_BITS 0*, 16, 24, 32
|
||||
* OSMESA_STENCIL_BITS 0*, 8
|
||||
* OSMESA_ACCUM_BITS 0*, 16
|
||||
* OSMESA_PROFILE OSMESA_COMPAT_PROFILE*, OSMESA_CORE_PROFILE
|
||||
* OSMESA_CONTEXT_MAJOR_VERSION 1*, 2, 3
|
||||
* OSMESA_CONTEXT_MINOR_VERSION 0+
|
||||
*
|
||||
* Note: * = default value
|
||||
*
|
||||
* We return a context version >= what's specified by OSMESA_CONTEXT_MAJOR/
|
||||
* MINOR_VERSION for the given profile. For example, if you request a GL 1.4
|
||||
* compat profile, you might get a GL 3.0 compat profile.
|
||||
* Otherwise, null is returned if the version/profile is not supported.
|
||||
*
|
||||
* New in Mesa 11.2
|
||||
*/
|
||||
GLAPI OSMesaContext GLAPIENTRY
|
||||
OSMesaCreateContextAttribs( const int *attribList, OSMesaContext sharelist );
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Destroy an Off-Screen Mesa rendering context.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
include Makefile.sources
|
||||
include $(top_srcdir)/src/gallium/Automake.inc
|
||||
|
||||
noinst_LTLIBRARIES = libgallium.la
|
||||
noinst_LTLIBRARIES = libgallium_nir.la
|
||||
|
||||
AM_CFLAGS = \
|
||||
-I$(top_srcdir)/src/loader \
|
||||
-I$(top_builddir)/src/glsl/nir \
|
||||
-I$(top_srcdir)/src/gallium/auxiliary/util \
|
||||
$(GALLIUM_CFLAGS) \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
|
|
@ -15,11 +14,24 @@ AM_CXXFLAGS = \
|
|||
$(VISIBILITY_CXXFLAGS) \
|
||||
$(MSVC2008_COMPAT_CXXFLAGS)
|
||||
|
||||
libgallium_nir_la_SOURCES = \
|
||||
$(NIR_SOURCES)
|
||||
|
||||
libgallium_nir_la_CFLAGS = \
|
||||
-I$(top_builddir)/src/glsl/nir \
|
||||
$(GALLIUM_CFLAGS) \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(MSVC2013_COMPAT_CFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES += libgallium.la
|
||||
|
||||
libgallium_la_SOURCES = \
|
||||
$(C_SOURCES) \
|
||||
$(NIR_SOURCES) \
|
||||
$(GENERATED_SOURCES)
|
||||
|
||||
libgallium_la_LIBADD = \
|
||||
libgallium_nir.la
|
||||
|
||||
if HAVE_MESA_LLVM
|
||||
|
||||
AM_CFLAGS += \
|
||||
|
|
|
|||
|
|
@ -91,34 +91,34 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
|
|||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
out->clip[i] = clipvertex[i];
|
||||
out->pre_clip_pos[i] = position[i];
|
||||
out->clip_pos[i] = position[i];
|
||||
}
|
||||
|
||||
/* Be careful with NaNs. Comparisons must be true for them. */
|
||||
/* Do the hardwired planes first:
|
||||
*/
|
||||
if (flags & DO_CLIP_XY_GUARD_BAND) {
|
||||
if (-0.50 * position[0] + position[3] < 0) mask |= (1<<0);
|
||||
if ( 0.50 * position[0] + position[3] < 0) mask |= (1<<1);
|
||||
if (-0.50 * position[1] + position[3] < 0) mask |= (1<<2);
|
||||
if ( 0.50 * position[1] + position[3] < 0) mask |= (1<<3);
|
||||
if (!(-0.50 * position[0] + position[3] >= 0)) mask |= (1<<0);
|
||||
if (!( 0.50 * position[0] + position[3] >= 0)) mask |= (1<<1);
|
||||
if (!(-0.50 * position[1] + position[3] >= 0)) mask |= (1<<2);
|
||||
if (!( 0.50 * position[1] + position[3] >= 0)) mask |= (1<<3);
|
||||
}
|
||||
else if (flags & DO_CLIP_XY) {
|
||||
if (-position[0] + position[3] < 0) mask |= (1<<0);
|
||||
if ( position[0] + position[3] < 0) mask |= (1<<1);
|
||||
if (-position[1] + position[3] < 0) mask |= (1<<2);
|
||||
if ( position[1] + position[3] < 0) mask |= (1<<3);
|
||||
if (!(-position[0] + position[3] >= 0)) mask |= (1<<0);
|
||||
if (!( position[0] + position[3] >= 0)) mask |= (1<<1);
|
||||
if (!(-position[1] + position[3] >= 0)) mask |= (1<<2);
|
||||
if (!( position[1] + position[3] >= 0)) mask |= (1<<3);
|
||||
}
|
||||
|
||||
/* Clip Z planes according to full cube, half cube or none.
|
||||
*/
|
||||
if (flags & DO_CLIP_FULL_Z) {
|
||||
if ( position[2] + position[3] < 0) mask |= (1<<4);
|
||||
if (-position[2] + position[3] < 0) mask |= (1<<5);
|
||||
if (!( position[2] + position[3] >= 0)) mask |= (1<<4);
|
||||
if (!(-position[2] + position[3] >= 0)) mask |= (1<<5);
|
||||
}
|
||||
else if (flags & DO_CLIP_HALF_Z) {
|
||||
if ( position[2] < 0) mask |= (1<<4);
|
||||
if (-position[2] + position[3] < 0) mask |= (1<<5);
|
||||
if (!( position[2] >= 0)) mask |= (1<<4);
|
||||
if (!(-position[2] + position[3] >= 0)) mask |= (1<<5);
|
||||
}
|
||||
|
||||
if (flags & DO_CLIP_USER) {
|
||||
|
|
@ -137,7 +137,6 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
|
|||
if (have_cd && num_written_clipdistance) {
|
||||
float clipdist;
|
||||
i = plane_idx - 6;
|
||||
out->have_clipdist = 1;
|
||||
/* first four clip distance in first vector etc. */
|
||||
if (i < 4)
|
||||
clipdist = out->data[cd[0]][i];
|
||||
|
|
@ -146,7 +145,7 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
|
|||
if (clipdist < 0 || util_is_inf_or_nan(clipdist))
|
||||
mask |= 1 << plane_idx;
|
||||
} else {
|
||||
if (dot4(clipvertex, plane[plane_idx]) < 0)
|
||||
if (!(dot4(clipvertex, plane[plane_idx]) >= 0))
|
||||
mask |= 1 << plane_idx;
|
||||
}
|
||||
}
|
||||
|
|
@ -192,7 +191,6 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
|
|||
|
||||
out = (struct vertex_header *)( (char *)out + info->stride );
|
||||
}
|
||||
|
||||
return need_pipeline != 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -188,6 +188,7 @@ create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
|
|||
sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
|
||||
Elements(elem_types), 0);
|
||||
|
||||
(void) target; /* silence unused var warning for non-debug build */
|
||||
LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
|
||||
target, sampler_type,
|
||||
DRAW_JIT_SAMPLER_MIN_LOD);
|
||||
|
|
@ -234,6 +235,8 @@ create_jit_context_type(struct gallivm_state *gallivm,
|
|||
PIPE_MAX_SAMPLERS); /* samplers */
|
||||
context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
|
||||
Elements(elem_types), 0);
|
||||
|
||||
(void) target; /* silence unused var warning for non-debug build */
|
||||
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
|
||||
target, context_type, DRAW_JIT_CTX_CONSTANTS);
|
||||
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants,
|
||||
|
|
@ -375,15 +378,14 @@ static LLVMTypeRef
|
|||
create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
|
||||
{
|
||||
LLVMTargetDataRef target = gallivm->target;
|
||||
LLVMTypeRef elem_types[4];
|
||||
LLVMTypeRef elem_types[3];
|
||||
LLVMTypeRef vertex_header;
|
||||
char struct_name[24];
|
||||
|
||||
util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
|
||||
|
||||
elem_types[DRAW_JIT_VERTEX_VERTEX_ID] = LLVMIntTypeInContext(gallivm->context, 32);
|
||||
elem_types[DRAW_JIT_VERTEX_CLIP] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
|
||||
elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
|
||||
elem_types[DRAW_JIT_VERTEX_CLIP_POS] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
|
||||
elem_types[DRAW_JIT_VERTEX_DATA] = LLVMArrayType(elem_types[1], data_elems);
|
||||
|
||||
vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
|
||||
|
|
@ -403,12 +405,10 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
|
|||
target, vertex_header,
|
||||
DRAW_JIT_VERTEX_VERTEX_ID);
|
||||
*/
|
||||
LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
|
||||
(void) target; /* silence unused var warning for non-debug build */
|
||||
LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos,
|
||||
target, vertex_header,
|
||||
DRAW_JIT_VERTEX_CLIP);
|
||||
LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos,
|
||||
target, vertex_header,
|
||||
DRAW_JIT_VERTEX_PRE_CLIP_POS);
|
||||
DRAW_JIT_VERTEX_CLIP_POS);
|
||||
LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
|
||||
target, vertex_header,
|
||||
DRAW_JIT_VERTEX_DATA);
|
||||
|
|
@ -826,7 +826,7 @@ store_aos(struct gallivm_state *gallivm,
|
|||
* struct vertex_header {
|
||||
* unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
|
||||
* unsigned edgeflag:1;
|
||||
* unsigned have_clipdist:1;
|
||||
* unsigned pad:1;
|
||||
* unsigned vertex_id:16;
|
||||
* [...]
|
||||
* }
|
||||
|
|
@ -838,7 +838,7 @@ store_aos(struct gallivm_state *gallivm,
|
|||
* {
|
||||
* return (x >> 16) | // vertex_id
|
||||
* ((x & 0x3fff) << 18) | // clipmask
|
||||
* ((x & 0x4000) << 3) | // have_clipdist
|
||||
* ((x & 0x4000) << 3) | // pad
|
||||
* ((x & 0x8000) << 1); // edgeflag
|
||||
* }
|
||||
*/
|
||||
|
|
@ -850,19 +850,23 @@ adjust_mask(struct gallivm_state *gallivm,
|
|||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef vertex_id;
|
||||
LLVMValueRef clipmask;
|
||||
LLVMValueRef have_clipdist;
|
||||
LLVMValueRef pad;
|
||||
LLVMValueRef edgeflag;
|
||||
|
||||
vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
|
||||
clipmask = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
|
||||
clipmask = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
|
||||
have_clipdist = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
|
||||
have_clipdist = LLVMBuildShl(builder, have_clipdist, lp_build_const_int32(gallivm, 3), "");
|
||||
if (0) {
|
||||
pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
|
||||
pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 3), "");
|
||||
}
|
||||
edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
|
||||
edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 1), "");
|
||||
|
||||
mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
|
||||
mask = LLVMBuildOr(builder, mask, have_clipdist, "");
|
||||
if (0) {
|
||||
mask = LLVMBuildOr(builder, mask, pad, "");
|
||||
}
|
||||
mask = LLVMBuildOr(builder, mask, edgeflag, "");
|
||||
#endif
|
||||
return mask;
|
||||
|
|
@ -877,7 +881,7 @@ store_aos_array(struct gallivm_state *gallivm,
|
|||
int attrib,
|
||||
int num_outputs,
|
||||
LLVMValueRef clipmask,
|
||||
boolean have_clipdist)
|
||||
boolean need_edgeflag)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
|
||||
|
|
@ -908,11 +912,15 @@ store_aos_array(struct gallivm_state *gallivm,
|
|||
* code here. See struct vertex_header in draw_private.h.
|
||||
*/
|
||||
assert(DRAW_TOTAL_CLIP_PLANES==14);
|
||||
/* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */
|
||||
vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
|
||||
if (have_clipdist)
|
||||
vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1);
|
||||
val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type), vertex_id_pad_edgeflag);
|
||||
/* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 (left 0 if need_edgeflag; the bit then comes from clipmask) */
|
||||
if (!need_edgeflag) {
|
||||
vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
|
||||
}
|
||||
else {
|
||||
vertex_id_pad_edgeflag = (0xffff << 16);
|
||||
}
|
||||
val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
|
||||
vertex_id_pad_edgeflag);
|
||||
/* OR with the clipmask */
|
||||
cliptmp = LLVMBuildOr(builder, val, clipmask, "");
|
||||
for (i = 0; i < vector_length; i++) {
|
||||
|
|
@ -942,7 +950,7 @@ convert_to_aos(struct gallivm_state *gallivm,
|
|||
LLVMValueRef clipmask,
|
||||
int num_outputs,
|
||||
struct lp_type soa_type,
|
||||
boolean have_clipdist)
|
||||
boolean need_edgeflag)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
unsigned chan, attrib, i;
|
||||
|
|
@ -998,7 +1006,8 @@ convert_to_aos(struct gallivm_state *gallivm,
|
|||
aos,
|
||||
attrib,
|
||||
num_outputs,
|
||||
clipmask, have_clipdist);
|
||||
clipmask,
|
||||
need_edgeflag);
|
||||
}
|
||||
#if DEBUG_STORE
|
||||
lp_build_printf(gallivm, " # storing end\n");
|
||||
|
|
@ -1014,7 +1023,7 @@ store_clip(struct gallivm_state *gallivm,
|
|||
const struct lp_type vs_type,
|
||||
LLVMValueRef io_ptr,
|
||||
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
|
||||
boolean pre_clip_pos, int idx)
|
||||
int idx)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef soa[4];
|
||||
|
|
@ -1041,14 +1050,8 @@ store_clip(struct gallivm_state *gallivm,
|
|||
soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
|
||||
soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
|
||||
|
||||
if (!pre_clip_pos) {
|
||||
for (i = 0; i < vs_type.length; i++) {
|
||||
clip_ptrs[i] = draw_jit_header_clip(gallivm, io_ptrs[i]);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < vs_type.length; i++) {
|
||||
clip_ptrs[i] = draw_jit_header_pre_clip_pos(gallivm, io_ptrs[i]);
|
||||
}
|
||||
for (i = 0; i < vs_type.length; i++) {
|
||||
clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_ptrs[i]);
|
||||
}
|
||||
|
||||
lp_build_transpose_aos(gallivm, vs_type, soa, soa);
|
||||
|
|
@ -1140,11 +1143,7 @@ generate_clipmask(struct draw_llvm *llvm,
|
|||
struct gallivm_state *gallivm,
|
||||
struct lp_type vs_type,
|
||||
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
|
||||
boolean clip_xy,
|
||||
boolean clip_z,
|
||||
boolean clip_user,
|
||||
boolean clip_halfz,
|
||||
unsigned ucp_enable,
|
||||
struct draw_llvm_variant_key *key,
|
||||
LLVMValueRef context_ptr,
|
||||
boolean *have_clipdist)
|
||||
{
|
||||
|
|
@ -1160,7 +1159,9 @@ generate_clipmask(struct draw_llvm *llvm,
|
|||
const unsigned pos = llvm->draw->vs.position_output;
|
||||
const unsigned cv = llvm->draw->vs.clipvertex_output;
|
||||
int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
|
||||
bool have_cd = false;
|
||||
boolean have_cd = false;
|
||||
boolean clip_user = key->clip_user;
|
||||
unsigned ucp_enable = key->ucp_enable;
|
||||
unsigned cd[2];
|
||||
|
||||
cd[0] = llvm->draw->vs.clipdistance_output[0];
|
||||
|
|
@ -1200,8 +1201,16 @@ generate_clipmask(struct draw_llvm *llvm,
|
|||
cv_w = pos_w;
|
||||
}
|
||||
|
||||
/*
|
||||
* Be careful with the comparisons and NaNs (using llvm's unordered
|
||||
* comparisons here).
|
||||
*/
|
||||
/* Cliptest, for hardwired planes */
|
||||
if (clip_xy) {
|
||||
/*
|
||||
* XXX should take guardband into account (currently not in key).
|
||||
* Otherwise might run the draw pipeline stages for nothing.
|
||||
*/
|
||||
if (key->clip_xy) {
|
||||
/* plane 1 */
|
||||
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
|
||||
temp = shift;
|
||||
|
|
@ -1229,9 +1238,9 @@ generate_clipmask(struct draw_llvm *llvm,
|
|||
mask = LLVMBuildOr(builder, mask, test, "");
|
||||
}
|
||||
|
||||
if (clip_z) {
|
||||
if (key->clip_z) {
|
||||
temp = lp_build_const_int_vec(gallivm, i32_type, 16);
|
||||
if (clip_halfz) {
|
||||
if (key->clip_halfz) {
|
||||
/* plane 5 */
|
||||
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
|
||||
test = LLVMBuildAnd(builder, test, temp, "");
|
||||
|
|
@ -1318,6 +1327,20 @@ generate_clipmask(struct draw_llvm *llvm,
|
|||
}
|
||||
}
|
||||
}
|
||||
if (key->need_edgeflags) {
|
||||
/*
|
||||
* This isn't really part of clipmask but stored the same in vertex
|
||||
* header later, so do it here.
|
||||
*/
|
||||
unsigned edge_attr = llvm->draw->vs.edgeflag_output;
|
||||
LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
|
||||
LLVMValueRef edgeflag = LLVMBuildLoad(builder, outputs[edge_attr][0], "");
|
||||
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
|
||||
temp = lp_build_const_int_vec(gallivm, i32_type,
|
||||
1LL << DRAW_TOTAL_CLIP_PLANES);
|
||||
test = LLVMBuildAnd(builder, test, temp, "");
|
||||
mask = LLVMBuildOr(builder, mask, test, "");
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
|
@ -1329,7 +1352,8 @@ generate_clipmask(struct draw_llvm *llvm,
|
|||
static LLVMValueRef
|
||||
clipmask_booli32(struct gallivm_state *gallivm,
|
||||
const struct lp_type vs_type,
|
||||
LLVMValueRef clipmask_bool_ptr)
|
||||
LLVMValueRef clipmask_bool_ptr,
|
||||
boolean edgeflag_in_clipmask)
|
||||
{
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
|
||||
|
|
@ -1339,8 +1363,18 @@ clipmask_booli32(struct gallivm_state *gallivm,
|
|||
int i;
|
||||
|
||||
/*
|
||||
* Can do this with log2(vector length) pack instructions and one extract
|
||||
* (as we don't actually need a or) with sse2 which would be way better.
|
||||
* We need to invert the edgeflag bit from the clipmask here
|
||||
* (because the result really indicates whether we want to run the
|
||||
* pipeline or not, and we (may) need to run it if edgeflag was 0).
|
||||
*/
|
||||
if (edgeflag_in_clipmask) {
|
||||
struct lp_type i32_type = lp_int_type(vs_type);
|
||||
LLVMValueRef edge = lp_build_const_int_vec(gallivm, i32_type,
|
||||
1LL << DRAW_TOTAL_CLIP_PLANES);
|
||||
clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
|
||||
}
|
||||
/*
|
||||
* Could do much better with just cmp/movmskps.
|
||||
*/
|
||||
for (i=0; i < vs_type.length; i++) {
|
||||
temp = LLVMBuildExtractElement(builder, clipmask_bool,
|
||||
|
|
@ -1536,8 +1570,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
|
|||
const boolean bypass_viewport = key->has_gs || key->bypass_viewport ||
|
||||
llvm->draw->vs.vertex_shader->info.writes_viewport_index;
|
||||
const boolean enable_cliptest = !key->has_gs && (key->clip_xy ||
|
||||
key->clip_z ||
|
||||
key->clip_user);
|
||||
key->clip_z ||
|
||||
key->clip_user ||
|
||||
key->need_edgeflags);
|
||||
LLVMValueRef variant_func;
|
||||
const unsigned pos = llvm->draw->vs.position_output;
|
||||
const unsigned cv = llvm->draw->vs.clipvertex_output;
|
||||
|
|
@ -1766,8 +1801,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
|
|||
|
||||
if (pos != -1 && cv != -1) {
|
||||
/* store original positions in clip before further manipulation */
|
||||
store_clip(gallivm, vs_type, io, outputs, FALSE, key->clip_user ? cv : pos);
|
||||
store_clip(gallivm, vs_type, io, outputs, TRUE, pos);
|
||||
store_clip(gallivm, vs_type, io, outputs, pos);
|
||||
|
||||
/* do cliptest */
|
||||
if (enable_cliptest) {
|
||||
|
|
@ -1777,11 +1811,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
|
|||
gallivm,
|
||||
vs_type,
|
||||
outputs,
|
||||
key->clip_xy,
|
||||
key->clip_z,
|
||||
key->clip_user,
|
||||
key->clip_halfz,
|
||||
key->ucp_enable,
|
||||
key,
|
||||
context_ptr, &have_clipdist);
|
||||
temp = LLVMBuildOr(builder, clipmask, temp, "");
|
||||
/* store temporary clipping boolean value */
|
||||
|
|
@ -1806,14 +1836,15 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
|
|||
*/
|
||||
convert_to_aos(gallivm, io, NULL, outputs, clipmask,
|
||||
vs_info->num_outputs, vs_type,
|
||||
have_clipdist);
|
||||
enable_cliptest && key->need_edgeflags);
|
||||
}
|
||||
lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
|
||||
|
||||
sampler->destroy(sampler);
|
||||
|
||||
/* return clipping boolean value for function */
|
||||
ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr);
|
||||
ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr,
|
||||
enable_cliptest && key->need_edgeflags);
|
||||
|
||||
LLVMBuildRet(builder, ret);
|
||||
|
||||
|
|
@ -1847,6 +1878,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
|
|||
key->clip_user = llvm->draw->clip_user;
|
||||
key->bypass_viewport = llvm->draw->bypass_viewport;
|
||||
key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
|
||||
/* XXX assumes edgeflag output not at 0 */
|
||||
key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
|
||||
key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
|
||||
key->has_gs = llvm->draw->gs.geometry_shader != NULL;
|
||||
|
|
|
|||
|
|
@ -104,8 +104,7 @@ enum {
|
|||
|
||||
enum {
|
||||
DRAW_JIT_VERTEX_VERTEX_ID = 0,
|
||||
DRAW_JIT_VERTEX_CLIP,
|
||||
DRAW_JIT_VERTEX_PRE_CLIP_POS,
|
||||
DRAW_JIT_VERTEX_CLIP_POS,
|
||||
DRAW_JIT_VERTEX_DATA
|
||||
};
|
||||
|
||||
|
|
@ -162,11 +161,8 @@ enum {
|
|||
#define draw_jit_header_id(_gallivm, _ptr) \
|
||||
lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_VERTEX_ID, "id")
|
||||
|
||||
#define draw_jit_header_clip(_gallivm, _ptr) \
|
||||
lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_CLIP, "clip")
|
||||
|
||||
#define draw_jit_header_pre_clip_pos(_gallivm, _ptr) \
|
||||
lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_PRE_CLIP_POS, "pre_clip_pos")
|
||||
#define draw_jit_header_clip_pos(_gallivm, _ptr) \
|
||||
lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_CLIP_POS, "clip_pos")
|
||||
|
||||
#define draw_jit_header_data(_gallivm, _ptr) \
|
||||
lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_DATA, "data")
|
||||
|
|
|
|||
|
|
@ -646,6 +646,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
|
|||
struct pipe_context *pipe = draw->pipe;
|
||||
const struct pipe_rasterizer_state *rast = draw->rasterizer;
|
||||
uint num_samplers;
|
||||
uint num_sampler_views;
|
||||
void *r;
|
||||
|
||||
assert(draw->rasterizer->line_smooth);
|
||||
|
|
@ -667,9 +668,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
|
|||
draw_aaline_prepare_outputs(draw, draw->pipeline.aaline);
|
||||
|
||||
/* how many samplers? */
|
||||
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
|
||||
num_samplers = MAX2(aaline->num_sampler_views, aaline->num_samplers);
|
||||
num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1);
|
||||
/* we'll use sampler/texture[aaline->sampler_unit] for the alpha texture */
|
||||
num_samplers = MAX2(aaline->num_samplers, aaline->fs->sampler_unit + 1);
|
||||
num_sampler_views = MAX2(num_samplers, aaline->num_sampler_views);
|
||||
|
||||
aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso;
|
||||
pipe_sampler_view_reference(&aaline->state.sampler_views[aaline->fs->sampler_unit],
|
||||
|
|
@ -681,7 +682,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
|
|||
num_samplers, aaline->state.sampler);
|
||||
|
||||
aaline->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
|
||||
num_samplers, aaline->state.sampler_views);
|
||||
num_sampler_views, aaline->state.sampler_views);
|
||||
|
||||
/* Disable triangle culling, stippling, unfilled mode etc. */
|
||||
r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
|
||||
|
|
|
|||
|
|
@ -59,6 +59,8 @@ struct clip_stage {
|
|||
struct draw_stage stage; /**< base class */
|
||||
|
||||
unsigned pos_attr;
|
||||
boolean have_clipdist;
|
||||
int cv_attr;
|
||||
|
||||
/* List of the attributes to be constant interpolated. */
|
||||
uint num_const_attribs;
|
||||
|
|
@ -145,20 +147,23 @@ static void interp(const struct clip_stage *clip,
|
|||
*/
|
||||
dst->clipmask = 0;
|
||||
dst->edgeflag = 0; /* will get overwritten later */
|
||||
dst->have_clipdist = in->have_clipdist;
|
||||
dst->pad = 0;
|
||||
dst->vertex_id = UNDEFINED_VERTEX_ID;
|
||||
|
||||
/* Interpolate the clip-space coords.
|
||||
*/
|
||||
interp_attr(dst->clip, t, in->clip, out->clip);
|
||||
if (clip->cv_attr >= 0) {
|
||||
interp_attr(dst->data[clip->cv_attr], t,
|
||||
in->data[clip->cv_attr], out->data[clip->cv_attr]);
|
||||
}
|
||||
/* interpolate the clip-space position */
|
||||
interp_attr(dst->pre_clip_pos, t, in->pre_clip_pos, out->pre_clip_pos);
|
||||
interp_attr(dst->clip_pos, t, in->clip_pos, out->clip_pos);
|
||||
|
||||
/* Do the projective divide and viewport transformation to get
|
||||
* new window coordinates:
|
||||
*/
|
||||
{
|
||||
const float *pos = dst->pre_clip_pos;
|
||||
const float *pos = dst->clip_pos;
|
||||
const float *scale =
|
||||
clip->stage.draw->viewports[viewport_index].scale;
|
||||
const float *trans =
|
||||
|
|
@ -192,11 +197,11 @@ static void interp(const struct clip_stage *clip,
|
|||
t_nopersp = t;
|
||||
/* find either in.x != out.x or in.y != out.y */
|
||||
for (k = 0; k < 2; k++) {
|
||||
if (in->pre_clip_pos[k] != out->pre_clip_pos[k]) {
|
||||
if (in->clip_pos[k] != out->clip_pos[k]) {
|
||||
/* do divide by W, then compute linear interpolation factor */
|
||||
float in_coord = in->pre_clip_pos[k] / in->pre_clip_pos[3];
|
||||
float out_coord = out->pre_clip_pos[k] / out->pre_clip_pos[3];
|
||||
float dst_coord = dst->pre_clip_pos[k] / dst->pre_clip_pos[3];
|
||||
float in_coord = in->clip_pos[k] / in->clip_pos[3];
|
||||
float out_coord = out->clip_pos[k] / out->clip_pos[3];
|
||||
float dst_coord = dst->clip_pos[k] / dst->clip_pos[3];
|
||||
t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord);
|
||||
break;
|
||||
}
|
||||
|
|
@ -214,9 +219,9 @@ static void interp(const struct clip_stage *clip,
|
|||
* Triangle is considered null/empty if its area is equal to zero.
|
||||
*/
|
||||
static inline boolean
|
||||
is_tri_null(struct draw_context *draw, const struct prim_header *header)
|
||||
is_tri_null(const struct clip_stage *clip, const struct prim_header *header)
|
||||
{
|
||||
const unsigned pos_attr = draw_current_shader_position_output(draw);
|
||||
const unsigned pos_attr = clip->pos_attr;
|
||||
float x1 = header->v[1]->data[pos_attr][0] - header->v[0]->data[pos_attr][0];
|
||||
float y1 = header->v[1]->data[pos_attr][1] - header->v[0]->data[pos_attr][1];
|
||||
float z1 = header->v[1]->data[pos_attr][2] - header->v[0]->data[pos_attr][2];
|
||||
|
|
@ -242,6 +247,7 @@ static void emit_poly(struct draw_stage *stage,
|
|||
unsigned n,
|
||||
const struct prim_header *origPrim)
|
||||
{
|
||||
const struct clip_stage *clipper = clip_stage(stage);
|
||||
struct prim_header header;
|
||||
unsigned i;
|
||||
ushort edge_first, edge_middle, edge_last;
|
||||
|
|
@ -281,7 +287,7 @@ static void emit_poly(struct draw_stage *stage,
|
|||
header.v[2] = inlist[0]; /* the provoking vertex */
|
||||
}
|
||||
|
||||
tri_null = is_tri_null(stage->draw, &header);
|
||||
tri_null = is_tri_null(clipper, &header);
|
||||
/* If we generated a triangle with an area, aka. non-null triangle,
|
||||
* or if the previous triangle was also null then skip all subsequent
|
||||
* null triangles */
|
||||
|
|
@ -306,11 +312,18 @@ static void emit_poly(struct draw_stage *stage,
|
|||
debug_printf("Clipped tri: (flat-shade-first = %d)\n",
|
||||
stage->draw->rasterizer->flatshade_first);
|
||||
for (j = 0; j < 3; j++) {
|
||||
debug_printf(" Vert %d: clip: %f %f %f %f\n", j,
|
||||
header.v[j]->clip[0],
|
||||
header.v[j]->clip[1],
|
||||
header.v[j]->clip[2],
|
||||
header.v[j]->clip[3]);
|
||||
debug_printf(" Vert %d: clip pos: %f %f %f %f\n", j,
|
||||
header.v[j]->clip_pos[0],
|
||||
header.v[j]->clip_pos[1],
|
||||
header.v[j]->clip_pos[2],
|
||||
header.v[j]->clip_pos[3]);
|
||||
if (clipper->cv_attr >= 0) {
|
||||
debug_printf(" Vert %d: cv: %f %f %f %f\n", j,
|
||||
header.v[j]->data[clipper->cv_attr][0],
|
||||
header.v[j]->data[clipper->cv_attr][1],
|
||||
header.v[j]->data[clipper->cv_attr][2],
|
||||
header.v[j]->data[clipper->cv_attr][3]);
|
||||
}
|
||||
for (k = 0; k < draw_num_shader_outputs(stage->draw); k++) {
|
||||
debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k,
|
||||
header.v[j]->data[k][0],
|
||||
|
|
@ -320,7 +333,7 @@ static void emit_poly(struct draw_stage *stage,
|
|||
}
|
||||
}
|
||||
}
|
||||
stage->next->tri( stage->next, &header );
|
||||
stage->next->tri(stage->next, &header);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -345,15 +358,28 @@ static inline float getclipdist(const struct clip_stage *clipper,
|
|||
{
|
||||
const float *plane;
|
||||
float dp;
|
||||
if (vert->have_clipdist && plane_idx >= 6) {
|
||||
if (plane_idx < 6) {
|
||||
/* ordinary xyz view volume clipping uses pos output */
|
||||
plane = clipper->plane[plane_idx];
|
||||
dp = dot4(vert->clip_pos, plane);
|
||||
}
|
||||
else if (clipper->have_clipdist) {
|
||||
/* pick the correct clipdistance element from the output vectors */
|
||||
int _idx = plane_idx - 6;
|
||||
int cdi = _idx >= 4;
|
||||
int vidx = cdi ? _idx - 4 : _idx;
|
||||
dp = vert->data[draw_current_shader_clipdistance_output(clipper->stage.draw, cdi)][vidx];
|
||||
} else {
|
||||
/*
|
||||
* legacy user clip planes or gl_ClipVertex
|
||||
*/
|
||||
plane = clipper->plane[plane_idx];
|
||||
dp = dot4(vert->clip, plane);
|
||||
if (clipper->cv_attr >= 0) {
|
||||
dp = dot4(vert->data[clipper->cv_attr], plane);
|
||||
}
|
||||
else {
|
||||
dp = dot4(vert->clip_pos, plane);
|
||||
}
|
||||
}
|
||||
return dp;
|
||||
}
|
||||
|
|
@ -400,13 +426,22 @@ do_clip_tri(struct draw_stage *stage,
|
|||
viewport_index = draw_viewport_index(clipper->stage.draw, prov_vertex);
|
||||
|
||||
if (DEBUG_CLIP) {
|
||||
const float *v0 = header->v[0]->clip;
|
||||
const float *v1 = header->v[1]->clip;
|
||||
const float *v2 = header->v[2]->clip;
|
||||
debug_printf("Clip triangle:\n");
|
||||
const float *v0 = header->v[0]->clip_pos;
|
||||
const float *v1 = header->v[1]->clip_pos;
|
||||
const float *v2 = header->v[2]->clip_pos;
|
||||
debug_printf("Clip triangle pos:\n");
|
||||
debug_printf(" %f, %f, %f, %f\n", v0[0], v0[1], v0[2], v0[3]);
|
||||
debug_printf(" %f, %f, %f, %f\n", v1[0], v1[1], v1[2], v1[3]);
|
||||
debug_printf(" %f, %f, %f, %f\n", v2[0], v2[1], v2[2], v2[3]);
|
||||
if (clipper->cv_attr >= 0) {
|
||||
const float *v0 = header->v[0]->data[clipper->cv_attr];
|
||||
const float *v1 = header->v[1]->data[clipper->cv_attr];
|
||||
const float *v2 = header->v[2]->data[clipper->cv_attr];
|
||||
debug_printf("Clip triangle cv:\n");
|
||||
debug_printf(" %f, %f, %f, %f\n", v0[0], v0[1], v0[2], v0[3]);
|
||||
debug_printf(" %f, %f, %f, %f\n", v1[0], v1[1], v1[2], v1[3]);
|
||||
debug_printf(" %f, %f, %f, %f\n", v2[0], v2[1], v2[2], v2[3]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -555,7 +590,7 @@ do_clip_tri(struct draw_stage *stage,
|
|||
|
||||
/* Emit the polygon as triangles to the setup stage:
|
||||
*/
|
||||
emit_poly( stage, inlist, inEdges, n, header );
|
||||
emit_poly(stage, inlist, inEdges, n, header);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -567,7 +602,7 @@ do_clip_line(struct draw_stage *stage,
|
|||
struct prim_header *header,
|
||||
unsigned clipmask)
|
||||
{
|
||||
const struct clip_stage *clipper = clip_stage( stage );
|
||||
const struct clip_stage *clipper = clip_stage(stage);
|
||||
struct vertex_header *v0 = header->v[0];
|
||||
struct vertex_header *v1 = header->v[1];
|
||||
struct vertex_header *prov_vertex;
|
||||
|
|
@ -671,9 +706,9 @@ clip_point_guard_xy(struct draw_stage *stage, struct prim_header *header)
|
|||
* automatically). These would usually be captured by depth clip
|
||||
* too but this can be disabled.
|
||||
*/
|
||||
if (header->v[0]->clip[3] <= 0.0f ||
|
||||
util_is_inf_or_nan(header->v[0]->clip[0]) ||
|
||||
util_is_inf_or_nan(header->v[0]->clip[1]))
|
||||
if (header->v[0]->clip_pos[3] <= 0.0f ||
|
||||
util_is_inf_or_nan(header->v[0]->clip_pos[0]) ||
|
||||
util_is_inf_or_nan(header->v[0]->clip_pos[1]))
|
||||
return;
|
||||
}
|
||||
stage->next->point(stage->next, header);
|
||||
|
|
@ -773,7 +808,7 @@ find_interp(const struct draw_fragment_shader *fs, int *indexed_interp,
|
|||
static void
|
||||
clip_init_state(struct draw_stage *stage)
|
||||
{
|
||||
struct clip_stage *clipper = clip_stage( stage );
|
||||
struct clip_stage *clipper = clip_stage(stage);
|
||||
const struct draw_context *draw = stage->draw;
|
||||
const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
|
||||
const struct tgsi_shader_info *info = draw_get_shader_info(draw);
|
||||
|
|
@ -781,6 +816,13 @@ clip_init_state(struct draw_stage *stage)
|
|||
int indexed_interp[2];
|
||||
|
||||
clipper->pos_attr = draw_current_shader_position_output(draw);
|
||||
clipper->have_clipdist = draw_current_shader_num_written_clipdistances(draw) > 0;
|
||||
if (draw_current_shader_clipvertex_output(draw) != clipper->pos_attr) {
|
||||
clipper->cv_attr = (int)draw_current_shader_clipvertex_output(draw);
|
||||
}
|
||||
else {
|
||||
clipper->cv_attr = -1;
|
||||
}
|
||||
|
||||
/* We need to know for each attribute what kind of interpolation is
|
||||
* done on it (flat, smooth or noperspective). But the information
|
||||
|
|
|
|||
|
|
@ -477,6 +477,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
|
|||
struct pipe_context *pipe = pstip->pipe;
|
||||
struct draw_context *draw = stage->draw;
|
||||
uint num_samplers;
|
||||
uint num_sampler_views;
|
||||
|
||||
assert(stage->draw->rasterizer->poly_stipple_enable);
|
||||
|
||||
|
|
@ -490,8 +491,8 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
|
|||
|
||||
/* how many samplers? */
|
||||
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
|
||||
num_samplers = MAX2(pstip->num_sampler_views, pstip->num_samplers);
|
||||
num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1);
|
||||
num_samplers = MAX2(pstip->num_samplers, pstip->fs->sampler_unit + 1);
|
||||
num_sampler_views = MAX2(pstip->num_sampler_views, num_samplers);
|
||||
|
||||
/* plug in our sampler, texture */
|
||||
pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso;
|
||||
|
|
@ -506,7 +507,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
|
|||
num_samplers, pstip->state.samplers);
|
||||
|
||||
pstip->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
|
||||
num_samplers, pstip->state.sampler_views);
|
||||
num_sampler_views, pstip->state.sampler_views);
|
||||
|
||||
draw->suspend_flushing = FALSE;
|
||||
|
||||
|
|
|
|||
|
|
@ -86,11 +86,10 @@ struct draw_vertex_buffer {
|
|||
struct vertex_header {
|
||||
unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
|
||||
unsigned edgeflag:1;
|
||||
unsigned have_clipdist:1;
|
||||
unsigned pad:1;
|
||||
unsigned vertex_id:16;
|
||||
|
||||
float clip[4];
|
||||
float pre_clip_pos[4];
|
||||
float clip_pos[4];
|
||||
|
||||
/* This will probably become float (*data)[4] soon:
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ draw_pt_emit_prepare(struct pt_emit *emit,
|
|||
|
||||
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
|
||||
*/
|
||||
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
|
||||
draw_do_flush(draw, DRAW_FLUSH_BACKEND);
|
||||
|
||||
/* XXX: may need to defensively reset this later on as clipping can
|
||||
* clobber this state in the render backend.
|
||||
|
|
@ -80,7 +80,7 @@ draw_pt_emit_prepare(struct pt_emit *emit,
|
|||
unsigned emit_sz = 0;
|
||||
unsigned src_buffer = 0;
|
||||
unsigned output_format;
|
||||
unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
|
||||
unsigned src_offset = vinfo->attrib[i].src_index * 4 * sizeof(float);
|
||||
|
||||
output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
|
||||
emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
|
||||
|
|
@ -89,8 +89,8 @@ draw_pt_emit_prepare(struct pt_emit *emit,
|
|||
assert(emit_sz != 0);
|
||||
|
||||
if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
|
||||
src_buffer = 1;
|
||||
src_offset = 0;
|
||||
src_buffer = 1;
|
||||
src_offset = 0;
|
||||
}
|
||||
|
||||
hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
|
||||
|
|
@ -138,7 +138,7 @@ draw_pt_emit(struct pt_emit *emit,
|
|||
|
||||
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
|
||||
*/
|
||||
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
|
||||
draw_do_flush(draw, DRAW_FLUSH_BACKEND);
|
||||
|
||||
if (vertex_count == 0)
|
||||
return;
|
||||
|
|
@ -152,31 +152,31 @@ draw_pt_emit(struct pt_emit *emit,
|
|||
(ushort)translate->key.output_stride,
|
||||
(ushort)vertex_count);
|
||||
|
||||
hw_verts = render->map_vertices( render );
|
||||
hw_verts = render->map_vertices(render);
|
||||
if (!hw_verts) {
|
||||
debug_warn_once("map of vertex buffer failed (out of memory?)");
|
||||
return;
|
||||
}
|
||||
|
||||
translate->set_buffer(translate,
|
||||
0,
|
||||
vertex_data,
|
||||
stride,
|
||||
~0);
|
||||
0,
|
||||
vertex_data,
|
||||
stride,
|
||||
~0);
|
||||
|
||||
translate->set_buffer(translate,
|
||||
1,
|
||||
&draw->rasterizer->point_size,
|
||||
0,
|
||||
~0);
|
||||
1,
|
||||
&draw->rasterizer->point_size,
|
||||
0,
|
||||
~0);
|
||||
|
||||
/* fetch/translate vertex attribs to fill hw_verts[] */
|
||||
translate->run(translate,
|
||||
0,
|
||||
vertex_count,
|
||||
draw->start_instance,
|
||||
draw->instance_id,
|
||||
hw_verts );
|
||||
0,
|
||||
vertex_count,
|
||||
0,
|
||||
0,
|
||||
hw_verts);
|
||||
|
||||
render->unmap_vertices(render, 0, vertex_count - 1);
|
||||
|
||||
|
|
@ -212,7 +212,7 @@ draw_pt_emit_linear(struct pt_emit *emit,
|
|||
#endif
|
||||
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
|
||||
*/
|
||||
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
|
||||
draw_do_flush(draw, DRAW_FLUSH_BACKEND);
|
||||
|
||||
/* XXX: and work out some way to coordinate the render primitive
|
||||
* between vbuf.c and here...
|
||||
|
|
@ -224,35 +224,35 @@ draw_pt_emit_linear(struct pt_emit *emit,
|
|||
(ushort)count))
|
||||
goto fail;
|
||||
|
||||
hw_verts = render->map_vertices( render );
|
||||
hw_verts = render->map_vertices(render);
|
||||
if (!hw_verts)
|
||||
goto fail;
|
||||
|
||||
translate->set_buffer(translate, 0,
|
||||
vertex_data, stride, count - 1);
|
||||
vertex_data, stride, count - 1);
|
||||
|
||||
translate->set_buffer(translate, 1,
|
||||
&draw->rasterizer->point_size,
|
||||
0, ~0);
|
||||
&draw->rasterizer->point_size,
|
||||
0, ~0);
|
||||
|
||||
translate->run(translate,
|
||||
0,
|
||||
count,
|
||||
draw->start_instance,
|
||||
draw->instance_id,
|
||||
0,
|
||||
0,
|
||||
hw_verts);
|
||||
|
||||
if (0) {
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
|
||||
draw_dump_emitted_vertex( emit->vinfo,
|
||||
(const uint8_t *)hw_verts +
|
||||
translate->key.output_stride * i );
|
||||
draw_dump_emitted_vertex(emit->vinfo,
|
||||
(const uint8_t *)hw_verts +
|
||||
translate->key.output_stride * i);
|
||||
}
|
||||
}
|
||||
|
||||
render->unmap_vertices( render, 0, count - 1 );
|
||||
render->unmap_vertices(render, 0, count - 1);
|
||||
|
||||
for (start = i = 0;
|
||||
i < prim_info->primitive_count;
|
||||
|
|
@ -262,7 +262,7 @@ draw_pt_emit_linear(struct pt_emit *emit,
|
|||
start,
|
||||
prim_info->primitive_lengths[i]);
|
||||
}
|
||||
|
||||
|
||||
render->release_vertices(render);
|
||||
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -453,6 +453,7 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
|
|||
draw->vs.vertex_shader->info.writes_viewport_index)) {
|
||||
clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info, prim_info );
|
||||
}
|
||||
/* "clipped" also includes non-one edgeflag */
|
||||
if (clipped) {
|
||||
opt |= PT_PIPELINE;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ initialize_vertex_header(struct vertex_header *header)
|
|||
{
|
||||
header->clipmask = 0;
|
||||
header->edgeflag = 1;
|
||||
header->have_clipdist = 0;
|
||||
header->pad = 0;
|
||||
header->vertex_id = UNDEFINED_VERTEX_ID;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -275,7 +275,7 @@ void draw_pt_so_emit( struct pt_so_emit *emit,
|
|||
emit->generated_primitives = 0;
|
||||
emit->input_vertex_stride = input_verts->stride;
|
||||
if (emit->use_pre_clip_pos)
|
||||
emit->pre_clip_pos = input_verts->verts->pre_clip_pos;
|
||||
emit->pre_clip_pos = input_verts->verts->clip_pos;
|
||||
|
||||
emit->inputs = (const float (*)[4])input_verts->verts->data;
|
||||
|
||||
|
|
|
|||
|
|
@ -22,10 +22,6 @@
|
|||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
|
||||
#endif
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "glsl/nir/nir.h"
|
||||
#include "glsl/nir/nir_control_flow.h"
|
||||
|
|
@ -1069,7 +1065,9 @@ ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
|||
static void
|
||||
ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
|
||||
{
|
||||
nir_ssa_def *cmp = nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
|
||||
nir_ssa_def *cmp = nir_bany_inequal4(b, nir_flt(b, src[0],
|
||||
nir_imm_float(b, 0.0)),
|
||||
nir_imm_int(b, 0));
|
||||
nir_intrinsic_instr *discard =
|
||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
|
||||
discard->src[0] = nir_src_for_ssa(cmp);
|
||||
|
|
@ -1901,6 +1899,7 @@ ttn_emit_instruction(struct ttn_compile *c)
|
|||
&tgsi_dst->Indirect : NULL;
|
||||
|
||||
store->num_components = 4;
|
||||
store->const_index[0] = 0xf;
|
||||
store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
|
||||
store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
|
||||
|
||||
|
|
|
|||
|
|
@ -398,9 +398,8 @@ util_blitter_save_stencil_ref(struct blitter_context *blitter,
|
|||
blitter->saved_stencil_ref = *state;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_rasterizer(struct blitter_context *blitter,
|
||||
void *state)
|
||||
static inline void
|
||||
util_blitter_save_rasterizer(struct blitter_context *blitter, void *state)
|
||||
{
|
||||
blitter->saved_rs_state = state;
|
||||
}
|
||||
|
|
@ -459,8 +458,8 @@ util_blitter_save_scissor(struct blitter_context *blitter,
|
|||
blitter->saved_scissor = *state;
|
||||
}
|
||||
|
||||
static inline
|
||||
void util_blitter_save_fragment_sampler_states(
|
||||
static inline void
|
||||
util_blitter_save_fragment_sampler_states(
|
||||
struct blitter_context *blitter,
|
||||
unsigned num_sampler_states,
|
||||
void **sampler_states)
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
|
||||
|
||||
struct pipe_context;
|
||||
|
|
|
|||
|
|
@ -588,8 +588,11 @@ dd_context_flush(struct pipe_context *_pipe,
|
|||
static void
|
||||
dd_before_draw(struct dd_context *dctx)
|
||||
{
|
||||
if (dd_screen(dctx->base.screen)->mode == DD_DETECT_HANGS &&
|
||||
!dd_screen(dctx->base.screen)->no_flush)
|
||||
struct dd_screen *dscreen = dd_screen(dctx->base.screen);
|
||||
|
||||
if (dscreen->mode == DD_DETECT_HANGS &&
|
||||
!dscreen->no_flush &&
|
||||
dctx->num_draw_calls >= dscreen->skip_count)
|
||||
dd_flush_and_handle_hang(dctx, NULL, 0,
|
||||
"GPU hang most likely caused by internal "
|
||||
"driver commands");
|
||||
|
|
@ -598,22 +601,31 @@ dd_before_draw(struct dd_context *dctx)
|
|||
static void
|
||||
dd_after_draw(struct dd_context *dctx, struct dd_call *call)
|
||||
{
|
||||
switch (dd_screen(dctx->base.screen)->mode) {
|
||||
case DD_DETECT_HANGS:
|
||||
if (!dd_screen(dctx->base.screen)->no_flush &&
|
||||
dd_flush_and_check_hang(dctx, NULL, 0)) {
|
||||
dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
|
||||
struct dd_screen *dscreen = dd_screen(dctx->base.screen);
|
||||
|
||||
/* Terminate the process to prevent future hangs. */
|
||||
dd_kill_process();
|
||||
if (dctx->num_draw_calls >= dscreen->skip_count) {
|
||||
switch (dscreen->mode) {
|
||||
case DD_DETECT_HANGS:
|
||||
if (!dscreen->no_flush &&
|
||||
dd_flush_and_check_hang(dctx, NULL, 0)) {
|
||||
dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
|
||||
|
||||
/* Terminate the process to prevent future hangs. */
|
||||
dd_kill_process();
|
||||
}
|
||||
break;
|
||||
case DD_DUMP_ALL_CALLS:
|
||||
dd_dump_call(dctx, call, 0);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
break;
|
||||
case DD_DUMP_ALL_CALLS:
|
||||
dd_dump_call(dctx, call, 0);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
++dctx->num_draw_calls;
|
||||
if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)
|
||||
fprintf(stderr, "Gallium debugger reached %u draw calls.\n",
|
||||
dctx->num_draw_calls);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ struct dd_screen
|
|||
unsigned timeout_ms;
|
||||
enum dd_mode mode;
|
||||
bool no_flush;
|
||||
unsigned skip_count;
|
||||
};
|
||||
|
||||
struct dd_query
|
||||
|
|
@ -110,6 +111,8 @@ struct dd_context
|
|||
struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS];
|
||||
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
|
||||
float tess_default_levels[6];
|
||||
|
||||
unsigned num_draw_calls;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -290,6 +290,9 @@ ddebug_screen_create(struct pipe_screen *screen)
|
|||
puts(" $HOME/"DD_DIR"/ when a hang is detected.");
|
||||
puts(" If 'noflush' is specified, only detect hangs in pipe->flush.");
|
||||
puts("");
|
||||
puts(" GALLIUM_DDEBUG_SKIP=[count]");
|
||||
puts(" Skip flush and hang detection for the given initial number of draw calls.");
|
||||
puts("");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
|
@ -349,5 +352,11 @@ ddebug_screen_create(struct pipe_screen *screen)
|
|||
assert(0);
|
||||
}
|
||||
|
||||
dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0);
|
||||
if (dscreen->skip_count > 0) {
|
||||
fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n",
|
||||
dscreen->skip_count);
|
||||
}
|
||||
|
||||
return &dscreen->base;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
|||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
|||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
|
|
@ -1421,15 +1421,23 @@ static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_
|
|||
#define REG_A3XX_PC_RESTART_INDEX 0x000021ed
|
||||
|
||||
#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000030
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_COMPUTEMODE 0x00000100
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK 0x00fff000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT 12
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_FSONLYTEX 0x02000000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27
|
||||
|
|
@ -1443,17 +1451,39 @@ static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val)
|
|||
#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000
|
||||
|
||||
#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x000000c0
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_ZWCOORD 0x02000000
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK 0x00ff0000
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT 16
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK 0xff000000
|
||||
#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT 24
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202
|
||||
#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK 0x000003fc
|
||||
#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT 2
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK 0x03fc0000
|
||||
#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT 18
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000
|
||||
#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val)
|
||||
|
|
@ -1470,13 +1500,13 @@ static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val)
|
|||
}
|
||||
|
||||
#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204
|
||||
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff
|
||||
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff
|
||||
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0
|
||||
static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000
|
||||
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000
|
||||
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12
|
||||
static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
|
||||
{
|
||||
|
|
@ -1490,13 +1520,13 @@ static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val)
|
|||
}
|
||||
|
||||
#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205
|
||||
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff
|
||||
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff
|
||||
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0
|
||||
static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000
|
||||
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000
|
||||
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12
|
||||
static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
|
||||
{
|
||||
|
|
@ -1510,13 +1540,13 @@ static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val)
|
|||
}
|
||||
|
||||
#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206
|
||||
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff
|
||||
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff
|
||||
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0
|
||||
static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000
|
||||
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000
|
||||
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16
|
||||
static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
|
||||
{
|
||||
|
|
@ -1524,13 +1554,13 @@ static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
|
|||
}
|
||||
|
||||
#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207
|
||||
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff
|
||||
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff
|
||||
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0
|
||||
static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000
|
||||
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000
|
||||
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16
|
||||
static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
|
||||
{
|
||||
|
|
@ -2012,24 +2042,19 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffe
|
|||
return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004
|
||||
#define A3XX_SP_VS_CTRL_REG0_ALUSCHMODE 0x00000008
|
||||
#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
|
||||
#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
|
||||
static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
|
||||
#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00
|
||||
#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
|
||||
static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
|
||||
#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
|
||||
static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000
|
||||
#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20
|
||||
static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
|
||||
|
|
@ -2037,8 +2062,6 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
|
|||
return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000
|
||||
#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000
|
||||
#define A3XX_SP_VS_CTRL_REG0_COMPUTEMODE 0x00800000
|
||||
#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000
|
||||
#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24
|
||||
static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
|
||||
|
|
@ -2079,7 +2102,8 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val)
|
|||
{
|
||||
return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000
|
||||
#define A3XX_SP_VS_PARAM_REG_POS2DMODE 0x00010000
|
||||
#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0x01f00000
|
||||
#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20
|
||||
static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
|
||||
{
|
||||
|
|
@ -2089,24 +2113,26 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
|
|||
static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
|
||||
|
||||
static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
|
||||
#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff
|
||||
#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff
|
||||
#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0
|
||||
static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_OUT_REG_A_HALF 0x00000100
|
||||
#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00
|
||||
#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9
|
||||
static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000
|
||||
#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000
|
||||
#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16
|
||||
static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_OUT_REG_B_HALF 0x01000000
|
||||
#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000
|
||||
#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25
|
||||
static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
|
||||
|
|
@ -2117,25 +2143,25 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
|
|||
static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
|
||||
|
||||
static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x0000007f
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0
|
||||
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x00007f00
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8
|
||||
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x007f0000
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16
|
||||
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0x7f000000
|
||||
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24
|
||||
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
|
||||
{
|
||||
|
|
@ -2143,6 +2169,12 @@ static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
|
|||
}
|
||||
|
||||
#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4
|
||||
#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff
|
||||
#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0
|
||||
/*
 * Packs val into the FIRSTEXECINSTROFFSET field of SP_VS_OBJ_OFFSET_REG
 * (mask 0x0000ffff, shift 0, i.e. bits 15:0).  Higher bits of val are
 * masked off silently.
 */
static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
|
||||
#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
|
||||
static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
|
||||
|
|
@ -2159,8 +2191,38 @@ static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
|
|||
#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5
|
||||
|
||||
#define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG 0x000022d6
|
||||
#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff
|
||||
#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0
|
||||
/*
 * Packs val into the MEMSIZEPERITEM field of SP_VS_PVT_MEM_PARAM_REG
 * (mask 0x000000ff, shift 0, i.e. bits 7:0).  Values above 0xff are
 * truncated by the mask.
 */
static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00
|
||||
#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8
|
||||
/*
 * Packs val into the HWSTACKOFFSET field of SP_VS_PVT_MEM_PARAM_REG
 * (mask 0x00ffff00, shift 8, i.e. bits 23:8).  Values wider than 16 bits
 * are truncated by the mask.
 */
static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000
|
||||
#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24
|
||||
/*
 * Packs val into the HWSTACKSIZEPERTHREAD field of SP_VS_PVT_MEM_PARAM_REG
 * (mask 0xff000000, shift 24, i.e. bits 31:24).  Only the low 8 bits of
 * val survive the 32-bit shift.
 */
static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7
|
||||
#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f
|
||||
#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0
|
||||
/*
 * Packs val into the BURSTLEN field of SP_VS_PVT_MEM_ADDR_REG
 * (mask 0x0000001f, shift 0, i.e. bits 4:0).  Values above 0x1f are
 * truncated by the mask.
 */
static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0
|
||||
#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5
|
||||
/*
 * Packs val into the SHADERSTARTADDRESS field of SP_VS_PVT_MEM_ADDR_REG
 * (mask 0xffffffe0, shift 5, i.e. bits 31:5).
 *
 * Unlike the plain shift-and-mask helpers, val is first shifted right by 5
 * and then left by __SHIFT (5), so the caller passes the unshifted value and
 * its low five bits are discarded.
 * NOTE(review): presumably val is a 32-byte-aligned address - confirm.
 */
static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val)
|
||||
{
|
||||
return ((val >> 5) << A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8
|
||||
|
||||
|
|
@ -2186,24 +2248,22 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffe
|
|||
return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004
|
||||
#define A3XX_SP_FS_CTRL_REG0_ALUSCHMODE 0x00000008
|
||||
#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
|
||||
#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
|
||||
/*
 * Packs val into the HALFREGFOOTPRINT field of SP_FS_CTRL_REG0
 * (mask 0x000003f0, shift 4, i.e. bits 9:4).  Values above 0x3f are
 * truncated by the mask.
 */
static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
|
||||
#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00
|
||||
#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
|
||||
/*
 * Packs val into the FULLREGFOOTPRINT field of SP_FS_CTRL_REG0: val is
 * shifted left by FULLREGFOOTPRINT__SHIFT (10) and ANDed with
 * FULLREGFOOTPRINT__MASK, so bits outside the field are dropped silently.
 * The usable width of val is whatever the __MASK definition in effect allows.
 */
static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
|
||||
#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
|
||||
/*
 * Packs val into the INOUTREGOVERLAP field of SP_FS_CTRL_REG0
 * (mask 0x000c0000, shift 18, i.e. bits 19:18).  Values above 3 are
 * truncated by the mask.
 * NOTE(review): this listing also shows an object-like #define with the same
 * name (value 0x00040000) a few lines further down - check which of the two
 * belongs to the current header.
 */
static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_CTRL_REG0_FSBYPASSENABLE 0x00020000
|
||||
#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP 0x00040000
|
||||
#define A3XX_SP_FS_CTRL_REG0_OUTORDERED 0x00080000
|
||||
#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000
|
||||
#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20
|
||||
static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
|
||||
|
|
@ -2239,7 +2299,7 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
|
|||
{
|
||||
return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x3f000000
|
||||
#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x7f000000
|
||||
#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24
|
||||
static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
|
||||
{
|
||||
|
|
@ -2247,6 +2307,12 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
|
|||
}
|
||||
|
||||
#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2
|
||||
#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff
|
||||
#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0
|
||||
/*
 * Packs val into the FIRSTEXECINSTROFFSET field of SP_FS_OBJ_OFFSET_REG
 * (mask 0x0000ffff, shift 0, i.e. bits 15:0).  Higher bits of val are
 * masked off silently.
 */
static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
|
||||
#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
|
||||
static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
|
||||
|
|
@ -2263,8 +2329,38 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
|
|||
#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3
|
||||
|
||||
#define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG 0x000022e4
|
||||
#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff
|
||||
#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0
|
||||
/*
 * Packs val into the MEMSIZEPERITEM field of SP_FS_PVT_MEM_PARAM_REG
 * (mask 0x000000ff, shift 0, i.e. bits 7:0).  Values above 0xff are
 * truncated by the mask.
 */
static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00
|
||||
#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8
|
||||
/*
 * Packs val into the HWSTACKOFFSET field of SP_FS_PVT_MEM_PARAM_REG
 * (mask 0x00ffff00, shift 8, i.e. bits 23:8).  Values wider than 16 bits
 * are truncated by the mask.
 */
static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000
|
||||
#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24
|
||||
/*
 * Packs val into the HWSTACKSIZEPERTHREAD field of SP_FS_PVT_MEM_PARAM_REG
 * (mask 0xff000000, shift 24, i.e. bits 31:24).  Only the low 8 bits of
 * val survive the 32-bit shift.
 */
static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5
|
||||
#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f
|
||||
#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0
|
||||
/*
 * Packs val into the BURSTLEN field of SP_FS_PVT_MEM_ADDR_REG
 * (mask 0x0000001f, shift 0, i.e. bits 4:0).  Values above 0x1f are
 * truncated by the mask.
 */
static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK;
|
||||
}
|
||||
#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0
|
||||
#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5
|
||||
/*
 * Packs val into the SHADERSTARTADDRESS field of SP_FS_PVT_MEM_ADDR_REG
 * (mask 0xffffffe0, shift 5, i.e. bits 31:5).
 *
 * Unlike the plain shift-and-mask helpers, val is first shifted right by 5
 * and then left by __SHIFT (5), so the caller passes the unshifted value and
 * its low five bits are discarded.
 * NOTE(review): presumably val is a 32-byte-aligned address - confirm.
 */
static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val)
|
||||
{
|
||||
return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6
|
||||
|
||||
|
|
|
|||
|
|
@ -229,7 +229,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
|
|||
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
|
||||
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
|
||||
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
|
||||
COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_ZWCOORD));
|
||||
COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(regid(0,0)) |
|
||||
A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(regid(0,2))));
|
||||
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
|
||||
OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
|
||||
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
|
||||
|
|
@ -254,10 +255,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
|
|||
COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
|
||||
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
|
||||
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
|
||||
A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
|
||||
A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
|
||||
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
|
||||
COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
|
||||
A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
|
||||
OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
|
||||
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
|
||||
|
|
@ -336,7 +335,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
|
|||
COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
|
||||
A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
|
||||
A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
|
||||
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
|
||||
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
|
||||
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
|
||||
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
|
||||
COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
|||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
|
|
@ -157,58 +157,62 @@ enum a4xx_vtx_fmt {
|
|||
VFMT4_10_10_10_2_UNORM = 57,
|
||||
VFMT4_10_10_10_2_SINT = 58,
|
||||
VFMT4_10_10_10_2_SNORM = 59,
|
||||
VFMT4_2_10_10_10_UINT = 60,
|
||||
VFMT4_2_10_10_10_UNORM = 61,
|
||||
VFMT4_2_10_10_10_SINT = 62,
|
||||
VFMT4_2_10_10_10_SNORM = 63,
|
||||
};
|
||||
|
||||
enum a4xx_tex_fmt {
|
||||
TFMT4_5_6_5_UNORM = 11,
|
||||
TFMT4_5_5_5_1_UNORM = 9,
|
||||
TFMT4_A8_UNORM = 3,
|
||||
TFMT4_8_UNORM = 4,
|
||||
TFMT4_8_SNORM = 5,
|
||||
TFMT4_8_UINT = 6,
|
||||
TFMT4_8_SINT = 7,
|
||||
TFMT4_4_4_4_4_UNORM = 8,
|
||||
TFMT4_X8Z24_UNORM = 71,
|
||||
TFMT4_5_5_5_1_UNORM = 9,
|
||||
TFMT4_5_6_5_UNORM = 11,
|
||||
TFMT4_L8_A8_UNORM = 13,
|
||||
TFMT4_8_8_UNORM = 14,
|
||||
TFMT4_8_8_SNORM = 15,
|
||||
TFMT4_8_8_UINT = 16,
|
||||
TFMT4_8_8_SINT = 17,
|
||||
TFMT4_16_UNORM = 18,
|
||||
TFMT4_16_SNORM = 19,
|
||||
TFMT4_16_FLOAT = 20,
|
||||
TFMT4_16_UINT = 21,
|
||||
TFMT4_16_SINT = 22,
|
||||
TFMT4_8_8_8_8_UNORM = 28,
|
||||
TFMT4_8_8_8_8_SNORM = 29,
|
||||
TFMT4_8_8_8_8_UINT = 30,
|
||||
TFMT4_8_8_8_8_SINT = 31,
|
||||
TFMT4_9_9_9_E5_FLOAT = 32,
|
||||
TFMT4_10_10_10_2_UNORM = 33,
|
||||
TFMT4_10_10_10_2_UINT = 34,
|
||||
TFMT4_A8_UNORM = 3,
|
||||
TFMT4_L8_A8_UNORM = 13,
|
||||
TFMT4_8_UNORM = 4,
|
||||
TFMT4_8_8_UNORM = 14,
|
||||
TFMT4_8_8_8_8_UNORM = 28,
|
||||
TFMT4_8_SNORM = 5,
|
||||
TFMT4_8_8_SNORM = 15,
|
||||
TFMT4_8_8_8_8_SNORM = 29,
|
||||
TFMT4_8_UINT = 6,
|
||||
TFMT4_8_8_UINT = 16,
|
||||
TFMT4_8_8_8_8_UINT = 30,
|
||||
TFMT4_8_SINT = 7,
|
||||
TFMT4_8_8_SINT = 17,
|
||||
TFMT4_8_8_8_8_SINT = 31,
|
||||
TFMT4_16_UNORM = 18,
|
||||
TFMT4_11_11_10_FLOAT = 37,
|
||||
TFMT4_16_16_UNORM = 38,
|
||||
TFMT4_16_16_16_16_UNORM = 51,
|
||||
TFMT4_16_SNORM = 19,
|
||||
TFMT4_16_16_SNORM = 39,
|
||||
TFMT4_16_16_16_16_SNORM = 52,
|
||||
TFMT4_16_UINT = 21,
|
||||
TFMT4_16_16_UINT = 41,
|
||||
TFMT4_16_16_16_16_UINT = 54,
|
||||
TFMT4_16_SINT = 22,
|
||||
TFMT4_16_16_SINT = 42,
|
||||
TFMT4_16_16_16_16_SINT = 55,
|
||||
TFMT4_32_UINT = 44,
|
||||
TFMT4_32_32_UINT = 57,
|
||||
TFMT4_32_32_32_32_UINT = 64,
|
||||
TFMT4_32_SINT = 45,
|
||||
TFMT4_32_32_SINT = 58,
|
||||
TFMT4_32_32_32_32_SINT = 65,
|
||||
TFMT4_16_FLOAT = 20,
|
||||
TFMT4_16_16_FLOAT = 40,
|
||||
TFMT4_16_16_16_16_FLOAT = 53,
|
||||
TFMT4_16_16_UINT = 41,
|
||||
TFMT4_16_16_SINT = 42,
|
||||
TFMT4_32_FLOAT = 43,
|
||||
TFMT4_32_UINT = 44,
|
||||
TFMT4_32_SINT = 45,
|
||||
TFMT4_16_16_16_16_UNORM = 51,
|
||||
TFMT4_16_16_16_16_SNORM = 52,
|
||||
TFMT4_16_16_16_16_FLOAT = 53,
|
||||
TFMT4_16_16_16_16_UINT = 54,
|
||||
TFMT4_16_16_16_16_SINT = 55,
|
||||
TFMT4_32_32_FLOAT = 56,
|
||||
TFMT4_32_32_32_32_FLOAT = 63,
|
||||
TFMT4_32_32_UINT = 57,
|
||||
TFMT4_32_32_SINT = 58,
|
||||
TFMT4_32_32_32_FLOAT = 59,
|
||||
TFMT4_32_32_32_UINT = 60,
|
||||
TFMT4_32_32_32_SINT = 61,
|
||||
TFMT4_9_9_9_E5_FLOAT = 32,
|
||||
TFMT4_11_11_10_FLOAT = 37,
|
||||
TFMT4_32_32_32_32_FLOAT = 63,
|
||||
TFMT4_32_32_32_32_UINT = 64,
|
||||
TFMT4_32_32_32_32_SINT = 65,
|
||||
TFMT4_X8Z24_UNORM = 71,
|
||||
TFMT4_DXT1 = 86,
|
||||
TFMT4_DXT3 = 87,
|
||||
TFMT4_DXT5 = 88,
|
||||
|
|
@ -800,6 +804,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
|
|||
}
|
||||
#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080
|
||||
#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000
|
||||
#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000
|
||||
#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000
|
||||
|
||||
#define REG_A4XX_RB_DEPTH_CLEAR 0x00002102
|
||||
|
|
@ -1060,6 +1065,9 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x
|
|||
|
||||
#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D 0x0000004d
|
||||
|
||||
#define REG_A4XX_RBBM_POWER_CNTL_IP 0x00000098
|
||||
#define A4XX_RBBM_POWER_CNTL_IP_SW_COLLAPSE 0x00000001
|
||||
|
||||
#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c
|
||||
|
||||
/* Returns 0x68 + i0: the offset of entry i0 in the RBBM_CLOCK_CTL_SP register array (one register per entry). i0 is not range-checked. */
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; }
|
||||
|
|
@ -1110,6 +1118,10 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { r
|
|||
|
||||
/* Returns 0x8e + i0: the offset of entry i0 in the RBBM_CLOCK_DELAY_RB_MARB_CCU_L1 register array (one register per entry). i0 is not range-checked. */
static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; }
|
||||
|
||||
#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x00000099
|
||||
|
||||
#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x0000009a
|
||||
|
||||
#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168
|
||||
|
||||
#define REG_A4XX_RBBM_PERFCTR_CTL 0x00000170
|
||||
|
|
@ -1163,6 +1175,11 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)
|
|||
|
||||
#define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5 0x0000019f
|
||||
|
||||
#define REG_A4XX_RBBM_POWER_STATUS 0x000001b0
|
||||
#define A4XX_RBBM_POWER_STATUS_SP_TP_PWR_ON 0x00100000
|
||||
|
||||
#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x000001b8
|
||||
|
||||
#define REG_A4XX_CP_SCRATCH_UMASK 0x00000228
|
||||
|
||||
#define REG_A4XX_CP_SCRATCH_ADDR 0x00000229
|
||||
|
|
@ -1265,6 +1282,28 @@ static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578
|
|||
|
||||
#define REG_A4XX_SP_MODE_CONTROL 0x00000ec3
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_0 0x00000ec4
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_1 0x00000ec5
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_2 0x00000ec6
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_3 0x00000ec7
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_4 0x00000ec8
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_5 0x00000ec9
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_6 0x00000eca
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_7 0x00000ecb
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_8 0x00000ecc
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_9 0x00000ecd
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_10 0x00000ece
|
||||
|
||||
#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf
|
||||
|
||||
#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0
|
||||
|
|
@ -2180,6 +2219,7 @@ static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val)
|
|||
|
||||
#define REG_A4XX_GRAS_ALPHA_CONTROL 0x00002073
|
||||
#define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE 0x00000004
|
||||
#define A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS 0x00000008
|
||||
|
||||
#define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE 0x00002074
|
||||
#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff
|
||||
|
|
|
|||
|
|
@ -529,14 +529,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
|||
|
||||
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
|
||||
OUT_RING(ring, zsa->rb_depth_control |
|
||||
COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE));
|
||||
COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
|
||||
COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
|
||||
|
||||
/* maybe this register/bitfield needs a better name.. this
|
||||
* appears to be just disabling early-z
|
||||
*/
|
||||
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
|
||||
OUT_RING(ring, zsa->gras_alpha_control |
|
||||
COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE));
|
||||
COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) |
|
||||
COND(fragz && fp->frag_coord, A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS));
|
||||
}
|
||||
|
||||
if (dirty & FD_DIRTY_RASTERIZER) {
|
||||
|
|
|
|||
|
|
@ -420,9 +420,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
|
|||
COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
|
||||
COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD |
|
||||
A4XX_RB_RENDER_CONTROL2_YCOORD |
|
||||
// TODO enabling gl_FragCoord.z is causing lockups on 0ad (but seems
|
||||
// to work everywhere else).
|
||||
// A4XX_RB_RENDER_CONTROL2_ZCOORD |
|
||||
A4XX_RB_RENDER_CONTROL2_ZCOORD |
|
||||
A4XX_RB_RENDER_CONTROL2_WCOORD));
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
|||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
|||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
|
|
@ -199,7 +199,11 @@ enum adreno_state_type {
|
|||
|
||||
/*
 * State-source selector values.  The numbering is sparse: 1 is not assigned,
 * and the enumerators use 0 and 2 through 6.
 * NOTE(review): presumably these are the encodings for the state-source
 * field of the CP_LOAD_STATE packet - confirm against the rnndb XML.
 */
enum adreno_state_src {
|
||||
SS_DIRECT = 0,
|
||||
SS_INVALID_ALL_IC = 2,
|
||||
SS_INVALID_PART_IC = 3,
|
||||
SS_INDIRECT = 4,
|
||||
SS_INDIRECT_TCM = 5,
|
||||
SS_INDIRECT_STM = 6,
|
||||
};
|
||||
|
||||
enum a4xx_index_size {
|
||||
|
|
@ -227,7 +231,7 @@ static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val)
|
|||
{
|
||||
return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE_0_STATE_BLOCK__MASK;
|
||||
}
|
||||
#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0x7fc00000
|
||||
#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0xffc00000
|
||||
#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT 22
|
||||
static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -114,8 +114,6 @@ int main(int argc, char **argv)
|
|||
void *ptr;
|
||||
size_t size;
|
||||
|
||||
fd_mesa_debug |= FD_DBG_DISASM;
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
memset(&v, 0, sizeof(v));
|
||||
|
||||
|
|
@ -128,7 +126,7 @@ int main(int argc, char **argv)
|
|||
|
||||
while (n < argc) {
|
||||
if (!strcmp(argv[n], "--verbose")) {
|
||||
fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS;
|
||||
fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS | FD_DBG_DISASM;
|
||||
n++;
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1264,7 +1264,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
|||
|
||||
/* handles array reads: */
|
||||
static void
|
||||
emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
||||
emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
nir_deref_var *dvar = intr->variables[0];
|
||||
|
|
@ -1305,7 +1305,7 @@ emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
|
|||
|
||||
/* handles array writes: */
|
||||
static void
|
||||
emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||
emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
nir_deref_var *dvar = intr->variables[0];
|
||||
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
|
||||
|
|
@ -1321,6 +1321,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
case nir_deref_array_type_direct:
|
||||
/* direct access does not require anything special: */
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
/* ttn doesn't generate partial writemasks */
|
||||
assert(intr->const_index[0] ==
|
||||
(1 << intr->num_components) - 1);
|
||||
|
||||
unsigned n = darr->base_offset * 4 + i;
|
||||
compile_assert(ctx, n < arr->length);
|
||||
arr->arr[n] = src[i];
|
||||
|
|
@ -1333,6 +1337,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
struct ir3_instruction *addr =
|
||||
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
/* ttn doesn't generate partial writemasks */
|
||||
assert(intr->const_index[0] ==
|
||||
(1 << intr->num_components) - 1);
|
||||
|
||||
struct ir3_instruction *store;
|
||||
unsigned n = darr->base_offset * 4 + i;
|
||||
compile_assert(ctx, n < arr->length);
|
||||
|
|
@ -1392,7 +1400,7 @@ static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
|
|||
}
|
||||
|
||||
static void
|
||||
emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||
emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
|
||||
struct ir3_instruction **dst, **src;
|
||||
|
|
@ -1454,10 +1462,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
|
|||
}
|
||||
break;
|
||||
case nir_intrinsic_load_var:
|
||||
emit_intrinisic_load_var(ctx, intr, dst);
|
||||
emit_intrinsic_load_var(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_store_var:
|
||||
emit_intrinisic_store_var(ctx, intr);
|
||||
emit_intrinsic_store_var(ctx, intr);
|
||||
break;
|
||||
case nir_intrinsic_store_output:
|
||||
const_offset = nir_src_as_const_value(intr->src[1]);
|
||||
|
|
@ -1927,7 +1935,7 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
|
|||
emit_alu(ctx, nir_instr_as_alu(instr));
|
||||
break;
|
||||
case nir_instr_type_intrinsic:
|
||||
emit_intrinisic(ctx, nir_instr_as_intrinsic(instr));
|
||||
emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
|
||||
break;
|
||||
case nir_instr_type_load_const:
|
||||
emit_load_const(ctx, nir_instr_as_load_const(instr));
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ block_id(struct ir3_block *block)
|
|||
#ifdef DEBUG
|
||||
return block->serialno;
|
||||
#else
|
||||
return (uint32_t)(uint64_t)block;
|
||||
return (uint32_t)(unsigned long)block;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,8 @@ enum lp_interp {
|
|||
LP_INTERP_LINEAR,
|
||||
LP_INTERP_PERSPECTIVE,
|
||||
LP_INTERP_POSITION,
|
||||
LP_INTERP_FACING
|
||||
LP_INTERP_FACING,
|
||||
LP_INTERP_ZERO
|
||||
};
|
||||
|
||||
struct lp_shader_input {
|
||||
|
|
|
|||
|
|
@ -108,22 +108,28 @@ struct llvmpipe_context {
|
|||
struct vertex_info vertex_info;
|
||||
|
||||
/** Which vertex shader output slot contains color */
|
||||
int color_slot[2];
|
||||
uint8_t color_slot[2];
|
||||
|
||||
/** Which vertex shader output slot contains bcolor */
|
||||
int bcolor_slot[2];
|
||||
uint8_t bcolor_slot[2];
|
||||
|
||||
/** Which vertex shader output slot contains point size */
|
||||
int psize_slot;
|
||||
uint8_t psize_slot;
|
||||
|
||||
/** Which vertex shader output slot contains viewport index */
|
||||
int viewport_index_slot;
|
||||
uint8_t viewport_index_slot;
|
||||
|
||||
/** Which geometry shader output slot contains layer */
|
||||
int layer_slot;
|
||||
uint8_t layer_slot;
|
||||
|
||||
/** A fake frontface output for unfilled primitives */
|
||||
int face_slot;
|
||||
uint8_t face_slot;
|
||||
|
||||
/** Which output slot is used for the fake vp index info */
|
||||
uint8_t fake_vpindex_slot;
|
||||
|
||||
/** Which output slot is used for the fake layer info */
|
||||
uint8_t fake_layer_slot;
|
||||
|
||||
/** Depth format and bias settings. */
|
||||
boolean floating_point_depth;
|
||||
|
|
|
|||
|
|
@ -1207,7 +1207,7 @@ lp_setup_update_state( struct lp_setup_context *setup,
|
|||
/* Will probably need to move this somewhere else, just need
|
||||
* to know about vertex shader point size attribute.
|
||||
*/
|
||||
setup->psize = lp->psize_slot;
|
||||
setup->psize_slot = lp->psize_slot;
|
||||
setup->viewport_index_slot = lp->viewport_index_slot;
|
||||
setup->layer_slot = lp->layer_slot;
|
||||
setup->face_slot = lp->face_slot;
|
||||
|
|
|
|||
|
|
@ -105,10 +105,10 @@ struct lp_setup_context
|
|||
float pixel_offset;
|
||||
float line_width;
|
||||
float point_size;
|
||||
float psize;
|
||||
unsigned viewport_index_slot;
|
||||
unsigned layer_slot;
|
||||
int face_slot;
|
||||
uint8_t psize_slot;
|
||||
uint8_t viewport_index_slot;
|
||||
uint8_t layer_slot;
|
||||
uint8_t face_slot;
|
||||
|
||||
struct pipe_framebuffer_state fb;
|
||||
struct u_rect framebuffer;
|
||||
|
|
|
|||
|
|
@ -328,7 +328,7 @@ try_setup_point( struct lp_setup_context *setup,
|
|||
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
|
||||
/* x/y positions in fixed point */
|
||||
const struct lp_setup_variant_key *key = &setup->setup.variant->key;
|
||||
const int sizeAttr = setup->psize;
|
||||
const int sizeAttr = setup->psize_slot;
|
||||
const float size
|
||||
= (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
|
||||
: setup->point_size;
|
||||
|
|
|
|||
|
|
@ -55,10 +55,14 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
|
||||
draw_prepare_shader_outputs(llvmpipe->draw);
|
||||
|
||||
llvmpipe->color_slot[0] = -1;
|
||||
llvmpipe->color_slot[1] = -1;
|
||||
llvmpipe->bcolor_slot[0] = -1;
|
||||
llvmpipe->bcolor_slot[1] = -1;
|
||||
llvmpipe->color_slot[0] = 0;
|
||||
llvmpipe->color_slot[1] = 0;
|
||||
llvmpipe->bcolor_slot[0] = 0;
|
||||
llvmpipe->bcolor_slot[1] = 0;
|
||||
llvmpipe->viewport_index_slot = 0;
|
||||
llvmpipe->layer_slot = 0;
|
||||
llvmpipe->face_slot = 0;
|
||||
llvmpipe->psize_slot = 0;
|
||||
|
||||
/*
|
||||
* Match FS inputs against VS outputs, emitting the necessary
|
||||
|
|
@ -86,7 +90,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
|
||||
lpfs->info.base.input_semantic_index[i] < 2) {
|
||||
int idx = lpfs->info.base.input_semantic_index[i];
|
||||
llvmpipe->color_slot[idx] = (int)vinfo->num_attribs;
|
||||
llvmpipe->color_slot[idx] = vinfo->num_attribs;
|
||||
}
|
||||
|
||||
if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
|
||||
|
|
@ -94,6 +98,30 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) {
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
/*
|
||||
* For vp index and layer, if the fs requires them but the vs doesn't
|
||||
* provide them, store the slot - we'll later replace the data directly
|
||||
* with zero (as required by ARB_fragment_layer_viewport). This is
|
||||
* because draw itself just redirects them to whatever was at output 0.
|
||||
* We'll also store the real vpindex/layer slot for setup use.
|
||||
*/
|
||||
} else if (lpfs->info.base.input_semantic_name[i] ==
|
||||
TGSI_SEMANTIC_VIEWPORT_INDEX) {
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->viewport_index_slot = vinfo->num_attribs;
|
||||
}
|
||||
else {
|
||||
llvmpipe->fake_vpindex_slot = vinfo->num_attribs;
|
||||
}
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->layer_slot = vinfo->num_attribs;
|
||||
}
|
||||
else {
|
||||
llvmpipe->fake_layer_slot = vinfo->num_attribs;
|
||||
}
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else {
|
||||
/*
|
||||
* Emit the requested fs attribute for all but position.
|
||||
|
|
@ -101,6 +129,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
|
||||
}
|
||||
}
|
||||
|
||||
/* Figure out if we need bcolor as well.
|
||||
*/
|
||||
for (i = 0; i < 2; i++) {
|
||||
|
|
@ -108,12 +137,11 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
TGSI_SEMANTIC_BCOLOR, i);
|
||||
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->bcolor_slot[i] = (int)vinfo->num_attribs;
|
||||
llvmpipe->bcolor_slot[i] = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Figure out if we need pointsize as well.
|
||||
*/
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
|
|
@ -124,26 +152,26 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
|
|||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
}
|
||||
|
||||
/* Figure out if we need viewport index */
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
TGSI_SEMANTIC_VIEWPORT_INDEX,
|
||||
0);
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->viewport_index_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else {
|
||||
llvmpipe->viewport_index_slot = 0;
|
||||
/* Figure out if we need viewport index (if it wasn't already in fs input) */
|
||||
if (llvmpipe->viewport_index_slot == 0) {
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
TGSI_SEMANTIC_VIEWPORT_INDEX,
|
||||
0);
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->viewport_index_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
}
|
||||
}
|
||||
|
||||
/* Figure out if we need layer */
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
TGSI_SEMANTIC_LAYER,
|
||||
0);
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->layer_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
} else {
|
||||
llvmpipe->layer_slot = 0;
|
||||
/* Figure out if we need layer (if it wasn't already in fs input) */
|
||||
if (llvmpipe->layer_slot == 0) {
|
||||
vs_index = draw_find_shader_output(llvmpipe->draw,
|
||||
TGSI_SEMANTIC_LAYER,
|
||||
0);
|
||||
if (vs_index >= 0) {
|
||||
llvmpipe->layer_slot = vinfo->num_attribs;
|
||||
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
|
||||
}
|
||||
}
|
||||
|
||||
draw_compute_vertex_size(vinfo);
|
||||
|
|
|
|||
|
|
@ -372,9 +372,9 @@ load_attribute(struct gallivm_state *gallivm,
|
|||
/* Potentially modify it according to twoside, etc:
|
||||
*/
|
||||
if (key->twoside) {
|
||||
if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
|
||||
if (vert_attr == key->color_slot && key->bcolor_slot > 0)
|
||||
lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
|
||||
else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
|
||||
else if (vert_attr == key->spec_slot && key->bspec_slot > 0)
|
||||
lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
|
||||
}
|
||||
}
|
||||
|
|
@ -602,6 +602,13 @@ emit_tri_coef( struct gallivm_state *gallivm,
|
|||
*/
|
||||
break;
|
||||
|
||||
case LP_INTERP_ZERO:
|
||||
/*
|
||||
* The information we get from the output is bogus, replace it
|
||||
* with zero.
|
||||
*/
|
||||
emit_constant_coef4(gallivm, args, slot+1, args->bld.zero);
|
||||
break;
|
||||
case LP_INTERP_FACING:
|
||||
emit_facing_coef(gallivm, args, slot+1);
|
||||
break;
|
||||
|
|
@ -848,14 +855,10 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
|
|||
key->size = Offset(struct lp_setup_variant_key,
|
||||
inputs[key->num_inputs]);
|
||||
|
||||
key->color_slot = lp->color_slot [0];
|
||||
key->color_slot = lp->color_slot[0];
|
||||
key->bcolor_slot = lp->bcolor_slot[0];
|
||||
key->spec_slot = lp->color_slot [1];
|
||||
key->bspec_slot = lp->bcolor_slot[1];
|
||||
assert(key->color_slot == lp->color_slot [0]);
|
||||
assert(key->bcolor_slot == lp->bcolor_slot[0]);
|
||||
assert(key->spec_slot == lp->color_slot [1]);
|
||||
assert(key->bspec_slot == lp->bcolor_slot[1]);
|
||||
key->spec_slot = lp->color_slot[1];
|
||||
key->bspec_slot = lp->bcolor_slot[1];
|
||||
|
||||
/*
|
||||
* If depth is floating point, depth bias is calculated with respect
|
||||
|
|
@ -876,7 +879,13 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
|
|||
key->pad = 0;
|
||||
memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
|
||||
for (i = 0; i < key->num_inputs; i++) {
|
||||
if (key->inputs[i].interp == LP_INTERP_COLOR) {
|
||||
if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
|
||||
if (key->inputs[i].src_index == lp->fake_vpindex_slot ||
|
||||
key->inputs[i].src_index == lp->fake_layer_slot) {
|
||||
key->inputs[i].interp = LP_INTERP_ZERO;
|
||||
}
|
||||
}
|
||||
else if (key->inputs[i].interp == LP_INTERP_COLOR) {
|
||||
if (lp->rasterizer->flatshade)
|
||||
key->inputs[i].interp = LP_INTERP_CONSTANT;
|
||||
else
|
||||
|
|
|
|||
|
|
@ -17,11 +17,10 @@ struct lp_setup_variant_list_item
|
|||
struct lp_setup_variant_key {
|
||||
unsigned size:16;
|
||||
unsigned num_inputs:8;
|
||||
int color_slot:8;
|
||||
|
||||
int bcolor_slot:8;
|
||||
int spec_slot:8;
|
||||
int bspec_slot:8;
|
||||
unsigned color_slot:8;
|
||||
unsigned bcolor_slot:8;
|
||||
unsigned spec_slot:8;
|
||||
unsigned bspec_slot:8;
|
||||
unsigned flatshade_first:1;
|
||||
unsigned pixel_center_half:1;
|
||||
unsigned twoside:1;
|
||||
|
|
|
|||
|
|
@ -1124,12 +1124,15 @@ CodeEmitterNV50::emitIMUL(const Instruction *i)
|
|||
{
|
||||
code[0] = 0x40000000;
|
||||
|
||||
if (i->src(1).getFile() == FILE_IMMEDIATE) {
|
||||
if (i->sType == TYPE_S16)
|
||||
code[0] |= 0x8100;
|
||||
code[1] = 0;
|
||||
emitForm_IMM(i);
|
||||
} else
|
||||
if (i->encSize == 8) {
|
||||
code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
|
||||
if (i->src(1).getFile() == FILE_IMMEDIATE)
|
||||
emitForm_IMM(i);
|
||||
else
|
||||
emitForm_MAD(i);
|
||||
emitForm_MAD(i);
|
||||
} else {
|
||||
if (i->sType == TYPE_S16)
|
||||
code[0] |= 0x8100;
|
||||
|
|
@ -1190,29 +1193,45 @@ CodeEmitterNV50::emitDMUL(const Instruction *i)
|
|||
void
|
||||
CodeEmitterNV50::emitIMAD(const Instruction *i)
|
||||
{
|
||||
int mode;
|
||||
code[0] = 0x60000000;
|
||||
if (isSignedType(i->sType))
|
||||
code[1] = i->saturate ? 0x40000000 : 0x20000000;
|
||||
|
||||
assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
|
||||
if (!isSignedType(i->sType))
|
||||
mode = 0;
|
||||
else if (i->saturate)
|
||||
mode = 2;
|
||||
else
|
||||
code[1] = 0x00000000;
|
||||
mode = 1;
|
||||
|
||||
int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
|
||||
int neg2 = i->src(2).mod.neg();
|
||||
|
||||
assert(!(neg1 & neg2));
|
||||
code[1] |= neg1 << 27;
|
||||
code[1] |= neg2 << 26;
|
||||
|
||||
if (i->src(1).getFile() == FILE_IMMEDIATE)
|
||||
if (i->src(1).getFile() == FILE_IMMEDIATE) {
|
||||
code[1] = 0;
|
||||
emitForm_IMM(i);
|
||||
else
|
||||
code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
|
||||
if (i->flagsSrc >= 0) {
|
||||
assert(!(code[0] & 0x10400000));
|
||||
assert(SDATA(i->src(i->flagsSrc)).id == 0);
|
||||
code[0] |= 0x10400000;
|
||||
}
|
||||
} else
|
||||
if (i->encSize == 4) {
|
||||
emitForm_MUL(i);
|
||||
code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
|
||||
if (i->flagsSrc >= 0) {
|
||||
assert(!(code[0] & 0x10400000));
|
||||
assert(SDATA(i->src(i->flagsSrc)).id == 0);
|
||||
code[0] |= 0x10400000;
|
||||
}
|
||||
} else {
|
||||
code[1] = mode << 29;
|
||||
emitForm_MAD(i);
|
||||
|
||||
if (i->flagsSrc >= 0) {
|
||||
// add with carry from $cX
|
||||
assert(!(code[1] & 0x0c000000) && !i->getPredicate());
|
||||
code[1] |= 0xc << 24;
|
||||
srcId(i->src(i->flagsSrc), 32 + 12);
|
||||
if (i->flagsSrc >= 0) {
|
||||
// add with carry from $cX
|
||||
assert(!(code[1] & 0x0c000000) && !i->getPredicate());
|
||||
code[1] |= 0xc << 24;
|
||||
srcId(i->src(i->flagsSrc), 32 + 12);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2054,8 +2073,9 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
|
|||
|
||||
// check constraints on short MAD
|
||||
if (info.srcNr >= 2 && i->srcExists(2)) {
|
||||
if (!i->defExists(0) || !isFloatType(i->dType) ||
|
||||
i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
|
||||
if (!i->defExists(0) ||
|
||||
(i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
|
||||
DDATA(i->def(0)).id != SDATA(i->src(2)).id)
|
||||
return 8;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1897,7 +1897,7 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
|
|||
shd = fetchSrc(C >> 4, C & 3);
|
||||
|
||||
if (texi->op == OP_TXD) {
|
||||
for (c = 0; c < tgt.getDim(); ++c) {
|
||||
for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
|
||||
texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
|
||||
texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
|
|||
Instruction *tex, *add;
|
||||
Value *zero = bld.loadImm(bld.getSSA(), 0);
|
||||
int l, c;
|
||||
const int dim = i->tex.target.getDim();
|
||||
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
|
||||
const int array = i->tex.target.isArray();
|
||||
|
||||
i->op = OP_TEX; // no need to clone dPdx/dPdy later
|
||||
|
|
|
|||
|
|
@ -44,6 +44,8 @@ static bool
|
|||
expandIntegerMUL(BuildUtil *bld, Instruction *mul)
|
||||
{
|
||||
const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;
|
||||
ImmediateValue src1;
|
||||
bool src1imm = mul->src(1).getImmediate(src1);
|
||||
|
||||
DataType fTy; // full type
|
||||
switch (mul->sType) {
|
||||
|
|
@ -72,24 +74,41 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
|
|||
for (int j = 0; j < 4; ++j)
|
||||
t[j] = bld->getSSA(fullSize);
|
||||
|
||||
s[0] = mul->getSrc(0);
|
||||
s[1] = mul->getSrc(1);
|
||||
|
||||
if (isSignedType(mul->sType) && highResult) {
|
||||
s[0] = bld->getSSA(fullSize);
|
||||
s[1] = bld->getSSA(fullSize);
|
||||
bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
|
||||
bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1));
|
||||
src1.reg.data.s32 = abs(src1.reg.data.s32);
|
||||
} else {
|
||||
s[0] = mul->getSrc(0);
|
||||
s[1] = mul->getSrc(1);
|
||||
}
|
||||
|
||||
// split sources into halves
|
||||
i[0] = bld->mkSplit(a, halfSize, s[0]);
|
||||
i[1] = bld->mkSplit(b, halfSize, s[1]);
|
||||
|
||||
i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
|
||||
i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
|
||||
if (src1imm && (src1.reg.data.u32 & 0xffff0000) == 0) {
|
||||
i[2] = i[3] = bld->mkOp2(OP_MUL, fTy, t[1], a[1],
|
||||
bld->mkImm(src1.reg.data.u32 & 0xffff));
|
||||
} else {
|
||||
i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0],
|
||||
src1imm ? bld->mkImm(src1.reg.data.u32 >> 16) : b[1]);
|
||||
if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) {
|
||||
i[3] = i[2];
|
||||
t[1] = t[0];
|
||||
} else {
|
||||
i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
|
||||
}
|
||||
}
|
||||
i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8));
|
||||
i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
|
||||
if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) {
|
||||
i[4] = i[3];
|
||||
t[3] = t[2];
|
||||
} else {
|
||||
i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
|
||||
}
|
||||
|
||||
if (highResult) {
|
||||
Value *c[2];
|
||||
|
|
@ -911,7 +930,7 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
|
|||
Instruction *tex;
|
||||
Value *zero = bld.loadImm(bld.getSSA(), 0);
|
||||
int l, c;
|
||||
const int dim = i->tex.target.getDim();
|
||||
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
|
||||
|
||||
handleTEX(i);
|
||||
i->op = OP_TEX; // no need to clone dPdx/dPdy later
|
||||
|
|
@ -1225,7 +1244,7 @@ NV50LoweringPreSSA::handleEXPORT(Instruction *i)
|
|||
i->setDef(0, new_LValue(func, FILE_GPR));
|
||||
i->getDef(0)->reg.data.id = id;
|
||||
|
||||
prog->maxGPR = MAX2(prog->maxGPR, id);
|
||||
prog->maxGPR = MAX2(prog->maxGPR, id * 2);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -186,91 +186,67 @@ NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
|
|||
uses.push_back(TexUse(usei, texi));
|
||||
}
|
||||
|
||||
// While it might be tempting to use an algorithm that just looks at tex
|
||||
// uses, not all texture results are guaranteed to be used on all paths. In
|
||||
// the case where along some control flow path a texture result is never used,
|
||||
// we might reuse that register for something else, creating a
|
||||
// write-after-write hazard. So we have to manually look through all
|
||||
// instructions looking for ones that reference the registers in question.
|
||||
void
|
||||
NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
|
||||
Instruction *insn,
|
||||
const BasicBlock *term,
|
||||
std::list<TexUse> &uses)
|
||||
NVC0LegalizePostRA::findFirstUses(
|
||||
Instruction *texi, std::list<TexUse> &uses)
|
||||
{
|
||||
while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
|
||||
insn = insn->getSrc(0)->getUniqueInsn();
|
||||
int minGPR = texi->def(0).rep()->reg.data.id;
|
||||
int maxGPR = minGPR + texi->def(0).rep()->reg.size / 4 - 1;
|
||||
|
||||
// NOTE: the tex itself is, of course, not an overwriting definition
|
||||
if (insn == texi || !insn->bb->reachableBy(texi->bb, term))
|
||||
return;
|
||||
|
||||
switch (insn->op) {
|
||||
/* Values not connected to the tex's definition through any of these should
|
||||
* not be conflicting.
|
||||
*/
|
||||
case OP_SPLIT:
|
||||
case OP_MERGE:
|
||||
case OP_PHI:
|
||||
case OP_UNION:
|
||||
/* recurse again */
|
||||
for (int s = 0; insn->srcExists(s); ++s)
|
||||
findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term,
|
||||
uses);
|
||||
break;
|
||||
default:
|
||||
// if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ?
|
||||
addTexUse(uses, insn, texi);
|
||||
break;
|
||||
}
|
||||
unordered_set<const BasicBlock *> visited;
|
||||
findFirstUsesBB(minGPR, maxGPR, texi->next, texi, uses, visited);
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LegalizePostRA::findFirstUses(
|
||||
const Instruction *texi,
|
||||
const Instruction *insn,
|
||||
std::list<TexUse> &uses,
|
||||
unordered_set<const Instruction *>& visited)
|
||||
NVC0LegalizePostRA::findFirstUsesBB(
|
||||
int minGPR, int maxGPR, Instruction *start,
|
||||
const Instruction *texi, std::list<TexUse> &uses,
|
||||
unordered_set<const BasicBlock *> &visited)
|
||||
{
|
||||
for (int d = 0; insn->defExists(d); ++d) {
|
||||
Value *v = insn->getDef(d);
|
||||
for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
|
||||
Instruction *usei = (*u)->getInsn();
|
||||
const BasicBlock *bb = start->bb;
|
||||
|
||||
// NOTE: In case of a loop that overwrites a value but never uses
|
||||
// it, it can happen that we have a cycle of uses that consists only
|
||||
// of phis and no-op moves and will thus cause an infinite loop here
|
||||
// since these are not considered actual uses.
|
||||
// The most obvious (and perhaps the only) way to prevent this is to
|
||||
// remember which instructions we've already visited.
|
||||
// We don't process the whole bb the first time around. This is correct,
|
||||
// however we might be in a loop and hit this BB again, and need to process
|
||||
// the full thing. So only mark a bb as visited if we processed it from the
|
||||
// beginning.
|
||||
if (start == bb->getEntry()) {
|
||||
if (visited.find(bb) != visited.end())
|
||||
return;
|
||||
visited.insert(bb);
|
||||
}
|
||||
|
||||
if (visited.find(usei) != visited.end())
|
||||
for (Instruction *insn = start; insn != bb->getExit(); insn = insn->next) {
|
||||
if (insn->isNop())
|
||||
continue;
|
||||
|
||||
for (int d = 0; insn->defExists(d); ++d) {
|
||||
if (insn->def(d).getFile() != FILE_GPR ||
|
||||
insn->def(d).rep()->reg.data.id < minGPR ||
|
||||
insn->def(d).rep()->reg.data.id > maxGPR)
|
||||
continue;
|
||||
|
||||
visited.insert(usei);
|
||||
|
||||
if (usei->op == OP_PHI || usei->op == OP_UNION) {
|
||||
// need a barrier before WAW cases, like:
|
||||
// %r0 = tex
|
||||
// if ...
|
||||
// texbar <- is required or tex might replace x again
|
||||
// %r1 = x <- overwriting def
|
||||
// %r2 = phi %r0, %r1
|
||||
for (int s = 0; usei->srcExists(s); ++s) {
|
||||
Instruction *defi = usei->getSrc(s)->getUniqueInsn();
|
||||
if (defi && &usei->src(s) != *u)
|
||||
findOverwritingDefs(texi, defi, usei->bb, uses);
|
||||
}
|
||||
}
|
||||
|
||||
if (usei->op == OP_SPLIT ||
|
||||
usei->op == OP_MERGE ||
|
||||
usei->op == OP_PHI ||
|
||||
usei->op == OP_UNION) {
|
||||
// these uses don't manifest in the machine code
|
||||
findFirstUses(texi, usei, uses, visited);
|
||||
} else
|
||||
if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
|
||||
usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
|
||||
findFirstUses(texi, usei, uses, visited);
|
||||
} else {
|
||||
addTexUse(uses, usei, texi);
|
||||
}
|
||||
addTexUse(uses, insn, texi);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int s = 0; insn->srcExists(s); ++s) {
|
||||
if (insn->src(s).getFile() != FILE_GPR ||
|
||||
insn->src(s).rep()->reg.data.id < minGPR ||
|
||||
insn->src(s).rep()->reg.data.id > maxGPR)
|
||||
continue;
|
||||
addTexUse(uses, insn, texi);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
|
||||
findFirstUsesBB(minGPR, maxGPR, BasicBlock::get(ei.getNode())->getEntry(),
|
||||
texi, uses, visited);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -323,8 +299,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
|
|||
if (!uses)
|
||||
return false;
|
||||
for (size_t i = 0; i < texes.size(); ++i) {
|
||||
unordered_set<const Instruction *> visited;
|
||||
findFirstUses(texes[i], texes[i], uses[i], visited);
|
||||
findFirstUses(texes[i], uses[i]);
|
||||
}
|
||||
|
||||
// determine the barrier level at each use
|
||||
|
|
@ -870,7 +845,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
|
|||
Instruction *tex;
|
||||
Value *zero = bld.loadImm(bld.getSSA(), 0);
|
||||
int l, c;
|
||||
const int dim = i->tex.target.getDim();
|
||||
const int dim = i->tex.target.getDim() + i->tex.target.isCube();
|
||||
const int array = i->tex.target.isArray();
|
||||
|
||||
i->op = OP_TEX; // no need to clone dPdx/dPdy later
|
||||
|
|
@ -917,7 +892,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
|
|||
bool
|
||||
NVC0LoweringPass::handleTXD(TexInstruction *txd)
|
||||
{
|
||||
int dim = txd->tex.target.getDim();
|
||||
int dim = txd->tex.target.getDim() + txd->tex.target.isCube();
|
||||
unsigned arg = txd->tex.target.getArgCount();
|
||||
unsigned expected_args = arg;
|
||||
const int chipset = prog->getTarget()->getChipset();
|
||||
|
|
@ -937,8 +912,7 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
|
|||
|
||||
if (expected_args > 4 ||
|
||||
dim > 2 ||
|
||||
txd->tex.target.isShadow() ||
|
||||
txd->tex.target.isCube())
|
||||
txd->tex.target.isShadow())
|
||||
txd->op = OP_TEX;
|
||||
|
||||
handleTEX(txd);
|
||||
|
|
|
|||
|
|
@ -69,12 +69,10 @@ private:
|
|||
};
|
||||
bool insertTextureBarriers(Function *);
|
||||
inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
|
||||
void findFirstUses(const Instruction *tex, const Instruction *def,
|
||||
std::list<TexUse>&,
|
||||
unordered_set<const Instruction *>&);
|
||||
void findOverwritingDefs(const Instruction *tex, Instruction *insn,
|
||||
const BasicBlock *term,
|
||||
std::list<TexUse>&);
|
||||
void findFirstUses(Instruction *texi, std::list<TexUse> &uses);
|
||||
void findFirstUsesBB(int minGPR, int maxGPR, Instruction *start,
|
||||
const Instruction *texi, std::list<TexUse> &uses,
|
||||
unordered_set<const BasicBlock *> &visited);
|
||||
void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *);
|
||||
const Instruction *recurseDef(const Instruction *);
|
||||
|
||||
|
|
|
|||
|
|
@ -1501,6 +1501,7 @@ private:
|
|||
void handleSLCT(Instruction *);
|
||||
void handleLOGOP(Instruction *);
|
||||
void handleCVT_NEG(Instruction *);
|
||||
void handleCVT_CVT(Instruction *);
|
||||
void handleCVT_EXTBF(Instruction *);
|
||||
void handleSUCLAMP(Instruction *);
|
||||
|
||||
|
|
@ -1792,6 +1793,47 @@ AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
|
|||
delete_Instruction(prog, cvt);
|
||||
}
|
||||
|
||||
// F2I(TRUNC()) and so on can be expressed as a single CVT. If the earlier CVT
|
||||
// does a type conversion, this becomes trickier as there might be range
|
||||
// changes/etc. We could handle those in theory as long as the range was being
|
||||
// reduced or kept the same.
|
||||
void
|
||||
AlgebraicOpt::handleCVT_CVT(Instruction *cvt)
|
||||
{
|
||||
Instruction *insn = cvt->getSrc(0)->getInsn();
|
||||
RoundMode rnd = insn->rnd;
|
||||
|
||||
if (insn->saturate ||
|
||||
insn->subOp ||
|
||||
insn->dType != insn->sType ||
|
||||
insn->dType != cvt->sType)
|
||||
return;
|
||||
|
||||
switch (insn->op) {
|
||||
case OP_CEIL:
|
||||
rnd = ROUND_PI;
|
||||
break;
|
||||
case OP_FLOOR:
|
||||
rnd = ROUND_MI;
|
||||
break;
|
||||
case OP_TRUNC:
|
||||
rnd = ROUND_ZI;
|
||||
break;
|
||||
case OP_CVT:
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
if (!isFloatType(cvt->dType) || !isFloatType(insn->sType))
|
||||
rnd = (RoundMode)(rnd & 3);
|
||||
|
||||
cvt->rnd = rnd;
|
||||
cvt->setSrc(0, insn->getSrc(0));
|
||||
cvt->src(0).mod *= insn->src(0).mod;
|
||||
cvt->sType = insn->sType;
|
||||
}
|
||||
|
||||
// Some shaders extract packed bytes out of words and convert them to
|
||||
// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
|
||||
// nv50 for word sizes.
|
||||
|
|
@ -1961,6 +2003,7 @@ AlgebraicOpt::visit(BasicBlock *bb)
|
|||
break;
|
||||
case OP_CVT:
|
||||
handleCVT_NEG(i);
|
||||
handleCVT_CVT(i);
|
||||
if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
|
||||
handleCVT_EXTBF(i);
|
||||
break;
|
||||
|
|
@ -2532,6 +2575,7 @@ MemoryOpt::runOpt(BasicBlock *bb)
|
|||
class FlatteningPass : public Pass
|
||||
{
|
||||
private:
|
||||
virtual bool visit(Function *);
|
||||
virtual bool visit(BasicBlock *);
|
||||
|
||||
bool tryPredicateConditional(BasicBlock *);
|
||||
|
|
@ -2540,6 +2584,8 @@ private:
|
|||
inline bool isConstantCondition(Value *pred);
|
||||
inline bool mayPredicate(const Instruction *, const Value *pred) const;
|
||||
inline void removeFlow(Instruction *);
|
||||
|
||||
uint8_t gpr_unit;
|
||||
};
|
||||
|
||||
bool
|
||||
|
|
@ -2561,9 +2607,15 @@ FlatteningPass::isConstantCondition(Value *pred)
|
|||
file = ld->src(0).getFile();
|
||||
} else {
|
||||
file = insn->src(s).getFile();
|
||||
// catch $r63 on NVC0
|
||||
if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR)
|
||||
file = FILE_IMMEDIATE;
|
||||
// catch $r63 on NVC0 and $r63/$r127 on NV50. Unfortunately maxGPR is
|
||||
// in register "units", which can vary between targets.
|
||||
if (file == FILE_GPR) {
|
||||
Value *v = insn->getSrc(s);
|
||||
int bytes = v->reg.data.id * MIN2(v->reg.size, 4);
|
||||
int units = bytes >> gpr_unit;
|
||||
if (units > prog->maxGPR)
|
||||
file = FILE_IMMEDIATE;
|
||||
}
|
||||
}
|
||||
if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST)
|
||||
return false;
|
||||
|
|
@ -2668,6 +2720,14 @@ FlatteningPass::tryPropagateBranch(BasicBlock *bb)
|
|||
}
|
||||
}
|
||||
|
||||
bool
|
||||
FlatteningPass::visit(Function *fn)
|
||||
{
|
||||
gpr_unit = prog->getTarget()->getFileUnit(FILE_GPR);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
FlatteningPass::visit(BasicBlock *bb)
|
||||
{
|
||||
|
|
@ -2774,6 +2834,15 @@ private:
|
|||
virtual bool visit(BasicBlock *);
|
||||
};
|
||||
|
||||
static bool
|
||||
post_ra_dead(Instruction *i)
|
||||
{
|
||||
for (int d = 0; i->defExists(d); ++d)
|
||||
if (i->getDef(d)->refCount())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
NV50PostRaConstantFolding::visit(BasicBlock *bb)
|
||||
{
|
||||
|
|
@ -2787,24 +2856,48 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
|
|||
i->src(0).getFile() != FILE_GPR ||
|
||||
i->src(1).getFile() != FILE_GPR ||
|
||||
i->src(2).getFile() != FILE_GPR ||
|
||||
i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id ||
|
||||
!isFloatType(i->dType))
|
||||
i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
|
||||
break;
|
||||
|
||||
if (i->getDef(0)->reg.data.id >= 64 ||
|
||||
i->getSrc(0)->reg.data.id >= 64)
|
||||
break;
|
||||
|
||||
if (i->flagsSrc >= 0 && i->getSrc(i->flagsSrc)->reg.data.id != 0)
|
||||
break;
|
||||
|
||||
if (i->getPredicate())
|
||||
break;
|
||||
|
||||
def = i->getSrc(1)->getInsn();
|
||||
if (def && def->op == OP_SPLIT && typeSizeof(def->sType) == 4)
|
||||
def = def->getSrc(0)->getInsn();
|
||||
if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
|
||||
vtmp = i->getSrc(1);
|
||||
i->setSrc(1, def->getSrc(0));
|
||||
if (isFloatType(i->sType)) {
|
||||
i->setSrc(1, def->getSrc(0));
|
||||
} else {
|
||||
ImmediateValue val;
|
||||
bool ret = def->src(0).getImmediate(val);
|
||||
assert(ret);
|
||||
if (i->getSrc(1)->reg.data.id & 1)
|
||||
val.reg.data.u32 >>= 16;
|
||||
val.reg.data.u32 &= 0xffff;
|
||||
i->setSrc(1, new_ImmediateValue(bb->getProgram(), val.reg.data.u32));
|
||||
}
|
||||
|
||||
/* There's no post-RA dead code elimination, so do it here
|
||||
* XXX: if we add more code-removing post-RA passes, we might
|
||||
* want to create a post-RA dead-code elim pass */
|
||||
if (vtmp->refCount() == 0)
|
||||
delete_Instruction(bb->getProgram(), def);
|
||||
if (post_ra_dead(vtmp->getInsn())) {
|
||||
Value *src = vtmp->getInsn()->getSrc(0);
|
||||
// Careful -- splits will have already been removed from the
|
||||
// functions. Don't double-delete.
|
||||
if (vtmp->getInsn()->bb)
|
||||
delete_Instruction(prog, vtmp->getInsn());
|
||||
if (src->getInsn() && post_ra_dead(src->getInsn()))
|
||||
delete_Instruction(prog, src->getInsn());
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1473,7 +1473,6 @@ GCRA::allocateRegisters(ArrayList& insns)
|
|||
// Short encoding only possible if they're all GPRs, no need to
|
||||
// affect them otherwise.
|
||||
if (insn->flagsDef < 0 &&
|
||||
isFloatType(insn->dType) &&
|
||||
insn->src(0).getFile() == FILE_GPR &&
|
||||
insn->src(1).getFile() == FILE_GPR &&
|
||||
insn->src(2).getFile() == FILE_GPR)
|
||||
|
|
|
|||
|
|
@ -147,6 +147,12 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
|
|||
if (nv_dbg)
|
||||
nouveau_mesa_debug = atoi(nv_dbg);
|
||||
|
||||
/* These must be set before any failure is possible, as the cleanup
|
||||
* paths assume they're responsible for deleting them.
|
||||
*/
|
||||
screen->drm = nouveau_drm(&dev->object);
|
||||
screen->device = dev;
|
||||
|
||||
/*
|
||||
* this is initialized to 1 in nouveau_drm_screen_create after screen
|
||||
* is fully constructed and added to the global screen list.
|
||||
|
|
@ -175,7 +181,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
|
|||
data, size, &screen->channel);
|
||||
if (ret)
|
||||
return ret;
|
||||
screen->device = dev;
|
||||
|
||||
ret = nouveau_client_new(screen->device, &screen->client);
|
||||
if (ret)
|
||||
|
|
@ -229,6 +234,8 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
|
|||
void
|
||||
nouveau_screen_fini(struct nouveau_screen *screen)
|
||||
{
|
||||
int fd = screen->drm->fd;
|
||||
|
||||
nouveau_mm_destroy(screen->mm_GART);
|
||||
nouveau_mm_destroy(screen->mm_VRAM);
|
||||
|
||||
|
|
@ -238,6 +245,8 @@ nouveau_screen_fini(struct nouveau_screen *screen)
|
|||
nouveau_object_del(&screen->channel);
|
||||
|
||||
nouveau_device_del(&screen->device);
|
||||
nouveau_drm_del(&screen->drm);
|
||||
close(fd);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ struct nouveau_bo;
|
|||
|
||||
struct nouveau_screen {
|
||||
struct pipe_screen base;
|
||||
struct nouveau_drm *drm;
|
||||
struct nouveau_device *device;
|
||||
struct nouveau_object *channel;
|
||||
struct nouveau_client *client;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "pipe/p_defines.h"
|
||||
|
||||
#include <drm.h>
|
||||
#include <nouveau.h>
|
||||
|
||||
#ifndef NV04_PFIFO_MAX_PACKET_LEN
|
||||
|
|
@ -79,13 +80,13 @@ nouveau_screen_transfer_flags(unsigned pipe)
|
|||
return flags;
|
||||
}
|
||||
|
||||
extern struct pipe_screen *
|
||||
extern struct nouveau_screen *
|
||||
nv30_screen_create(struct nouveau_device *);
|
||||
|
||||
extern struct pipe_screen *
|
||||
extern struct nouveau_screen *
|
||||
nv50_screen_create(struct nouveau_device *);
|
||||
|
||||
extern struct pipe_screen *
|
||||
extern struct nouveau_screen *
|
||||
nvc0_screen_create(struct nouveau_device *);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -413,23 +413,20 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
|
|||
#define FAIL_SCREEN_INIT(str, err) \
|
||||
do { \
|
||||
NOUVEAU_ERR(str, err); \
|
||||
nv30_screen_destroy(pscreen); \
|
||||
return NULL; \
|
||||
screen->base.base.context_create = NULL; \
|
||||
return &screen->base; \
|
||||
} while(0)
|
||||
|
||||
struct pipe_screen *
|
||||
struct nouveau_screen *
|
||||
nv30_screen_create(struct nouveau_device *dev)
|
||||
{
|
||||
struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen);
|
||||
struct nv30_screen *screen;
|
||||
struct pipe_screen *pscreen;
|
||||
struct nouveau_pushbuf *push;
|
||||
struct nv04_fifo *fifo;
|
||||
unsigned oclass = 0;
|
||||
int ret, i;
|
||||
|
||||
if (!screen)
|
||||
return NULL;
|
||||
|
||||
switch (dev->chipset & 0xf0) {
|
||||
case 0x30:
|
||||
if (RANKINE_0397_CHIPSET & (1 << (dev->chipset & 0x0f)))
|
||||
|
|
@ -458,10 +455,16 @@ nv30_screen_create(struct nouveau_device *dev)
|
|||
|
||||
if (!oclass) {
|
||||
NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset);
|
||||
FREE(screen);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
screen = CALLOC_STRUCT(nv30_screen);
|
||||
if (!screen)
|
||||
return NULL;
|
||||
|
||||
pscreen = &screen->base.base;
|
||||
pscreen->destroy = nv30_screen_destroy;
|
||||
|
||||
/*
|
||||
* Some modern apps try to use msaa without keeping in mind the
|
||||
* restrictions on videomem of older cards. Resulting in dmesg saying:
|
||||
|
|
@ -479,8 +482,6 @@ nv30_screen_create(struct nouveau_device *dev)
|
|||
if (screen->max_sample_count > 4)
|
||||
screen->max_sample_count = 4;
|
||||
|
||||
pscreen = &screen->base.base;
|
||||
pscreen->destroy = nv30_screen_destroy;
|
||||
pscreen->get_param = nv30_screen_get_param;
|
||||
pscreen->get_paramf = nv30_screen_get_paramf;
|
||||
pscreen->get_shader_param = nv30_screen_get_shader_param;
|
||||
|
|
@ -693,5 +694,5 @@ nv30_screen_create(struct nouveau_device *dev)
|
|||
nouveau_pushbuf_kick(push, push->channel);
|
||||
|
||||
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
|
||||
return pscreen;
|
||||
return &screen->base;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -336,9 +336,10 @@ nv50_miptree_create(struct pipe_screen *pscreen,
|
|||
const struct pipe_resource *templ)
|
||||
{
|
||||
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
|
||||
struct nouveau_drm *drm = nouveau_screen(pscreen)->drm;
|
||||
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
|
||||
struct pipe_resource *pt = &mt->base.base;
|
||||
bool compressed = dev->drm_version >= 0x01000101;
|
||||
bool compressed = drm->version >= 0x01000101;
|
||||
int ret;
|
||||
union nouveau_bo_config bo_config;
|
||||
uint32_t bo_flags;
|
||||
|
|
|
|||
|
|
@ -113,6 +113,12 @@ static void
|
|||
nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
|
||||
{
|
||||
struct nv50_hw_query *hq = nv50_hw_query(q);
|
||||
|
||||
if (hq->funcs && hq->funcs->destroy_query) {
|
||||
hq->funcs->destroy_query(nv50, hq);
|
||||
return;
|
||||
}
|
||||
|
||||
nv50_hw_query_allocate(nv50, q, 0);
|
||||
nouveau_fence_ref(NULL, &hq->fence);
|
||||
FREE(hq);
|
||||
|
|
|
|||
|
|
@ -71,7 +71,8 @@ nv50_hw_metric_destroy_query(struct nv50_context *nv50,
|
|||
unsigned i;
|
||||
|
||||
for (i = 0; i < hmq->num_queries; i++)
|
||||
hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]);
|
||||
if (hmq->queries[i]->funcs->destroy_query)
|
||||
hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]);
|
||||
FREE(hmq);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -153,7 +153,9 @@ static void
|
|||
nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
|
||||
{
|
||||
struct nv50_query *q = &hq->base;
|
||||
q->funcs->destroy_query(nv50, q);
|
||||
nv50_hw_query_allocate(nv50, q, 0);
|
||||
nouveau_fence_ref(NULL, &hq->fence);
|
||||
FREE(hq);
|
||||
}
|
||||
|
||||
static boolean
|
||||
|
|
|
|||
|
|
@ -405,6 +405,11 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
|
|||
|
||||
if (screen->blitter)
|
||||
nv50_blitter_destroy(screen);
|
||||
if (screen->pm.prog) {
|
||||
screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
|
||||
nv50_program_destroy(NULL, screen->pm.prog);
|
||||
FREE(screen->pm.prog);
|
||||
}
|
||||
|
||||
nouveau_bo_ref(NULL, &screen->code);
|
||||
nouveau_bo_ref(NULL, &screen->tls_bo);
|
||||
|
|
@ -518,11 +523,11 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
|
|||
}
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1);
|
||||
PUSH_DATA(push, screen->base.device->drm_version >= 0x01000101);
|
||||
PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8);
|
||||
for (i = 0; i < 8; ++i)
|
||||
PUSH_DATA(push, screen->base.device->drm_version >= 0x01000101);
|
||||
PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
|
|
@ -747,7 +752,7 @@ int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
|
|||
return 1;
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
struct nouveau_screen *
|
||||
nv50_screen_create(struct nouveau_device *dev)
|
||||
{
|
||||
struct nv50_screen *screen;
|
||||
|
|
@ -762,6 +767,7 @@ nv50_screen_create(struct nouveau_device *dev)
|
|||
if (!screen)
|
||||
return NULL;
|
||||
pscreen = &screen->base.base;
|
||||
pscreen->destroy = nv50_screen_destroy;
|
||||
|
||||
ret = nouveau_screen_init(&screen->base, dev);
|
||||
if (ret) {
|
||||
|
|
@ -782,7 +788,6 @@ nv50_screen_create(struct nouveau_device *dev)
|
|||
|
||||
chan = screen->base.channel;
|
||||
|
||||
pscreen->destroy = nv50_screen_destroy;
|
||||
pscreen->context_create = nv50_create;
|
||||
pscreen->is_format_supported = nv50_screen_is_format_supported;
|
||||
pscreen->get_param = nv50_screen_get_param;
|
||||
|
|
@ -961,11 +966,11 @@ nv50_screen_create(struct nouveau_device *dev)
|
|||
|
||||
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
|
||||
|
||||
return pscreen;
|
||||
return &screen->base;
|
||||
|
||||
fail:
|
||||
nv50_screen_destroy(pscreen);
|
||||
return NULL;
|
||||
screen->base.base.context_create = NULL;
|
||||
return &screen->base;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
|||
|
|
@ -192,8 +192,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
|
|||
tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
|
||||
return false;
|
||||
unreachable("unexpected/invalid texture target");
|
||||
}
|
||||
|
||||
tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
|
||||
|
|
|
|||
|
|
@ -91,6 +91,9 @@ nv50_vertex_state_create(struct pipe_context *pipe,
|
|||
}
|
||||
so->element[i].state = nv50_format_table[fmt].vtx;
|
||||
so->need_conversion = true;
|
||||
pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
|
||||
"Converting vertex element %d, no hw format %s",
|
||||
i, util_format_name(ve->src_format));
|
||||
}
|
||||
so->element[i].state |= i;
|
||||
|
||||
|
|
|
|||
|
|
@ -756,8 +756,8 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_format codec)
|
|||
int present, ret;
|
||||
|
||||
if (!FIRMWARE_PRESENT(checked, VP_KERN)) {
|
||||
nouveau_object_new(screen->channel, 0, 0x7476, NULL, 0, &obj);
|
||||
if (obj)
|
||||
ret = nouveau_object_new(screen->channel, 0, 0x7476, NULL, 0, &obj);
|
||||
if (!ret)
|
||||
screen->firmware_info.profiles_present |= FIRMWARE_VP_KERN;
|
||||
nouveau_object_del(&obj);
|
||||
screen->firmware_info.profiles_checked |= FIRMWARE_VP_KERN;
|
||||
|
|
@ -765,8 +765,8 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_format codec)
|
|||
|
||||
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
|
||||
if (!FIRMWARE_PRESENT(checked, BSP_KERN)) {
|
||||
nouveau_object_new(screen->channel, 0, 0x74b0, NULL, 0, &obj);
|
||||
if (obj)
|
||||
ret = nouveau_object_new(screen->channel, 0, 0x74b0, NULL, 0, &obj);
|
||||
if (!ret)
|
||||
screen->firmware_info.profiles_present |= FIRMWARE_BSP_KERN;
|
||||
nouveau_object_del(&obj);
|
||||
screen->firmware_info.profiles_checked |= FIRMWARE_BSP_KERN;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
#include "util/u_sampler.h"
|
||||
#include "util/u_format.h"
|
||||
|
||||
#include <nvif/class.h>
|
||||
|
||||
static void
|
||||
nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder,
|
||||
struct pipe_video_buffer *video_target,
|
||||
|
|
@ -56,6 +58,28 @@ nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder,
|
|||
nv98_decoder_ppp(dec, desc, target, comm_seq);
|
||||
}
|
||||
|
||||
static const struct nouveau_mclass
|
||||
nv98_decoder_msvld[] = {
|
||||
{ G98_MSVLD, -1 },
|
||||
{ IGT21A_MSVLD, -1 },
|
||||
{ GT212_MSVLD, -1 },
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct nouveau_mclass
|
||||
nv98_decoder_mspdec[] = {
|
||||
{ G98_MSPDEC, -1 },
|
||||
{ GT212_MSPDEC, -1 },
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct nouveau_mclass
|
||||
nv98_decoder_msppp[] = {
|
||||
{ G98_MSPPP, -1 },
|
||||
{ GT212_MSPPP, -1 },
|
||||
{}
|
||||
};
|
||||
|
||||
struct pipe_video_codec *
|
||||
nv98_create_decoder(struct pipe_context *context,
|
||||
const struct pipe_video_codec *templ)
|
||||
|
|
@ -103,12 +127,33 @@ nv98_create_decoder(struct pipe_context *context,
|
|||
}
|
||||
push = dec->pushbuf;
|
||||
|
||||
if (!ret)
|
||||
ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x85b1, NULL, 0, &dec->bsp);
|
||||
if (!ret)
|
||||
ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x85b2, NULL, 0, &dec->vp);
|
||||
if (!ret)
|
||||
ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x85b3, NULL, 0, &dec->ppp);
|
||||
if (!ret) {
|
||||
ret = nouveau_object_mclass(dec->channel[0], nv98_decoder_msvld);
|
||||
if (ret >= 0) {
|
||||
ret = nouveau_object_new(dec->channel[0], 0xbeef85b1,
|
||||
nv98_decoder_msvld[ret].oclass, NULL, 0,
|
||||
&dec->bsp);
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
ret = nouveau_object_mclass(dec->channel[1], nv98_decoder_mspdec);
|
||||
if (ret >= 0) {
|
||||
ret = nouveau_object_new(dec->channel[1], 0xbeef85b2,
|
||||
nv98_decoder_mspdec[ret].oclass, NULL, 0,
|
||||
&dec->vp);
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
ret = nouveau_object_mclass(dec->channel[2], nv98_decoder_msppp);
|
||||
if (ret >= 0) {
|
||||
ret = nouveau_object_new(dec->channel[2], 0xbeef85b3,
|
||||
nv98_decoder_msppp[ret].oclass, NULL, 0,
|
||||
&dec->ppp);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
|
|
|
|||
|
|
@ -248,9 +248,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
|
|||
const struct pipe_resource *templ)
|
||||
{
|
||||
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
|
||||
struct nouveau_drm *drm = nouveau_screen(pscreen)->drm;
|
||||
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
|
||||
struct pipe_resource *pt = &mt->base.base;
|
||||
bool compressed = dev->drm_version >= 0x01000101;
|
||||
bool compressed = drm->version >= 0x01000101;
|
||||
int ret;
|
||||
union nouveau_bo_config bo_config;
|
||||
uint32_t bo_flags;
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
|
|||
count++;
|
||||
#endif
|
||||
|
||||
if (screen->base.device->drm_version >= 0x01000101) {
|
||||
if (screen->base.drm->version >= 0x01000101) {
|
||||
if (screen->compute) {
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
count += 2;
|
||||
|
|
|
|||
|
|
@ -116,6 +116,12 @@ static void
|
|||
nvc0_hw_destroy_query(struct nvc0_context *nvc0, struct nvc0_query *q)
|
||||
{
|
||||
struct nvc0_hw_query *hq = nvc0_hw_query(q);
|
||||
|
||||
if (hq->funcs && hq->funcs->destroy_query) {
|
||||
hq->funcs->destroy_query(nvc0, hq);
|
||||
return;
|
||||
}
|
||||
|
||||
nvc0_hw_query_allocate(nvc0, q, 0);
|
||||
nouveau_fence_ref(NULL, &hq->fence);
|
||||
FREE(hq);
|
||||
|
|
|
|||
|
|
@ -293,7 +293,8 @@ nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
|
|||
unsigned i;
|
||||
|
||||
for (i = 0; i < hmq->num_queries; i++)
|
||||
hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
|
||||
if (hmq->queries[i]->funcs->destroy_query)
|
||||
hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
|
||||
FREE(hmq);
|
||||
}
|
||||
|
||||
|
|
@ -420,7 +421,10 @@ sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
|
|||
{
|
||||
switch (hq->base.type - NVE4_HW_METRIC_QUERY(0)) {
|
||||
case NVE4_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
|
||||
return sm20_hw_metric_calc_result(hq, res64);
|
||||
/* (active_warps / active_cycles) / max. number of warps on a MP */
|
||||
if (res64[1])
|
||||
return (res64[0] / (double)res64[1]) / 64;
|
||||
break;
|
||||
case NVE4_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
|
||||
return sm20_hw_metric_calc_result(hq, res64);
|
||||
case NVE4_HW_METRIC_QUERY_INST_ISSUED:
|
||||
|
|
@ -561,7 +565,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
|
|||
uint16_t class_3d = screen->base.class_3d;
|
||||
int count = 0;
|
||||
|
||||
if (screen->base.device->drm_version >= 0x01000101) {
|
||||
if (screen->base.drm->version >= 0x01000101) {
|
||||
if (screen->compute) {
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
count += NVE4_HW_METRIC_QUERY_COUNT;
|
||||
|
|
|
|||
|
|
@ -782,7 +782,9 @@ static void
|
|||
nvc0_hw_sm_destroy_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
|
||||
{
|
||||
struct nvc0_query *q = &hq->base;
|
||||
q->funcs->destroy_query(nvc0, q);
|
||||
nvc0_hw_query_allocate(nvc0, q, 0);
|
||||
nouveau_fence_ref(NULL, &hq->fence);
|
||||
FREE(hq);
|
||||
}
|
||||
|
||||
static boolean
|
||||
|
|
@ -1075,17 +1077,6 @@ nve4_hw_sm_query_read_data(uint32_t count[32][8],
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Metric calculations:
|
||||
* sum(x) ... sum of x over all MPs
|
||||
* avg(x) ... average of x over all MPs
|
||||
*
|
||||
* IPC : sum(inst_executed) / clock
|
||||
* INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
|
||||
* MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
|
||||
* MP_EFFICIENCY : avg(active_cycles / clock)
|
||||
*
|
||||
* NOTE: Interpretation of IPC requires knowledge of MP count.
|
||||
*/
|
||||
static boolean
|
||||
nvc0_hw_sm_get_query_result(struct nvc0_context *nvc0, struct nvc0_hw_query *hq,
|
||||
boolean wait, union pipe_query_result *result)
|
||||
|
|
@ -1130,7 +1121,7 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, unsigned type)
|
|||
struct nvc0_hw_query *hq;
|
||||
unsigned space;
|
||||
|
||||
if (nvc0->screen->base.device->drm_version < 0x01000101)
|
||||
if (nvc0->screen->base.drm->version < 0x01000101)
|
||||
return NULL;
|
||||
|
||||
if ((type < NVE4_HW_SM_QUERY(0) || type > NVE4_HW_SM_QUERY_LAST) &&
|
||||
|
|
@ -1225,7 +1216,7 @@ nvc0_hw_sm_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
|
|||
{
|
||||
int count = 0;
|
||||
|
||||
if (screen->base.device->drm_version >= 0x01000101) {
|
||||
if (screen->base.drm->version >= 0x01000101) {
|
||||
if (screen->compute) {
|
||||
if (screen->base.class_3d == NVE4_3D_CLASS) {
|
||||
count += NVE4_HW_SM_QUERY_COUNT;
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#include <xf86drm.h>
|
||||
#include <nouveau_drm.h>
|
||||
#include <nvif/class.h>
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_format_s3tc.h"
|
||||
#include "pipe/p_screen.h"
|
||||
|
|
@ -428,6 +429,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
|
|||
if (screen->pm.prog) {
|
||||
screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
|
||||
nvc0_program_destroy(NULL, screen->pm.prog);
|
||||
FREE(screen->pm.prog);
|
||||
}
|
||||
|
||||
nouveau_bo_ref(NULL, &screen->text);
|
||||
|
|
@ -617,11 +619,10 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
|
|||
#define FAIL_SCREEN_INIT(str, err) \
|
||||
do { \
|
||||
NOUVEAU_ERR(str, err); \
|
||||
nvc0_screen_destroy(pscreen); \
|
||||
return NULL; \
|
||||
goto fail; \
|
||||
} while(0)
|
||||
|
||||
struct pipe_screen *
|
||||
struct nouveau_screen *
|
||||
nvc0_screen_create(struct nouveau_device *dev)
|
||||
{
|
||||
struct nvc0_screen *screen;
|
||||
|
|
@ -650,6 +651,7 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
if (!screen)
|
||||
return NULL;
|
||||
pscreen = &screen->base.base;
|
||||
pscreen->destroy = nvc0_screen_destroy;
|
||||
|
||||
ret = nouveau_screen_init(&screen->base, dev);
|
||||
if (ret) {
|
||||
|
|
@ -672,7 +674,6 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
screen->base.vidmem_bindings = 0;
|
||||
}
|
||||
|
||||
pscreen->destroy = nvc0_screen_destroy;
|
||||
pscreen->context_create = nvc0_create;
|
||||
pscreen->is_format_supported = nvc0_screen_is_format_supported;
|
||||
pscreen->get_param = nvc0_screen_get_param;
|
||||
|
|
@ -687,7 +688,7 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
|
||||
|
||||
flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
|
||||
if (dev->drm_version >= 0x01000202)
|
||||
if (screen->base.drm->version >= 0x01000202)
|
||||
flags |= NOUVEAU_BO_COHERENT;
|
||||
|
||||
ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo);
|
||||
|
|
@ -699,12 +700,13 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
screen->base.fence.update = nvc0_screen_fence_update;
|
||||
|
||||
|
||||
ret = nouveau_object_new(chan,
|
||||
(dev->chipset < 0xe0) ? 0x1f906e : 0x906e, 0x906e,
|
||||
NULL, 0, &screen->nvsw);
|
||||
ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
|
||||
NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
|
||||
if (ret)
|
||||
FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
|
||||
|
||||
BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
|
||||
PUSH_DATA (push, screen->nvsw->handle);
|
||||
|
||||
switch (dev->chipset & ~0xf) {
|
||||
case 0x110:
|
||||
|
|
@ -811,10 +813,11 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
PUSH_DATA (push, 0x17);
|
||||
}
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), dev->drm_version >= 0x01000101);
|
||||
IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE),
|
||||
screen->base.drm->version >= 0x01000101);
|
||||
BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8);
|
||||
for (i = 0; i < 8; ++i)
|
||||
PUSH_DATA(push, dev->drm_version >= 0x01000101);
|
||||
PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
|
|
@ -910,7 +913,7 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
|
||||
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
|
||||
|
||||
if (dev->drm_version >= 0x01000101) {
|
||||
if (screen->base.drm->version >= 0x01000101) {
|
||||
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
|
||||
if (ret) {
|
||||
NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
|
||||
|
|
@ -1061,11 +1064,11 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
|
||||
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
|
||||
|
||||
return pscreen;
|
||||
return &screen->base;
|
||||
|
||||
fail:
|
||||
nvc0_screen_destroy(pscreen);
|
||||
return NULL;
|
||||
screen->base.base.context_create = NULL;
|
||||
return &screen->base;
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
|||
|
|
@ -236,11 +236,8 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
|
|||
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
|
||||
struct nvc0_program *gp = nvc0->gmtyprog;
|
||||
|
||||
if (gp)
|
||||
nvc0_program_validate(nvc0, gp);
|
||||
|
||||
/* we allow GPs with no code for specifying stream output state only */
|
||||
if (gp && gp->code_size) {
|
||||
if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
|
||||
const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
|
||||
|
|
|
|||
|
|
@ -1030,9 +1030,11 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
|
|||
nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
|
||||
nvc0->cond_cond, nvc0->cond_mode);
|
||||
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
|
||||
nouveau_scratch_done(&nvc0->base);
|
||||
|
||||
nvc0->dirty = blit->saved.dirty |
|
||||
(NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK |
|
||||
|
|
|
|||
|
|
@ -193,9 +193,7 @@ nvc0_create_texture_view(struct pipe_context *pipe,
|
|||
tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
|
||||
break;
|
||||
default:
|
||||
NOUVEAU_ERR("unexpected/invalid texture target: %d\n",
|
||||
mt->base.base.target);
|
||||
return false;
|
||||
unreachable("unexpected/invalid texture target");
|
||||
}
|
||||
|
||||
tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
|
||||
|
|
|
|||
|
|
@ -95,6 +95,9 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
|
|||
}
|
||||
so->element[i].state = nvc0_format_table[fmt].vtx;
|
||||
so->need_conversion = true;
|
||||
pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
|
||||
"Converting vertex element %d, no hw format %s",
|
||||
i, util_format_name(ve->src_format));
|
||||
}
|
||||
size = util_format_get_blocksize(fmt);
|
||||
|
||||
|
|
|
|||
|
|
@ -2414,7 +2414,7 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
|
|||
struct r600_command_buffer *cb = &rctx->start_cs_cmd;
|
||||
int tmp, i;
|
||||
|
||||
r600_init_command_buffer(cb, 342);
|
||||
r600_init_command_buffer(cb, 338);
|
||||
|
||||
/* This must be first. */
|
||||
r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
|
||||
|
|
@ -2468,10 +2468,6 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
|
|||
|
||||
r600_store_context_reg(cb, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0);
|
||||
|
||||
r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
|
||||
r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
|
||||
r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
|
||||
|
||||
r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
|
||||
|
||||
r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
|
||||
|
|
@ -2671,7 +2667,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
|
|||
return;
|
||||
}
|
||||
|
||||
r600_init_command_buffer(cb, 342);
|
||||
r600_init_command_buffer(cb, 338);
|
||||
|
||||
/* This must be first. */
|
||||
r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
|
||||
|
|
@ -2896,10 +2892,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
|
|||
r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
|
||||
r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
|
||||
|
||||
r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
|
||||
r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
|
||||
r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
|
||||
|
||||
r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
|
||||
|
||||
r600_store_context_reg(cb, R_0288F0_SQ_VTX_SEMANTIC_CLEAR, ~0);
|
||||
|
|
@ -3731,7 +3723,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
|
|||
r600_init_atom(rctx, &rctx->blend_color.atom, id++, r600_emit_blend_color, 6);
|
||||
r600_init_atom(rctx, &rctx->blend_state.atom, id++, r600_emit_cso_state, 0);
|
||||
r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, evergreen_emit_cb_misc_state, 4);
|
||||
r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6);
|
||||
r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 9);
|
||||
r600_init_atom(rctx, &rctx->clip_state.atom, id++, evergreen_emit_clip_state, 26);
|
||||
r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 10);
|
||||
r600_init_atom(rctx, &rctx->db_state.atom, id++, evergreen_emit_db_state, 14);
|
||||
|
|
|
|||
|
|
@ -152,6 +152,7 @@ struct r600_clip_misc_state {
|
|||
unsigned clip_plane_enable; /* from rasterizer */
|
||||
unsigned clip_dist_write; /* from vertex shader */
|
||||
boolean clip_disable; /* from vertex shader */
|
||||
boolean vs_out_viewport; /* from vertex shader */
|
||||
};
|
||||
|
||||
struct r600_alphatest_state {
|
||||
|
|
|
|||
|
|
@ -1377,11 +1377,13 @@ static void r600_update_clip_state(struct r600_context *rctx,
|
|||
{
|
||||
if (current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
|
||||
current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write ||
|
||||
current->shader.vs_position_window_space != rctx->clip_misc_state.clip_disable) {
|
||||
rctx->clip_misc_state.pa_cl_vs_out_cntl = current->pa_cl_vs_out_cntl;
|
||||
rctx->clip_misc_state.clip_dist_write = current->shader.clip_dist_write;
|
||||
rctx->clip_misc_state.clip_disable = current->shader.vs_position_window_space;
|
||||
r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
|
||||
current->shader.vs_position_window_space != rctx->clip_misc_state.clip_disable ||
|
||||
current->shader.vs_out_viewport != rctx->clip_misc_state.vs_out_viewport) {
|
||||
rctx->clip_misc_state.pa_cl_vs_out_cntl = current->pa_cl_vs_out_cntl;
|
||||
rctx->clip_misc_state.clip_dist_write = current->shader.clip_dist_write;
|
||||
rctx->clip_misc_state.clip_disable = current->shader.vs_position_window_space;
|
||||
rctx->clip_misc_state.vs_out_viewport = current->shader.vs_out_viewport;
|
||||
r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1656,6 +1658,10 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
|
|||
radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
|
||||
state->pa_cl_vs_out_cntl |
|
||||
(state->clip_plane_enable & state->clip_dist_write));
|
||||
/* reuse needs to be set off if we write oViewport */
|
||||
if (rctx->b.chip_class >= EVERGREEN)
|
||||
radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
|
||||
S_028AB4_REUSE_OFF(state->vs_out_viewport));
|
||||
}
|
||||
|
||||
static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
|
||||
|
|
|
|||
|
|
@ -16,7 +16,8 @@ libradeon_la_SOURCES = \
|
|||
if NEED_RADEON_LLVM
|
||||
|
||||
AM_CFLAGS += \
|
||||
$(LLVM_CFLAGS)
|
||||
$(LLVM_CFLAGS) \
|
||||
$(LIBELF_CFLAGS)
|
||||
|
||||
libradeon_la_SOURCES += \
|
||||
$(LLVM_C_FILES)
|
||||
|
|
@ -24,7 +25,7 @@ libradeon_la_SOURCES += \
|
|||
libradeon_la_LIBADD = \
|
||||
$(CLOCK_LIB) \
|
||||
$(LLVM_LIBS) \
|
||||
$(ELF_LIB)
|
||||
$(LIBELF_LIBS)
|
||||
|
||||
libradeon_la_LDFLAGS = \
|
||||
$(LLVM_LDFLAGS)
|
||||
|
|
|
|||
|
|
@ -202,9 +202,6 @@ static void r600_pc_query_add_result(struct r600_common_context *ctx,
|
|||
for (i = 0; i < query->num_counters; ++i) {
|
||||
struct r600_pc_counter *counter = &query->counters[i];
|
||||
|
||||
if (counter->base == ~0)
|
||||
continue;
|
||||
|
||||
for (j = 0; j < counter->dwords; ++j) {
|
||||
uint32_t value = results[counter->base + j * counter->stride];
|
||||
result->batch[i].u32 += value;
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ static void r600_query_sw_end(struct r600_common_context *rctx,
|
|||
rctx->b.flush(&rctx->b, &query->fence, 0);
|
||||
break;
|
||||
case R600_QUERY_DRAW_CALLS:
|
||||
query->begin_result = rctx->num_draw_calls;
|
||||
query->end_result = rctx->num_draw_calls;
|
||||
break;
|
||||
case R600_QUERY_REQUESTED_VRAM:
|
||||
case R600_QUERY_REQUESTED_GTT:
|
||||
|
|
@ -141,10 +141,10 @@ static void r600_query_sw_end(struct r600_common_context *rctx,
|
|||
query->begin_result = 0;
|
||||
break;
|
||||
case R600_QUERY_NUM_COMPILATIONS:
|
||||
query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
|
||||
query->end_result = p_atomic_read(&rctx->screen->num_compilations);
|
||||
break;
|
||||
case R600_QUERY_NUM_SHADERS_CREATED:
|
||||
query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
|
||||
query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
|
||||
break;
|
||||
default:
|
||||
unreachable("r600_query_sw_end: bad query type");
|
||||
|
|
|
|||
|
|
@ -188,8 +188,8 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
|
|||
if (mem_err) {
|
||||
fprintf(stderr, "%s: %s", __FUNCTION__, err);
|
||||
FREE(err);
|
||||
LLVMDisposeTargetMachine(tm);
|
||||
return 1;
|
||||
rval = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (0 != rval) {
|
||||
|
|
@ -205,6 +205,7 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
|
|||
/* Clean up */
|
||||
LLVMDisposeMemoryBuffer(out_buffer);
|
||||
|
||||
out:
|
||||
if (dispose_tm) {
|
||||
LLVMDisposeTargetMachine(tm);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -436,7 +436,7 @@ static void si_pc_emit_select(struct r600_common_context *ctx,
|
|||
|
||||
dw = count + regs->num_prelude;
|
||||
if (count >= regs->num_multi)
|
||||
count += regs->num_multi;
|
||||
dw += regs->num_multi;
|
||||
radeon_set_uconfig_reg_seq(cs, regs->select0, dw);
|
||||
for (idx = 0; idx < regs->num_prelude; ++idx)
|
||||
radeon_emit(cs, 0);
|
||||
|
|
|
|||
|
|
@ -605,6 +605,10 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
|
|||
(clipdist_mask ? 0 :
|
||||
sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
|
||||
S_028810_CLIP_DISABLE(window_space));
|
||||
|
||||
/* reuse needs to be set off if we write oViewport */
|
||||
radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
|
||||
S_028AB4_REUSE_OFF(info->writes_viewport_index));
|
||||
}
|
||||
|
||||
static void si_set_scissor_states(struct pipe_context *ctx,
|
||||
|
|
@ -3468,7 +3472,6 @@ static void si_init_config(struct si_context *sctx)
|
|||
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
|
||||
|
||||
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
|
||||
si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
|
||||
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
|
||||
if (sctx->b.chip_class < CIK)
|
||||
si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
|
||||
|
|
|
|||
|
|
@ -343,6 +343,13 @@ struct svga_hw_draw_state
|
|||
SVGA3dElementLayoutId layout_id;
|
||||
SVGA3dPrimitiveType topology;
|
||||
|
||||
struct svga_winsys_surface *ib; /**< index buffer for drawing */
|
||||
SVGA3dSurfaceFormat ib_format;
|
||||
unsigned ib_offset;
|
||||
|
||||
unsigned num_samplers[PIPE_SHADER_TYPES];
|
||||
SVGA3dSamplerId samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
|
||||
|
||||
/* used for rebinding */
|
||||
unsigned num_sampler_views[PIPE_SHADER_TYPES];
|
||||
unsigned default_constbuf_size[PIPE_SHADER_TYPES];
|
||||
|
|
|
|||
|
|
@ -539,11 +539,18 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
|
|||
SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);
|
||||
|
||||
/* setup index buffer */
|
||||
ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
|
||||
indexFormat,
|
||||
range->indexArray.offset);
|
||||
if (ret != PIPE_OK)
|
||||
return ret;
|
||||
if (ib_handle != svga->state.hw_draw.ib ||
|
||||
indexFormat != svga->state.hw_draw.ib_format ||
|
||||
range->indexArray.offset != svga->state.hw_draw.ib_offset) {
|
||||
ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
|
||||
indexFormat,
|
||||
range->indexArray.offset);
|
||||
if (ret != PIPE_OK)
|
||||
return ret;
|
||||
svga->state.hw_draw.ib = ib_handle;
|
||||
svga->state.hw_draw.ib_format = indexFormat;
|
||||
svga->state.hw_draw.ib_offset = range->indexArray.offset;
|
||||
}
|
||||
|
||||
if (instance_count > 1) {
|
||||
ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
|
||||
|
|
|
|||
|
|
@ -129,7 +129,11 @@ update_state(struct svga_context *svga,
|
|||
const struct svga_tracked_state *atoms[],
|
||||
unsigned *state)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
boolean debug = TRUE;
|
||||
#else
|
||||
boolean debug = FALSE;
|
||||
#endif
|
||||
enum pipe_error ret = PIPE_OK;
|
||||
unsigned i;
|
||||
|
||||
|
|
|
|||
|
|
@ -301,13 +301,21 @@ update_samplers(struct svga_context *svga, unsigned dirty )
|
|||
}
|
||||
|
||||
if (count > 0) {
|
||||
ret = SVGA3D_vgpu10_SetSamplers(svga->swc,
|
||||
count,
|
||||
0, /* start */
|
||||
svga_shader_type(shader), /* type */
|
||||
ids);
|
||||
if (ret != PIPE_OK)
|
||||
return ret;
|
||||
if (count != svga->state.hw_draw.num_samplers[shader] ||
|
||||
memcmp(ids, svga->state.hw_draw.samplers[shader],
|
||||
count * sizeof(ids[0])) != 0) {
|
||||
/* HW state is really changing */
|
||||
ret = SVGA3D_vgpu10_SetSamplers(svga->swc,
|
||||
count,
|
||||
0, /* start */
|
||||
svga_shader_type(shader), /* type */
|
||||
ids);
|
||||
if (ret != PIPE_OK)
|
||||
return ret;
|
||||
memcpy(svga->state.hw_draw.samplers[shader], ids,
|
||||
count * sizeof(ids[0]));
|
||||
svga->state.hw_draw.num_samplers[shader] = count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ C_SOURCES := \
|
|||
vc4_program.c \
|
||||
vc4_qir.c \
|
||||
vc4_qir_lower_uniforms.c \
|
||||
vc4_qir_schedule.c \
|
||||
vc4_qir.h \
|
||||
vc4_qpu.c \
|
||||
vc4_qpu_defines.h \
|
||||
|
|
|
|||
|
|
@ -54,8 +54,8 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
|
|||
bool old_msaa = vc4->msaa;
|
||||
int old_tile_width = vc4->tile_width;
|
||||
int old_tile_height = vc4->tile_height;
|
||||
bool msaa = (info->src.resource->nr_samples ||
|
||||
info->dst.resource->nr_samples);
|
||||
bool msaa = (info->src.resource->nr_samples > 1 ||
|
||||
info->dst.resource->nr_samples > 1);
|
||||
int tile_width = msaa ? 32 : 64;
|
||||
int tile_height = msaa ? 32 : 64;
|
||||
|
||||
|
|
@ -110,9 +110,11 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
|
|||
|
||||
pipe_surface_reference(&vc4->color_read, src_surf);
|
||||
pipe_surface_reference(&vc4->color_write,
|
||||
dst_surf->texture->nr_samples ? NULL : dst_surf);
|
||||
dst_surf->texture->nr_samples > 1 ?
|
||||
NULL : dst_surf);
|
||||
pipe_surface_reference(&vc4->msaa_color_write,
|
||||
dst_surf->texture->nr_samples ? dst_surf : NULL);
|
||||
dst_surf->texture->nr_samples > 1 ?
|
||||
dst_surf : NULL);
|
||||
pipe_surface_reference(&vc4->zs_read, NULL);
|
||||
pipe_surface_reference(&vc4->zs_write, NULL);
|
||||
pipe_surface_reference(&vc4->msaa_zs_write, NULL);
|
||||
|
|
|
|||
|
|
@ -67,15 +67,13 @@ vc4_flush(struct pipe_context *pctx)
|
|||
cl_u8(&bcl, VC4_PACKET_FLUSH);
|
||||
cl_end(&vc4->bcl, bcl);
|
||||
|
||||
vc4->msaa = false;
|
||||
if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
|
||||
pipe_surface_reference(&vc4->color_write,
|
||||
cbuf->texture->nr_samples ? NULL : cbuf);
|
||||
cbuf->texture->nr_samples > 1 ?
|
||||
NULL : cbuf);
|
||||
pipe_surface_reference(&vc4->msaa_color_write,
|
||||
cbuf->texture->nr_samples ? cbuf : NULL);
|
||||
|
||||
if (cbuf->texture->nr_samples)
|
||||
vc4->msaa = true;
|
||||
cbuf->texture->nr_samples > 1 ?
|
||||
cbuf : NULL);
|
||||
|
||||
if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) {
|
||||
pipe_surface_reference(&vc4->color_read, cbuf);
|
||||
|
|
@ -92,15 +90,12 @@ vc4_flush(struct pipe_context *pctx)
|
|||
if (vc4->framebuffer.zsbuf &&
|
||||
(vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
|
||||
pipe_surface_reference(&vc4->zs_write,
|
||||
zsbuf->texture->nr_samples ?
|
||||
zsbuf->texture->nr_samples > 1 ?
|
||||
NULL : zsbuf);
|
||||
pipe_surface_reference(&vc4->msaa_zs_write,
|
||||
zsbuf->texture->nr_samples ?
|
||||
zsbuf->texture->nr_samples > 1 ?
|
||||
zsbuf : NULL);
|
||||
|
||||
if (zsbuf->texture->nr_samples)
|
||||
vc4->msaa = true;
|
||||
|
||||
if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
|
||||
pipe_surface_reference(&vc4->zs_read, zsbuf);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@
|
|||
#define DRM_VC4_CREATE_BO 0x03
|
||||
#define DRM_VC4_MMAP_BO 0x04
|
||||
#define DRM_VC4_CREATE_SHADER_BO 0x05
|
||||
#define DRM_VC4_GET_HANG_STATE 0x06
|
||||
|
||||
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
|
||||
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
|
||||
|
|
@ -39,6 +40,7 @@
|
|||
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
|
||||
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
|
||||
#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
|
||||
#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
|
||||
|
||||
struct drm_vc4_submit_rcl_surface {
|
||||
uint32_t hindex; /* Handle index, or ~0 if not present. */
|
||||
|
|
@ -231,4 +233,47 @@ struct drm_vc4_mmap_bo {
|
|||
uint64_t offset;
|
||||
};
|
||||
|
||||
struct drm_vc4_get_hang_state_bo {
|
||||
uint32_t handle;
|
||||
uint32_t paddr;
|
||||
uint32_t size;
|
||||
uint32_t pad;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct drm_vc4_get_hang_state - ioctl argument for collecting state
|
||||
* from a GPU hang for analysis.
|
||||
*/
|
||||
struct drm_vc4_get_hang_state {
|
||||
/** Pointer to array of struct drm_vc4_get_hang_state_bo. */
|
||||
uint64_t bo;
|
||||
/**
|
||||
* On input, the size of the bo array. Output is the number
|
||||
* of bos to be returned.
|
||||
*/
|
||||
uint32_t bo_count;
|
||||
|
||||
uint32_t start_bin, start_render;
|
||||
|
||||
uint32_t ct0ca, ct0ea;
|
||||
uint32_t ct1ca, ct1ea;
|
||||
uint32_t ct0cs, ct1cs;
|
||||
uint32_t ct0ra0, ct1ra0;
|
||||
|
||||
uint32_t bpca, bpcs;
|
||||
uint32_t bpoa, bpos;
|
||||
|
||||
uint32_t vpmbase;
|
||||
|
||||
uint32_t dbge;
|
||||
uint32_t fdbgo;
|
||||
uint32_t fdbgb;
|
||||
uint32_t fdbgr;
|
||||
uint32_t fdbgs;
|
||||
uint32_t errstat;
|
||||
|
||||
/* Pad that we may save more registers into in the future. */
|
||||
uint32_t pad[16];
|
||||
};
|
||||
|
||||
#endif /* _UAPI_VC4_DRM_H_ */
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
|
|||
submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
|
||||
submit_surf->offset = surf->offset;
|
||||
|
||||
if (psurf->texture->nr_samples == 0) {
|
||||
if (psurf->texture->nr_samples <= 1) {
|
||||
if (is_depth) {
|
||||
submit_surf->bits =
|
||||
VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
|
||||
|
|
@ -132,7 +132,7 @@ vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4,
|
|||
submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
|
||||
submit_surf->offset = surf->offset;
|
||||
|
||||
if (psurf->texture->nr_samples == 0) {
|
||||
if (psurf->texture->nr_samples <= 1) {
|
||||
submit_surf->bits =
|
||||
VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
|
||||
VC4_RENDER_CONFIG_FORMAT_BGR565 :
|
||||
|
|
@ -240,9 +240,11 @@ vc4_job_submit(struct vc4_context *vc4)
|
|||
#else
|
||||
ret = vc4_simulator_flush(vc4, &submit);
|
||||
#endif
|
||||
if (ret) {
|
||||
fprintf(stderr, "VC4 submit failed\n");
|
||||
abort();
|
||||
static bool warned = false;
|
||||
if (ret && !warned) {
|
||||
fprintf(stderr, "Draw call returned %s. "
|
||||
"Expect corruption.\n", strerror(errno));
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1848,12 +1848,15 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
|||
qir_optimize(c);
|
||||
qir_lower_uniforms(c);
|
||||
|
||||
qir_schedule_instructions(c);
|
||||
|
||||
if (vc4_debug & VC4_DEBUG_QIR) {
|
||||
fprintf(stderr, "%s prog %d/%d QIR:\n",
|
||||
qir_get_stage_name(c->stage),
|
||||
c->program_id, c->variant_id);
|
||||
qir_dump(c);
|
||||
}
|
||||
|
||||
qir_reorder_uniforms(c);
|
||||
vc4_generate_code(vc4, c);
|
||||
|
||||
|
|
@ -2043,7 +2046,7 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
|
|||
key->tex[i].swizzle[2] = sampler->swizzle_b;
|
||||
key->tex[i].swizzle[3] = sampler->swizzle_a;
|
||||
|
||||
if (sampler->texture->nr_samples) {
|
||||
if (sampler->texture->nr_samples > 1) {
|
||||
key->tex[i].msaa_width = sampler->texture->width0;
|
||||
key->tex[i].msaa_height = sampler->texture->height0;
|
||||
} else if (sampler){
|
||||
|
|
|
|||
|
|
@ -459,6 +459,7 @@ void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
|
|||
struct qreg qir_uniform(struct vc4_compile *c,
|
||||
enum quniform_contents contents,
|
||||
uint32_t data);
|
||||
void qir_schedule_instructions(struct vc4_compile *c);
|
||||
void qir_reorder_uniforms(struct vc4_compile *c);
|
||||
|
||||
void qir_emit(struct vc4_compile *c, struct qinst *inst);
|
||||
|
|
|
|||
619
src/gallium/drivers/vc4/vc4_qir_schedule.c
Normal file
619
src/gallium/drivers/vc4/vc4_qir_schedule.c
Normal file
|
|
@ -0,0 +1,619 @@
|
|||
/*
|
||||
* Copyright © 2010 Intel Corporation
|
||||
* Copyright © 2014-2015 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file vc4_qir_schedule.c
|
||||
*
|
||||
* The basic model of the list scheduler is to take a basic block, compute a
|
||||
* DAG of the dependencies from the bottom up, and make a list of the DAG
|
||||
* heads. Heuristically pick a DAG head and schedule (remove) it, then put
|
||||
* all the parents that are now DAG heads into the list of things to
|
||||
* schedule.
|
||||
*
|
||||
* The goal of scheduling here, before register allocation and conversion to
|
||||
* QPU instructions, is to reduce register pressure by reordering instructions
|
||||
* to consume values when possible.
|
||||
*/
|
||||
|
||||
#include "vc4_qir.h"
|
||||
|
||||
static bool debug;
|
||||
|
||||
/* Scheduler wrapper around one QIR instruction: a node of the dependency DAG. */
struct schedule_node {
|
||||
/* Entry in a scheduler list (the worklist of struct schedule_state). */
struct list_head link;
|
||||
/* The instruction this node wraps. */
struct qinst *inst;
|
||||
|
||||
/* Nodes recorded by add_dep() as having to come before this one. */
struct schedule_node **children;
|
||||
/* Number of valid entries in children[]. */
uint32_t child_count;
|
||||
/* Allocated capacity of children[], grown on demand by add_dep(). */
uint32_t child_array_size;
|
||||
/* Number of children[] entries in other nodes that point at this node and
 * whose owner has not been scheduled yet; the node is ready when it is 0.
 */
uint32_t parent_count;
|
||||
|
||||
/* Length of the longest (latency) chain from this instruction down
|
||||
* through its children, as computed by compute_delay().
|
||||
*/
|
||||
uint32_t delay;
|
||||
|
||||
/* Longest time + latency_between(parent, this) of any parent of this
|
||||
* node.
|
||||
*/
|
||||
uint32_t unblocked_time;
|
||||
};
|
||||
|
||||
/* State of the list scheduler while it picks instructions. */
struct schedule_state {
|
||||
/* List of struct schedule_node *. This starts out with all
|
||||
* instructions, and after dependency updates it's trimmed to be just
|
||||
* the DAG heads.
|
||||
*/
|
||||
struct list_head worklist;
|
||||
|
||||
/* Estimated time of the instruction being scheduled; raised to the chosen
 * node's unblocked_time and incremented once per scheduled instruction.
 */
uint32_t time;
|
||||
|
||||
/* Per-temp count of instructions writing that temp that have not been
 * scheduled yet.
 */
uint32_t *temp_writes;
|
||||
|
||||
/* Bitset, one bit per temp: set when a scheduled instruction reads the
 * temp, cleared once the last write of that temp has been scheduled.
 */
BITSET_WORD *temp_live;
|
||||
};
|
||||
|
||||
/* When walking the instructions in reverse, we need to swap before/after in
|
||||
* add_dep().
|
||||
*/
|
||||
enum direction { F, R }; /* F: forward (top-to-bottom) walk, R: reverse (bottom-to-top) walk */
|
||||
|
||||
/**
|
||||
* Marks a dependency between two intructions, that @after must appear after
|
||||
* @before.
|
||||
*
|
||||
* Our dependencies are tracked as a DAG. Since we're scheduling bottom-up,
|
||||
* the latest instructions with nothing left to schedule are the DAG heads,
|
||||
* and their inputs are their children.
|
||||
*/
|
||||
static void
|
||||
add_dep(enum direction dir,
|
||||
struct schedule_node *before,
|
||||
struct schedule_node *after)
|
||||
{
|
||||
if (!before || !after)
|
||||
return;
|
||||
|
||||
assert(before != after);
|
||||
|
||||
if (dir == R) {
|
||||
struct schedule_node *t = before;
|
||||
before = after;
|
||||
after = t;
|
||||
}
|
||||
|
||||
for (int i = 0; i < after->child_count; i++) {
|
||||
if (after->children[i] == after)
|
||||
return;
|
||||
}
|
||||
|
||||
if (after->child_array_size <= after->child_count) {
|
||||
after->child_array_size = MAX2(after->child_array_size * 2, 16);
|
||||
after->children = reralloc(after, after->children,
|
||||
struct schedule_node *,
|
||||
after->child_array_size);
|
||||
}
|
||||
|
||||
after->children[after->child_count] = before;
|
||||
after->child_count++;
|
||||
before->parent_count++;
|
||||
}
|
||||
|
||||
static void
|
||||
add_write_dep(enum direction dir,
|
||||
struct schedule_node **before,
|
||||
struct schedule_node *after)
|
||||
{
|
||||
add_dep(dir, *before, after);
|
||||
*before = after;
|
||||
}
|
||||
|
||||
/* Bookkeeping used while walking the instructions to build the dependency
 * DAG.  The last_* members hold the most recently visited node touching each
 * resource, and are updated through add_write_dep().
 */
struct schedule_setup_state {
|
||||
/* Indexed by temp index; allocated with c->num_temps entries. */
struct schedule_node **last_temp_write;
|
||||
struct schedule_node *last_sf;
|
||||
struct schedule_node *last_vary_read;
|
||||
struct schedule_node *last_vpm_read;
|
||||
struct schedule_node *last_vpm_write;
|
||||
struct schedule_node *last_tex_coord;
|
||||
struct schedule_node *last_tex_result;
|
||||
struct schedule_node *last_tlb;
|
||||
/* Walk direction, passed on to add_dep()/add_write_dep(). */
enum direction dir;
|
||||
|
||||
/**
|
||||
* Texture FIFO tracking. This is done top-to-bottom, and is used to
|
||||
* track the QOP_TEX_RESULTs and add dependencies on previous ones
|
||||
* when trying to submit texture coords with TFREQ full or new texture
|
||||
* fetches with TXRCV full.
|
||||
*/
|
||||
struct {
|
||||
struct schedule_node *node;
|
||||
int coords;
|
||||
} tex_fifo[8];
|
||||
int tfreq_count; /**< Number of texture coords outstanding. */
|
||||
int tfrcv_count; /**< Number of texture results outstanding. */
|
||||
/* Index of the tex_fifo[] entry currently being filled in. */
int tex_fifo_pos;
|
||||
};
|
||||
|
||||
static void
|
||||
block_until_tex_result(struct schedule_setup_state *state, struct schedule_node *n)
|
||||
{
|
||||
add_dep(state->dir, state->tex_fifo[0].node, n);
|
||||
|
||||
state->tfreq_count -= state->tex_fifo[0].coords;
|
||||
state->tfrcv_count--;
|
||||
|
||||
memmove(&state->tex_fifo[0],
|
||||
&state->tex_fifo[1],
|
||||
state->tex_fifo_pos * sizeof(state->tex_fifo[0]));
|
||||
state->tex_fifo_pos--;
|
||||
}
|
||||
|
||||
/**
|
||||
* Common code for dependencies that need to be tracked both forward and
|
||||
* backward.
|
||||
*
|
||||
* This is for things like "all VPM reads have to happen in order."
|
||||
*/
|
||||
static void
|
||||
calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
|
||||
{
|
||||
struct qinst *inst = n->inst;
|
||||
enum direction dir = state->dir;
|
||||
|
||||
|
||||
/* Add deps for temp registers and varyings accesses. Note that we
|
||||
* ignore uniforms accesses, because qir_reorder_uniforms() happens
|
||||
* after this.
|
||||
*/
|
||||
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
|
||||
switch (inst->src[i].file) {
|
||||
case QFILE_TEMP:
|
||||
add_dep(dir,
|
||||
state->last_temp_write[inst->src[i].index], n);
|
||||
break;
|
||||
|
||||
case QFILE_VARY:
|
||||
add_write_dep(dir, &state->last_vary_read, n);
|
||||
break;
|
||||
|
||||
case QFILE_VPM:
|
||||
add_write_dep(dir, &state->last_vpm_read, n);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (inst->op) {
|
||||
case QOP_VARY_ADD_C:
|
||||
add_dep(dir, state->last_vary_read, n);
|
||||
break;
|
||||
|
||||
case QOP_TEX_S:
|
||||
case QOP_TEX_T:
|
||||
case QOP_TEX_R:
|
||||
case QOP_TEX_B:
|
||||
case QOP_TEX_DIRECT:
|
||||
/* Texturing setup gets scheduled in order, because
|
||||
* the uniforms referenced by them have to land in a
|
||||
* specific order.
|
||||
*/
|
||||
add_write_dep(dir, &state->last_tex_coord, n);
|
||||
break;
|
||||
|
||||
case QOP_TEX_RESULT:
|
||||
/* Results have to be fetched in order. */
|
||||
add_write_dep(dir, &state->last_tex_result, n);
|
||||
break;
|
||||
|
||||
case QOP_TLB_COLOR_WRITE:
|
||||
case QOP_TLB_COLOR_READ:
|
||||
case QOP_TLB_Z_WRITE:
|
||||
case QOP_TLB_STENCIL_SETUP:
|
||||
case QOP_MS_MASK:
|
||||
add_write_dep(dir, &state->last_tlb, n);
|
||||
break;
|
||||
|
||||
case QOP_TLB_DISCARD_SETUP:
|
||||
add_write_dep(dir, &state->last_sf, n);
|
||||
add_write_dep(dir, &state->last_tlb, n);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (inst->dst.file == QFILE_VPM)
|
||||
add_write_dep(dir, &state->last_vpm_write, n);
|
||||
else if (inst->dst.file == QFILE_TEMP)
|
||||
add_write_dep(dir, &state->last_temp_write[inst->dst.index], n);
|
||||
|
||||
if (inst->sf)
|
||||
add_write_dep(dir, &state->last_sf, n);
|
||||
|
||||
if (qir_depends_on_flags(inst)) {
|
||||
add_dep(dir, state->last_sf, n);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
|
||||
struct list_head *schedule_list)
|
||||
{
|
||||
struct schedule_setup_state state;
|
||||
|
||||
memset(&state, 0, sizeof(state));
|
||||
state.last_temp_write = rzalloc_array(mem_ctx, struct schedule_node *,
|
||||
c->num_temps);
|
||||
state.dir = F;
|
||||
|
||||
list_for_each_entry(struct schedule_node, n, schedule_list, link) {
|
||||
struct qinst *inst = n->inst;
|
||||
|
||||
calculate_deps(&state, n);
|
||||
|
||||
switch (inst->op) {
|
||||
case QOP_TEX_S:
|
||||
case QOP_TEX_T:
|
||||
case QOP_TEX_R:
|
||||
case QOP_TEX_B:
|
||||
case QOP_TEX_DIRECT:
|
||||
/* If the texture coordinate fifo is full,
|
||||
* block this on the last QOP_TEX_RESULT.
|
||||
*/
|
||||
if (state.tfreq_count == 8) {
|
||||
block_until_tex_result(&state, n);
|
||||
}
|
||||
|
||||
/* If the texture result fifo is full, block
|
||||
* adding any more to it until the last
|
||||
* QOP_TEX_RESULT.
|
||||
*/
|
||||
if (inst->op == QOP_TEX_S ||
|
||||
inst->op == QOP_TEX_DIRECT) {
|
||||
if (state.tfrcv_count == 4)
|
||||
block_until_tex_result(&state, n);
|
||||
state.tfrcv_count++;
|
||||
}
|
||||
|
||||
state.tex_fifo[state.tex_fifo_pos].coords++;
|
||||
state.tfreq_count++;
|
||||
break;
|
||||
|
||||
case QOP_TEX_RESULT:
|
||||
/* Results have to be fetched after the
|
||||
* coordinate setup. Note that we're assuming
|
||||
* here that our input shader has the texture
|
||||
* coord setup and result fetch in order,
|
||||
* which is true initially but not of our
|
||||
* instruction stream after this pass.
|
||||
*/
|
||||
add_dep(state.dir, state.last_tex_coord, n);
|
||||
|
||||
state.tex_fifo[state.tex_fifo_pos].node = n;
|
||||
|
||||
state.tex_fifo_pos++;
|
||||
memset(&state.tex_fifo[state.tex_fifo_pos], 0,
|
||||
sizeof(state.tex_fifo[0]));
|
||||
break;
|
||||
default:
|
||||
assert(!qir_is_tex(inst));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
calculate_reverse_deps(struct vc4_compile *c, void *mem_ctx,
|
||||
struct list_head *schedule_list)
|
||||
{
|
||||
struct schedule_setup_state state;
|
||||
|
||||
memset(&state, 0, sizeof(state));
|
||||
state.dir = R;
|
||||
state.last_temp_write = rzalloc_array(mem_ctx, struct schedule_node *,
|
||||
c->num_temps);
|
||||
|
||||
list_for_each_entry_rev(struct schedule_node, n, schedule_list, link) {
|
||||
calculate_deps(&state, n);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
get_register_pressure_cost(struct schedule_state *state, struct qinst *inst)
|
||||
{
|
||||
int cost = 0;
|
||||
|
||||
if (inst->dst.file == QFILE_TEMP &&
|
||||
state->temp_writes[inst->dst.index] == 1)
|
||||
cost--;
|
||||
|
||||
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
|
||||
if (inst->src[i].file == QFILE_TEMP &&
|
||||
!BITSET_TEST(state->temp_live, inst->src[i].index)) {
|
||||
cost++;
|
||||
}
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
static bool
|
||||
locks_scoreboard(struct qinst *inst)
|
||||
{
|
||||
switch (inst->op) {
|
||||
case QOP_TLB_Z_WRITE:
|
||||
case QOP_TLB_COLOR_WRITE:
|
||||
case QOP_TLB_COLOR_WRITE_MS:
|
||||
case QOP_TLB_COLOR_READ:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static struct schedule_node *
|
||||
choose_instruction(struct schedule_state *state)
|
||||
{
|
||||
struct schedule_node *chosen = NULL;
|
||||
|
||||
list_for_each_entry(struct schedule_node, n, &state->worklist, link) {
|
||||
if (!chosen) {
|
||||
chosen = n;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Prefer scheduling things that lock the scoreboard, so that
|
||||
* they appear late in the program and we get more parallelism
|
||||
* between shaders on multiple QPUs hitting the same fragment.
|
||||
*/
|
||||
if (locks_scoreboard(n->inst) &&
|
||||
!locks_scoreboard(chosen->inst)) {
|
||||
chosen = n;
|
||||
continue;
|
||||
} else if (!locks_scoreboard(n->inst) &&
|
||||
locks_scoreboard(chosen->inst)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If we would block on the previously chosen node, but would
|
||||
* block less on this one, then then prefer it.
|
||||
*/
|
||||
if (chosen->unblocked_time > state->time &&
|
||||
n->unblocked_time < chosen->unblocked_time) {
|
||||
chosen = n;
|
||||
continue;
|
||||
} else if (n->unblocked_time > state->time &&
|
||||
n->unblocked_time > chosen->unblocked_time) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If we can definitely reduce register pressure, do so
|
||||
* immediately.
|
||||
*/
|
||||
int register_pressure_cost =
|
||||
get_register_pressure_cost(state, n->inst);
|
||||
int chosen_register_pressure_cost =
|
||||
get_register_pressure_cost(state, chosen->inst);
|
||||
|
||||
if (register_pressure_cost < chosen_register_pressure_cost) {
|
||||
chosen = n;
|
||||
continue;
|
||||
} else if (register_pressure_cost >
|
||||
chosen_register_pressure_cost) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise, prefer instructions with the deepest chain to
|
||||
* the end of the program. This avoids the problem of
|
||||
* "everything generates a temp, nothing finishes freeing one,
|
||||
* guess I'll just keep emitting varying mul/adds".
|
||||
*/
|
||||
if (n->delay > chosen->delay) {
|
||||
chosen = n;
|
||||
continue;
|
||||
} else if (n->delay < chosen->delay) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return chosen;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_state(struct vc4_compile *c, struct schedule_state *state)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
list_for_each_entry(struct schedule_node, n, &state->worklist, link) {
|
||||
fprintf(stderr, "%3d: ", i++);
|
||||
qir_dump_inst(c, n->inst);
|
||||
fprintf(stderr, " (%d cost)\n",
|
||||
get_register_pressure_cost(state, n->inst));
|
||||
|
||||
for (int i = 0; i < n->child_count; i++) {
|
||||
struct schedule_node *child = n->children[i];
|
||||
fprintf(stderr, " - ");
|
||||
qir_dump_inst(c, child->inst);
|
||||
fprintf(stderr, " (%d parents)\n", child->parent_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Estimate of how many instructions we should schedule between operations.
|
||||
*
|
||||
* These aren't in real cycle counts, because we're just estimating cycle
|
||||
* times anyway. QIR instructions will get paired up when turned into QPU
|
||||
* instructions, or extra NOP delays will have to be added due to register
|
||||
* allocation choices.
|
||||
*/
|
||||
static uint32_t
|
||||
latency_between(struct schedule_node *before, struct schedule_node *after)
|
||||
{
|
||||
if ((before->inst->op == QOP_TEX_S ||
|
||||
before->inst->op == QOP_TEX_DIRECT) &&
|
||||
after->inst->op == QOP_TEX_RESULT)
|
||||
return 100;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/** Recursive computation of the delay member of a node. */
|
||||
static void
|
||||
compute_delay(struct schedule_node *n)
|
||||
{
|
||||
if (!n->child_count) {
|
||||
/* The color read needs to be scheduled late, to avoid locking
|
||||
* the scoreboard early. This is our best tool for
|
||||
* encouraging that. The other scoreboard locking ops will
|
||||
* have this happen by default, since they are generally the
|
||||
* DAG heads or close to them.
|
||||
*/
|
||||
if (n->inst->op == QOP_TLB_COLOR_READ)
|
||||
n->delay = 1000;
|
||||
else
|
||||
n->delay = 1;
|
||||
} else {
|
||||
for (int i = 0; i < n->child_count; i++) {
|
||||
if (!n->children[i]->delay)
|
||||
compute_delay(n->children[i]);
|
||||
n->delay = MAX2(n->delay,
|
||||
n->children[i]->delay +
|
||||
latency_between(n, n->children[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
schedule_instructions(struct vc4_compile *c, struct schedule_state *state)
|
||||
{
|
||||
if (debug) {
|
||||
fprintf(stderr, "initial deps:\n");
|
||||
dump_state(c, state);
|
||||
}
|
||||
|
||||
/* Remove non-DAG heads from the list. */
|
||||
list_for_each_entry_safe(struct schedule_node, n,
|
||||
&state->worklist, link) {
|
||||
if (n->parent_count != 0)
|
||||
list_del(&n->link);
|
||||
}
|
||||
|
||||
state->time = 0;
|
||||
while (!list_empty(&state->worklist)) {
|
||||
struct schedule_node *chosen = choose_instruction(state);
|
||||
struct qinst *inst = chosen->inst;
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "current list:\n");
|
||||
dump_state(c, state);
|
||||
fprintf(stderr, "chose: ");
|
||||
qir_dump_inst(c, inst);
|
||||
fprintf(stderr, " (%d cost)\n",
|
||||
get_register_pressure_cost(state, inst));
|
||||
}
|
||||
|
||||
state->time = MAX2(state->time, chosen->unblocked_time);
|
||||
|
||||
/* Schedule this instruction back onto the QIR list. */
|
||||
list_del(&chosen->link);
|
||||
list_add(&inst->link, &c->instructions);
|
||||
|
||||
/* Now that we've scheduled a new instruction, some of its
|
||||
* children can be promoted to the list of instructions ready to
|
||||
* be scheduled. Update the children's unblocked time for this
|
||||
* DAG edge as we do so.
|
||||
*/
|
||||
for (int i = chosen->child_count - 1; i >= 0; i--) {
|
||||
struct schedule_node *child = chosen->children[i];
|
||||
|
||||
child->unblocked_time = MAX2(child->unblocked_time,
|
||||
state->time +
|
||||
latency_between(chosen,
|
||||
child));
|
||||
child->parent_count--;
|
||||
if (child->parent_count == 0)
|
||||
list_add(&child->link, &state->worklist);
|
||||
}
|
||||
|
||||
/* Update our tracking of register pressure. */
|
||||
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
|
||||
if (inst->src[i].file == QFILE_TEMP)
|
||||
BITSET_SET(state->temp_live, inst->src[i].index);
|
||||
}
|
||||
if (inst->dst.file == QFILE_TEMP) {
|
||||
state->temp_writes[inst->dst.index]--;
|
||||
if (state->temp_writes[inst->dst.index] == 0)
|
||||
BITSET_CLEAR(state->temp_live, inst->dst.index);
|
||||
}
|
||||
|
||||
state->time++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
qir_schedule_instructions(struct vc4_compile *c)
|
||||
{
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
struct schedule_state state = { 0 };
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "Pre-schedule instructions\n");
|
||||
qir_dump(c);
|
||||
}
|
||||
|
||||
state.temp_writes = rzalloc_array(mem_ctx, uint32_t, c->num_temps);
|
||||
state.temp_live = rzalloc_array(mem_ctx, BITSET_WORD,
|
||||
BITSET_WORDS(c->num_temps));
|
||||
list_inithead(&state.worklist);
|
||||
|
||||
/* Wrap each instruction in a scheduler structure. */
|
||||
list_for_each_entry_safe(struct qinst, inst, &c->instructions, link) {
|
||||
struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node);
|
||||
|
||||
n->inst = inst;
|
||||
list_del(&inst->link);
|
||||
list_addtail(&n->link, &state.worklist);
|
||||
|
||||
if (inst->dst.file == QFILE_TEMP)
|
||||
state.temp_writes[inst->dst.index]++;
|
||||
}
|
||||
|
||||
/* Dependencies tracked top-to-bottom. */
|
||||
calculate_forward_deps(c, mem_ctx, &state.worklist);
|
||||
/* Dependencies tracked bottom-to-top. */
|
||||
calculate_reverse_deps(c, mem_ctx, &state.worklist);
|
||||
|
||||
list_for_each_entry(struct schedule_node, n, &state.worklist, link)
|
||||
compute_delay(n);
|
||||
|
||||
schedule_instructions(c, &state);
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "Post-schedule instructions\n");
|
||||
qir_dump(c);
|
||||
}
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
}
|
||||
|
|
@ -50,7 +50,7 @@ struct schedule_node {
|
|||
uint32_t child_array_size;
|
||||
uint32_t parent_count;
|
||||
|
||||
/* Longest cycles + n->latency of any parent of this node. */
|
||||
/* Longest cycles + instruction_latency() of any parent of this node. */
|
||||
uint32_t unblocked_time;
|
||||
|
||||
/**
|
||||
|
|
@ -259,7 +259,8 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
|
|||
}
|
||||
} else if (is_tmu_write(waddr)) {
|
||||
add_write_dep(state, &state->last_tmu_write, n);
|
||||
} else if (qpu_waddr_is_tlb(waddr)) {
|
||||
} else if (qpu_waddr_is_tlb(waddr) ||
|
||||
waddr == QPU_W_MS_FLAGS) {
|
||||
add_write_dep(state, &state->last_tlb, n);
|
||||
} else {
|
||||
switch (waddr) {
|
||||
|
|
@ -623,6 +624,46 @@ dump_state(struct list_head *schedule_list)
|
|||
}
|
||||
}
|
||||
|
||||
static uint32_t waddr_latency(uint32_t waddr, uint64_t after)
|
||||
{
|
||||
if (waddr < 32)
|
||||
return 2;
|
||||
|
||||
/* Apply some huge latency between texture fetch requests and getting
|
||||
* their results back.
|
||||
*/
|
||||
if (waddr == QPU_W_TMU0_S) {
|
||||
if (QPU_GET_FIELD(after, QPU_SIG) == QPU_SIG_LOAD_TMU0)
|
||||
return 100;
|
||||
}
|
||||
if (waddr == QPU_W_TMU1_S) {
|
||||
if (QPU_GET_FIELD(after, QPU_SIG) == QPU_SIG_LOAD_TMU1)
|
||||
return 100;
|
||||
}
|
||||
|
||||
switch(waddr) {
|
||||
case QPU_W_SFU_RECIP:
|
||||
case QPU_W_SFU_RECIPSQRT:
|
||||
case QPU_W_SFU_EXP:
|
||||
case QPU_W_SFU_LOG:
|
||||
return 3;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
instruction_latency(struct schedule_node *before, struct schedule_node *after)
|
||||
{
|
||||
uint64_t before_inst = before->inst->inst;
|
||||
uint64_t after_inst = after->inst->inst;
|
||||
|
||||
return MAX2(waddr_latency(QPU_GET_FIELD(before_inst, QPU_WADDR_ADD),
|
||||
after_inst),
|
||||
waddr_latency(QPU_GET_FIELD(before_inst, QPU_WADDR_MUL),
|
||||
after_inst));
|
||||
}
|
||||
|
||||
/** Recursive computation of the delay member of a node. */
|
||||
static void
|
||||
compute_delay(struct schedule_node *n)
|
||||
|
|
@ -634,7 +675,8 @@ compute_delay(struct schedule_node *n)
|
|||
if (!n->children[i].node->delay)
|
||||
compute_delay(n->children[i].node);
|
||||
n->delay = MAX2(n->delay,
|
||||
n->children[i].node->delay + n->latency);
|
||||
n->children[i].node->delay +
|
||||
instruction_latency(n, n->children[i].node));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -663,9 +705,14 @@ mark_instruction_scheduled(struct list_head *schedule_list,
|
|||
* immediately after (or paired with!) the thing reading the
|
||||
* destination.
|
||||
*/
|
||||
int latency_from_previous = war_only ? 0 : node->latency;
|
||||
uint32_t latency = 0;
|
||||
if (!war_only) {
|
||||
latency = instruction_latency(node,
|
||||
node->children[i].node);
|
||||
}
|
||||
|
||||
child->unblocked_time = MAX2(child->unblocked_time,
|
||||
time + latency_from_previous);
|
||||
time + latency);
|
||||
child->parent_count--;
|
||||
if (child->parent_count == 0)
|
||||
list_add(&child->link, schedule_list);
|
||||
|
|
@ -798,33 +845,6 @@ schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
|
|||
return time;
|
||||
}
|
||||
|
||||
static uint32_t waddr_latency(uint32_t waddr)
|
||||
{
|
||||
if (waddr < 32)
|
||||
return 2;
|
||||
|
||||
/* Some huge number, really. */
|
||||
if (waddr >= QPU_W_TMU0_S && waddr <= QPU_W_TMU1_B)
|
||||
return 100;
|
||||
|
||||
switch(waddr) {
|
||||
case QPU_W_SFU_RECIP:
|
||||
case QPU_W_SFU_RECIPSQRT:
|
||||
case QPU_W_SFU_EXP:
|
||||
case QPU_W_SFU_LOG:
|
||||
return 3;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
instruction_latency(uint64_t inst)
|
||||
{
|
||||
return MAX2(waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_ADD)),
|
||||
waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_MUL)));
|
||||
}
|
||||
|
||||
uint32_t
|
||||
qpu_schedule_instructions(struct vc4_compile *c)
|
||||
{
|
||||
|
|
@ -851,7 +871,6 @@ qpu_schedule_instructions(struct vc4_compile *c)
|
|||
struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node);
|
||||
|
||||
n->inst = inst;
|
||||
n->latency = instruction_latency(inst->inst);
|
||||
|
||||
if (reads_uniform(inst->inst)) {
|
||||
n->uniform = next_uniform++;
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue