diff --git a/Android.mk b/Android.mk
index 69e0d33f1aa..ed160fb3d0e 100644
--- a/Android.mk
+++ b/Android.mk
@@ -45,8 +45,6 @@ endif
MESA_COMMON_MK := $(MESA_TOP)/Android.common.mk
MESA_PYTHON2 := python
-DRM_GRALLOC_TOP := hardware/drm_gralloc
-
classic_drivers := i915 i965
gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx vc4
@@ -91,8 +89,7 @@ SUBDIRS := \
src/glsl \
src/mesa \
src/util \
- src/egl/main \
- src/egl/drivers/dri2 \
+ src/egl \
src/mesa/drivers/dri
ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
diff --git a/VERSION b/VERSION
index 1edd8fc00e5..2b1181ddc3f 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-10.7.0-devel
+11.0.0-devel
diff --git a/bin/bugzilla_mesa.sh b/bin/bugzilla_mesa.sh
index 491ca0e7c0b..0cff4261f75 100755
--- a/bin/bugzilla_mesa.sh
+++ b/bin/bugzilla_mesa.sh
@@ -15,17 +15,14 @@
# $ DRYRUN=yes bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 | wc -l
-# regex pattern: trim before url
-trim_before='s/.*\(http\)/\1/'
+# regex pattern: trim everything except the bug number
+trim_before='s/.*show_bug.cgi?id=\([0-9]*\).*/\1/'
-# regex pattern: trim after url
-trim_after='s/\(show_bug.cgi?id=[0-9]*\).*/\1/'
-
-# regex pattern: always use https
-use_https='s/http:/https:/'
+# regex pattern: reconstruct the url
+use_after='s,^,https://bugs.freedesktop.org/show_bug.cgi?id=,'
# extract fdo urls from commit log
-urls=$(git log $* | grep 'bugs.freedesktop.org/show_bug' | sed -e $trim_before -e $trim_after -e $use_https | sort | uniq)
+urls=$(git log $* | grep 'bugs.freedesktop.org/show_bug' | sed -e $trim_before | sort -n -u | sed -e $use_after)
# if DRYRUN is set to "yes", simply print the URLs and don't fetch the
# details from fdo bugzilla.
diff --git a/configure.ac b/configure.ac
index e78a4ba6325..74e13b3fcb7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -44,7 +44,7 @@ AC_INIT([Mesa], [MESA_VERSION],
AC_CONFIG_AUX_DIR([bin])
AC_CONFIG_MACRO_DIR([m4])
AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE([foreign tar-ustar dist-xz])
+AM_INIT_AUTOMAKE([foreign tar-ustar dist-xz subdir-objects])
dnl We only support native Windows builds (MinGW/MSVC) through SCons.
case "$host_os" in
@@ -64,13 +64,16 @@ m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
dnl Set internal versions
OSMESA_VERSION=8
AC_SUBST([OSMESA_VERSION])
+OPENCL_VERSION=1
+AC_SUBST([OPENCL_VERSION])
dnl Versions for external dependencies
-LIBDRM_REQUIRED=2.4.38
+LIBDRM_REQUIRED=2.4.60
LIBDRM_RADEON_REQUIRED=2.4.56
-LIBDRM_INTEL_REQUIRED=2.4.60
+LIBDRM_AMDGPU_REQUIRED=2.4.63
+LIBDRM_INTEL_REQUIRED=2.4.61
LIBDRM_NVVIEUX_REQUIRED=2.4.33
-LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
+LIBDRM_NOUVEAU_REQUIRED=2.4.62
LIBDRM_FREEDRENO_REQUIRED=2.4.57
DRI2PROTO_REQUIRED=2.6
DRI3PROTO_REQUIRED=1.0
@@ -79,7 +82,7 @@ LIBUDEV_REQUIRED=151
GLPROTO_REQUIRED=1.4.14
LIBOMXIL_BELLAGIO_REQUIRED=0.0
LIBVA_REQUIRED=0.35.0
-VDPAU_REQUIRED=0.4.1
+VDPAU_REQUIRED=1.1
WAYLAND_REQUIRED=1.2.0
XCB_REQUIRED=1.9.3
XCBDRI2_REQUIRED=1.8
@@ -205,11 +208,14 @@ AX_GCC_BUILTIN([__builtin_popcount])
AX_GCC_BUILTIN([__builtin_popcountll])
AX_GCC_BUILTIN([__builtin_unreachable])
+AX_GCC_FUNC_ATTRIBUTE([const])
AX_GCC_FUNC_ATTRIBUTE([flatten])
AX_GCC_FUNC_ATTRIBUTE([format])
AX_GCC_FUNC_ATTRIBUTE([malloc])
AX_GCC_FUNC_ATTRIBUTE([packed])
+AX_GCC_FUNC_ATTRIBUTE([pure])
AX_GCC_FUNC_ATTRIBUTE([unused])
+AX_GCC_FUNC_ATTRIBUTE([warn_unused_result])
AM_CONDITIONAL([GEN_ASM_OFFSETS], test "x$GEN_ASM_OFFSETS" = xyes)
@@ -230,7 +236,7 @@ _SAVE_LDFLAGS="$LDFLAGS"
_SAVE_CPPFLAGS="$CPPFLAGS"
dnl Compiler macros
-DEFINES=""
+DEFINES="-D__STDC_LIMIT_MACROS"
AC_SUBST([DEFINES])
case "$host_os" in
linux*|*-gnu*|gnu*)
@@ -281,6 +287,9 @@ if test "x$GCC" = xyes; then
# Work around aliasing bugs - developers should comment this out
CFLAGS="$CFLAGS -fno-strict-aliasing"
+ # We don't want floating-point math functions to set errno or trap
+ CFLAGS="$CFLAGS -fno-math-errno -fno-trapping-math"
+
# gcc's builtin memcmp is slower than glibc's
# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
CFLAGS="$CFLAGS -fno-builtin-memcmp"
@@ -651,6 +660,7 @@ fi
AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
+AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
dnl Check to see if dlopen is in default libraries (like Solaris, which
dnl has it in libc), or if libdl is needed to get it.
@@ -910,6 +920,13 @@ fi
AM_CONDITIONAL(HAVE_DRI_GLX, test "x$enable_glx" = xyes -a \
"x$enable_dri" = xyes)
+# Check for libdrm
+PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
+ [have_libdrm=yes], [have_libdrm=no])
+if test "x$have_libdrm" = xyes; then
+ DEFINES="$DEFINES -DHAVE_LIBDRM"
+fi
+
# Select which platform-dependent DRI code gets built
case "$host_os" in
darwin*)
@@ -922,8 +939,8 @@ esac
AM_CONDITIONAL(HAVE_DRICOMMON, test "x$enable_dri" = xyes )
AM_CONDITIONAL(HAVE_DRISW, test "x$enable_dri" = xyes )
-AM_CONDITIONAL(HAVE_DRI2, test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm )
-AM_CONDITIONAL(HAVE_DRI3, test "x$enable_dri3" = xyes -a "x$dri_platform" = xdrm )
+AM_CONDITIONAL(HAVE_DRI2, test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes )
+AM_CONDITIONAL(HAVE_DRI3, test "x$enable_dri3" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes )
AM_CONDITIONAL(HAVE_APPLEDRI, test "x$enable_dri" = xyes -a "x$dri_platform" = xapple )
AC_ARG_ENABLE([shared-glapi],
@@ -952,11 +969,9 @@ dnl
dnl Driver specific build directories
dnl
-case "x$enable_glx$enable_xlib_glx" in
-xyesyes)
+if test -n "$with_gallium_drivers" -a "x$enable_glx$enable_xlib_glx" = xyesyes; then
NEED_WINSYS_XLIB="yes"
- ;;
-esac
+fi
if test "x$enable_dri" = xyes; then
enable_gallium_loader="$enable_shared_pipe_drivers"
@@ -1111,13 +1126,6 @@ if test "x$with_sha1" = "x"; then
fi
AM_CONDITIONAL([ENABLE_SHADER_CACHE], [test x$enable_shader_cache = xyes])
-# Check for libdrm
-PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
- [have_libdrm=yes], [have_libdrm=no])
-if test "x$have_libdrm" = xyes; then
- DEFINES="$DEFINES -DHAVE_LIBDRM"
-fi
-
case "$host_os" in
linux*)
need_pci_id=yes ;;
@@ -1357,7 +1365,7 @@ if test "x$enable_dri" = xyes; then
fi
;;
darwin*)
- DEFINES="$DEFINES -DGLX_ALIAS_UNSUPPORTED"
+ DEFINES="$DEFINES -DGLX_ALIAS_UNSUPPORTED -DBUILDING_MESA"
if test "x$with_dri_drivers" = "xyes"; then
with_dri_drivers="swrast"
fi
@@ -1378,26 +1386,6 @@ if test "x$enable_dri" = xyes; then
[AC_MSG_ERROR([Expat library required for DRI not found])])
EXPAT_LIBS="-lexpat"])
- DRICOMMON_NEED_LIBDRM=no
- # If we are building any DRI driver other than swrast.
- if test -n "$with_dri_drivers"; then
- if test "x$with_dri_drivers" != xswrast; then
- # ... libdrm is required
- if test "x$have_libdrm" != xyes; then
- AC_MSG_ERROR([DRI drivers requires libdrm >= $LIBDRM_REQUIRED])
- fi
- DRICOMMON_NEED_LIBDRM=yes
- fi
- fi
-
- # If we're building any gallium DRI driver other than swrast
- if test -n "$with_gallium_drivers" -a "x$DRICOMMON_NEED_LIBDRM" = xno; then
- if test "x$with_gallium_drivers" != xswrast; then
- # ... build a libdrm aware dricommon
- DRICOMMON_NEED_LIBDRM=yes
- fi
- fi
-
# put all the necessary libs together
DRI_LIB_DEPS="$DRI_LIB_DEPS $SELINUX_LIBS $LIBDRM_LIBS $EXPAT_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS"
fi
@@ -1425,7 +1413,7 @@ if test -n "$with_dri_drivers"; then
;;
xnouveau)
HAVE_NOUVEAU_DRI=yes;
- PKG_CHECK_MODULES([NOUVEAU], [libdrm_nouveau >= $LIBDRM_NVVIEUX_REQUIRED])
+ PKG_CHECK_MODULES([NVVIEUX], [libdrm_nouveau >= $LIBDRM_NVVIEUX_REQUIRED])
;;
xradeon)
HAVE_RADEON_DRI=yes;
@@ -1765,6 +1753,9 @@ egl_platforms=`IFS=', '; echo $with_egl_platforms`
for plat in $egl_platforms; do
case "$plat" in
wayland)
+ test "x$have_libdrm" != xyes &&
+ AC_MSG_ERROR([EGL platform wayland requires libdrm >= $LIBDRM_REQUIRED])
+
PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
if test "x$WAYLAND_SCANNER" = x; then
@@ -1788,9 +1779,6 @@ for plat in $egl_platforms; do
AC_MSG_ERROR([EGL platform surfaceless requires libdrm >= $LIBDRM_REQUIRED])
;;
- android|gdi|null)
- ;;
-
*)
AC_MSG_ERROR([EGL platform '$plat' does not exist])
;;
@@ -1811,9 +1799,6 @@ else
EGL_NATIVE_PLATFORM="_EGL_INVALID_PLATFORM"
fi
-if echo "$egl_platforms" | grep -q 'x11'; then
- NEED_WINSYS_XLIB=yes
-fi
AM_CONDITIONAL(HAVE_EGL_PLATFORM_X11, echo "$egl_platforms" | grep -q 'x11')
AM_CONDITIONAL(HAVE_EGL_PLATFORM_WAYLAND, echo "$egl_platforms" | grep -q 'wayland')
AM_CONDITIONAL(HAVE_EGL_PLATFORM_DRM, echo "$egl_platforms" | grep -q 'drm')
@@ -2127,6 +2112,7 @@ if test -n "$with_gallium_drivers"; then
xradeonsi)
HAVE_GALLIUM_RADEONSI=yes
PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
+ PKG_CHECK_MODULES([AMDGPU], [libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
gallium_require_drm "radeonsi"
gallium_require_drm_loader
radeon_llvm_check "radeonsi"
@@ -2237,31 +2223,15 @@ AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers"
# use by XA tracker in particular, but could be used in any case
# where communication with xserver is not desired).
if test "x$enable_gallium_loader" = xyes; then
- if test "x$NEED_WINSYS_XLIB" = xyes; then
- GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_XLIB"
- fi
-
if test "x$enable_dri" = xyes; then
GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI"
fi
if test "x$enable_gallium_drm_loader" = xyes; then
GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRM"
- PKG_CHECK_MODULES([GALLIUM_PIPE_LOADER_XCB], [xcb xcb-dri2],
- pipe_loader_have_xcb=yes, pipe_loader_have_xcb=no)
- if test "x$pipe_loader_have_xcb" = xyes; then
- GALLIUM_PIPE_LOADER_CLIENT_DEFINES="$GALLIUM_PIPE_LOADER_CLIENT_DEFINES -DHAVE_PIPE_LOADER_XCB"
- GALLIUM_PIPE_LOADER_CLIENT_LIBS="$GALLIUM_PIPE_LOADER_CLIENT_LIBS $GALLIUM_PIPE_LOADER_XCB_LIBS $LIBDRM_LIBS"
- fi
fi
- GALLIUM_PIPE_LOADER_CLIENT_DEFINES="$GALLIUM_PIPE_LOADER_CLIENT_DEFINES $GALLIUM_PIPE_LOADER_DEFINES"
- GALLIUM_PIPE_LOADER_CLIENT_LIBS="$GALLIUM_PIPE_LOADER_CLIENT_LIBS $GALLIUM_PIPE_LOADER_LIBS"
-
AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES])
- AC_SUBST([GALLIUM_PIPE_LOADER_LIBS])
- AC_SUBST([GALLIUM_PIPE_LOADER_CLIENT_DEFINES])
- AC_SUBST([GALLIUM_PIPE_LOADER_CLIENT_LIBS])
fi
AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes)
@@ -2288,7 +2258,6 @@ fi
AC_SUBST([ELF_LIB])
-AM_CONDITIONAL(DRICOMMON_NEED_LIBDRM, test "x$DRICOMMON_NEED_LIBDRM" = xyes)
AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
AM_CONDITIONAL(HAVE_X11_DRIVER, test "x$enable_xlib_glx" = xyes)
AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
@@ -2348,8 +2317,7 @@ CXXFLAGS="$CXXFLAGS $USER_CXXFLAGS"
dnl Substitute the config
AC_CONFIG_FILES([Makefile
src/Makefile
- src/egl/drivers/dri2/Makefile
- src/egl/main/Makefile
+ src/egl/Makefile
src/egl/main/egl.pc
src/egl/wayland/wayland-drm/Makefile
src/egl/wayland/wayland-egl/Makefile
@@ -2388,6 +2356,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/targets/libgl-xlib/Makefile
src/gallium/targets/omx/Makefile
src/gallium/targets/opencl/Makefile
+ src/gallium/targets/opencl/mesa.icd
src/gallium/targets/osmesa/Makefile
src/gallium/targets/osmesa/osmesa.pc
src/gallium/targets/pipe-loader/Makefile
@@ -2403,6 +2372,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/winsys/intel/drm/Makefile
src/gallium/winsys/nouveau/drm/Makefile
src/gallium/winsys/radeon/drm/Makefile
+ src/gallium/winsys/amdgpu/drm/Makefile
src/gallium/winsys/svga/drm/Makefile
src/gallium/winsys/sw/dri/Makefile
src/gallium/winsys/sw/kms-dri/Makefile
diff --git a/docs/GL3.txt b/docs/GL3.txt
index 220bcc8742f..54c0c5aa6a8 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -92,43 +92,43 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft
GL_ARB_vertex_type_2_10_10_10_rev DONE ()
-GL 4.0, GLSL 4.00:
+GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
- GL_ARB_draw_buffers_blend DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_gpu_shader5 DONE (i965, nvc0)
+ GL_ARB_draw_buffers_blend DONE (i965, nv50, r600, llvmpipe, softpipe)
+ GL_ARB_draw_indirect DONE (i965, r600, llvmpipe, softpipe)
+ GL_ARB_gpu_shader5 DONE (i965)
- 'precise' qualifier DONE
- Dynamically uniform sampler array indices DONE (r600, softpipe)
- Dynamically uniform UBO array indices DONE (r600)
- Implicit signed -> unsigned conversions DONE
- Fused multiply-add DONE ()
- - Packing/bitfield/conversion functions DONE (r600, radeonsi, softpipe)
- - Enhanced textureGather DONE (r600, radeonsi, softpipe)
+ - Packing/bitfield/conversion functions DONE (r600, softpipe)
+ - Enhanced textureGather DONE (r600, softpipe)
- Geometry shader instancing DONE (r600, llvmpipe, softpipe)
- Geometry shader multiple streams DONE ()
- - Enhanced per-sample shading DONE (r600, radeonsi)
+ - Enhanced per-sample shading DONE (r600)
- Interpolation functions DONE (r600)
- New overload resolution rules DONE
- GL_ARB_gpu_shader_fp64 DONE (nvc0, softpipe)
- GL_ARB_sample_shading DONE (i965, nv50, nvc0, r600, radeonsi)
- GL_ARB_shader_subroutine started (Dave)
- GL_ARB_tessellation_shader started (Chris, Ilia)
- GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_texture_cube_map_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_texture_gather DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_texture_query_lod DONE (i965, nv50, nvc0, r600, radeonsi)
- GL_ARB_transform_feedback2 DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_transform_feedback3 DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
+ GL_ARB_gpu_shader_fp64 DONE (llvmpipe, softpipe)
+ GL_ARB_sample_shading DONE (i965, nv50, r600)
+ GL_ARB_shader_subroutine DONE (i965, nv50, r600, llvmpipe, softpipe)
+ GL_ARB_tessellation_shader DONE ()
+ GL_ARB_texture_buffer_object_rgb32 DONE (i965, r600, llvmpipe, softpipe)
+ GL_ARB_texture_cube_map_array DONE (i965, nv50, r600, llvmpipe, softpipe)
+ GL_ARB_texture_gather DONE (i965, nv50, r600, llvmpipe, softpipe)
+ GL_ARB_texture_query_lod DONE (i965, nv50, r600)
+ GL_ARB_transform_feedback2 DONE (i965, nv50, r600, llvmpipe, softpipe)
+ GL_ARB_transform_feedback3 DONE (i965, nv50, r600, llvmpipe, softpipe)
-GL 4.1, GLSL 4.10:
+GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi
- GL_ARB_ES2_compatibility DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
+ GL_ARB_ES2_compatibility DONE (i965, nv50, r600, llvmpipe, softpipe)
GL_ARB_get_program_binary DONE (0 binary formats)
GL_ARB_separate_shader_objects DONE (all drivers)
- GL_ARB_shader_precision started (Micah)
- GL_ARB_vertex_attrib_64bit DONE (nvc0, softpipe)
- GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, llvmpipe)
+ GL_ARB_shader_precision DONE (all drivers that support GLSL 4.10)
+ GL_ARB_vertex_attrib_64bit DONE (llvmpipe, softpipe)
+ GL_ARB_viewport_array DONE (i965, nv50, r600, llvmpipe)
GL 4.2, GLSL 4.20:
@@ -139,7 +139,7 @@ GL 4.2, GLSL 4.20:
GL_ARB_texture_storage DONE (all drivers)
GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_shader_image_load_store in progress (curro)
+ GL_ARB_shader_image_load_store DONE (i965)
GL_ARB_conservative_depth DONE (all drivers that support GLSL 1.30)
GL_ARB_shading_language_420pack DONE (all drivers that support GLSL 1.30)
GL_ARB_shading_language_packing DONE (all drivers)
@@ -156,7 +156,7 @@ GL 4.3, GLSL 4.30:
GL_ARB_copy_image DONE (i965) (gallium - in progress, VMware)
GL_KHR_debug DONE (all drivers)
GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL)
- GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, llvmpipe)
+ GL_ARB_fragment_layer_viewport DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
GL_ARB_framebuffer_no_attachments DONE (i965)
GL_ARB_internalformat_query2 not started
GL_ARB_invalidate_subdata DONE (all drivers)
@@ -189,20 +189,11 @@ GL 4.5, GLSL 4.50:
GL_ARB_ES3_1_compatibility not started
GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
- GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, llvmpipe, softpipe)
+ GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_cull_distance in progress (Tobias)
- GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600)
+ GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi)
GL_ARB_direct_state_access DONE (all drivers)
- - Transform Feedback object DONE
- - Buffer object DONE
- - Framebuffer object DONE
- - Renderbuffer object DONE
- - Texture object DONE
- - Vertex array object DONE
- - Sampler object DONE
- - Program Pipeline object DONE
- - Query object DONE (will require changes when GL_ARB_query_buffer_object lands)
- GL_ARB_get_texture_sub_image started (Brian Paul)
+ GL_ARB_get_texture_sub_image DONE (all drivers)
GL_ARB_shader_texture_image_samples not started
GL_ARB_texture_barrier DONE (nv50, nvc0, r600, radeonsi)
GL_KHR_context_flush_control DONE (all - but needs GLX/EXT extension to be useful)
diff --git a/docs/egl.html b/docs/egl.html
index 3ab1a6018fd..bc21c6c4894 100644
--- a/docs/egl.html
+++ b/docs/egl.html
@@ -88,10 +88,10 @@ types such as EGLNativeDisplayType or
EGLNativeWindowType defined for.
The available platforms are x11, drm,
-wayland, null, android,
-haiku, and gdi. The android platform
+wayland, surfaceless, android,
+and haiku. The android platform
can only be built as a system component, part of AOSP, while the
-haiku and gdi platforms can only be built with SCons.
+haiku platform can only be built with SCons.
Unless for special needs, the build system should
select the right platforms automatically.
diff --git a/docs/index.html b/docs/index.html
index 80c6e03e3f1..b9e6148914e 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,40 @@
News
+August 11, 2015
+
+Mesa 10.6.4 is released.
+This is a bug-fix release.
+
+
+July 26, 2015
+
+Mesa 10.6.3 is released.
+This is a bug-fix release.
+
+
+July 11, 2015
+
+Mesa 10.6.2 is released.
+This is a bug-fix release.
+
+
+July 04, 2015
+
+Mesa 10.5.9 is released.
+This is a bug-fix release.
+
+NOTE: It is anticipated that 10.5.9 will be the final release in the 10.5
+series. Users of 10.5 are encouraged to migrate to the 10.6 series in order
+to obtain future fixes.
+
+
+June 29, 2015
+
+Mesa 10.6.1 is released.
+This is a bug-fix release.
+
+
June 20, 2015
Mesa 10.5.8 is released.
diff --git a/docs/relnotes.html b/docs/relnotes.html
index 5fd80025a39..39e7f61e792 100644
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,11 @@ The release notes summarize what's new or changed in each Mesa release.
+10.6.4 release notes
+10.6.3 release notes
+10.6.2 release notes
+10.5.9 release notes
+10.6.1 release notes
10.5.8 release notes
10.6.0 release notes
10.5.7 release notes
diff --git a/docs/relnotes/10.5.9.html b/docs/relnotes/10.5.9.html
new file mode 100644
index 00000000000..a1d11c3b70d
--- /dev/null
+++ b/docs/relnotes/10.5.9.html
@@ -0,0 +1,140 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 10.5.9 Release Notes / July 04, 2015
+
+
+Mesa 10.5.9 is a bug fix release which fixes bugs found since the 10.5.8 release.
+
+
+Mesa 10.5.9 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+0c081b59572ee9732e7438d34adc3817fe8cc8d4b58abc0e71fd4b4c904945cb mesa-10.5.9.tar.gz
+71c69f31d3dbc35cfa79950e58a01d27030378d8c7ef1259a0b31d4d0487f4ec mesa-10.5.9.tar.xz
+
+
+
+
New features
+
None
+
+
Bug fixes
+
+
This list is likely incomplete.
+
+
+
+Bug 84225 - Allow constant-index-expression sampler array indexing with GLSL-ES < 300
+
+Bug 88999 - [SKL] Compiz crashes after opening unity dash
+
+Bug 89118 - [SKL Bisected]many Ogles3conform cases core dumped
+
+Bug 90537 - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen->ws->buffer_from_handle returns NULL)
+
+Bug 90839 - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath
+
+Bug 90873 - Kernel hang, TearFree On, Mate desktop environment
+
+Bug 91056 - The Bard's Tale (2005, native) has rendering issues
+
+Bug 91117 - Nimbus (running in wine) has rendering issues, objects are semi-transparent
+
+Bug 91124 - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted
+
+
+
+
+
Changes
+
+
Ben Widawsky (2):
+
+ i965/gen9: Implement Push Constant Buffer workaround
+ i965/skl: Use 1 register for uniform pull constant payload
+
+
+
Boyan Ding (1):
+
+ egl/x11: Remove duplicate call to dri2_x11_add_configs_for_visuals
+
+
+
Chris Wilson (3):
+
+ i965: Fix HW blitter pitch limits
+ i915: Blit RGBX<->RGBA drawpixels
+ i965: Export format comparison for blitting between miptrees
+
+
+
Emil Velikov (6):
+
+ docs: Add sha256sums for the 10.5.8 release
+ configure: warn about shared_glapi & xlib-glx only when both are set
+ configure: error out when building backend-less libEGL
+ configure: error out when building libEGL without shared-glapi
+ gbm: do not (over)link against libglapi.so
+ Update version to 10.5.9
+
+
+
Frank Henigman (1):
+
+ gbm: dlopen libglapi so gbm_create_device works
+
+
+
Ilia Mirkin (8):
+
+ glsl: add version checks to conditionals for builtin variable enablement
+ mesa: add GL_PROGRAM_PIPELINE support in KHR_debug calls
+ glsl: binding point is a texture unit, which is a combined space
+ nvc0: always put all tfb bufs into bufctx
+ nv50,nvc0: make sure to pushbuf_refn before putting bo into pushbuf_data
+ nv50/ir: propagate modifier to right arg when const-folding mad
+ nv50/ir: fix emission of address reg in 3rd source
+ nv50/ir: copy joinAt when splitting both before and after
+
+
+
Mario Kleiner (2):
+
+ nouveau: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.
+ winsys/radeon: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.
+
+
+
Michel Dänzer (1):
+
+ winsys/radeon: Unmap GPU VM address range when destroying BO
+
+
+
Tapani Pälli (6):
+
+ glsl: Allow dynamic sampler array indexing with GLSL ES < 3.00
+ mesa/glsl: new compiler option EmitNoIndirectSampler
+ i915: use EmitNoIndirectSampler
+ mesa/st: use EmitNoIndirectSampler if !ARB_gpu_shader5
+ i965: use EmitNoIndirectSampler for gen < 7
+ glsl: validate sampler array indexing for 'constant-index-expression'
+
+
+
+
+
+
diff --git a/docs/relnotes/10.6.1.html b/docs/relnotes/10.6.1.html
new file mode 100644
index 00000000000..f197b0f3a42
--- /dev/null
+++ b/docs/relnotes/10.6.1.html
@@ -0,0 +1,104 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 10.6.1 Release Notes / June 29, 2015
+
+
+Mesa 10.6.1 is a bug fix release which fixes bugs found since the 10.6.0 release.
+
+
+Mesa 10.6.1 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+b4cccd4d0eabcc2bca00c3175d3ad88fdda57ffdb883a7998525b873a21fe607 mesa-10.6.1.tar.gz
+6c80a2b647e57c85dc36e609d9aed17f878f0d8e0cf9ace86d14cf604101e1eb mesa-10.6.1.tar.xz
+
+
+
+
New features
+
None
+
+
Bug fixes
+
+
This list is likely incomplete.
+
+
+Bug 90347 - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)
+
+
+
+
+
Changes
+
+
Anuj Phogat (4):
+
+ mesa: Handle integer formats in need_rgb_to_luminance_conversion()
+ mesa: Use helper function need_rgb_to_luminance_conversion()
+ mesa: Turn need_rgb_to_luminance_conversion() in to a global function
+ meta: Abort meta path if ReadPixels need rgb to luminance conversion
+
+
+
Ben Widawsky (1):
+
+ i965/gen9: Implement Push Constant Buffer workaround
+
+
+
Boyan Ding (2):
+
+ egl/x11: Set version of swrastLoader to 2
+ egl/x11: Remove duplicate call to dri2_x11_add_configs_for_visuals
+
+
+
Emil Velikov (6):
+
+ docs: Add sha256sums for the 10.6.0 release
+ configure: warn about shared_glapi & xlib-glx only when both are set
+ configure: error out when building backend-less libEGL
+ configure: error out when building libEGL without shared-glapi
+ gbm: do not (over)link against libglapi.so
+ Update version to 10.6.1
+
+
+
Frank Henigman (1):
+
+ gbm: dlopen libglapi so gbm_create_device works
+
+
+
Ilia Mirkin (9):
+
+ nvc0/ir: fix collection of first uses for texture barrier insertion
+ nv50,nvc0: clamp uniform size to 64k
+ nvc0/ir: can't have a join on a load with an indirect source
+ glsl: handle conversions to double when comparing param matches
+ glsl: add version checks to conditionals for builtin variable enablement
+ mesa: add GL_PROGRAM_PIPELINE support in KHR_debug calls
+ glsl: binding point is a texture unit, which is a combined space
+ nvc0: always put all tfb bufs into bufctx
+ nv50,nvc0: make sure to pushbuf_refn before putting bo into pushbuf_data
+
+
+
+
+
+
diff --git a/docs/relnotes/10.6.2.html b/docs/relnotes/10.6.2.html
new file mode 100644
index 00000000000..d95417a8521
--- /dev/null
+++ b/docs/relnotes/10.6.2.html
@@ -0,0 +1,165 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 10.6.2 Release Notes / July 11, 2015
+
+
+Mesa 10.6.2 is a bug fix release which fixes bugs found since the 10.6.1 release.
+
+
+Mesa 10.6.2 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+9c7ab9300dda6c912faaaff97995ec1820ba21d114d9cf555f145cbad90995f4 mesa-10.6.2.tar.gz
+05753d3db4212900927b9894221a1669a10f56786e86a7e818b6e18a0817dca9 mesa-10.6.2.tar.xz
+
+
+
+
New features
+
None
+
+
Bug fixes
+
+
This list is likely incomplete.
+
+
+Bug 73528 - Deferred lighting in Second Life causes system hiccups and screen flickering
+
+Bug 80500 - Flickering shadows in unreleased title trace
+
+Bug 82186 - [r600g] BARTS GPU lockup with minecraft shaders
+
+Bug 84225 - Allow constant-index-expression sampler array indexing with GLSL-ES < 300
+
+Bug 90537 - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen->ws->buffer_from_handle returns NULL)
+
+Bug 90873 - Kernel hang, TearFree On, Mate desktop environment
+
+Bug 91022 - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix
+
+Bug 91047 - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin
+
+Bug 91056 - The Bard's Tale (2005, native) has rendering issues
+
+Bug 91117 - Nimbus (running in wine) has rendering issues, objects are semi-transparent
+
+Bug 91124 - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted
+
+Bug 91173 - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors
+
+Bug 91226 - Crash in glLinkProgram (NEW)
+
+Bug 91231 - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled
+
+
+
+
+
Changes
+
+
Chris Wilson (1):
+
+ loader: Look for any version of currently linked libudev.so
+
+
+
Emil Velikov (2):
+
+ docs: Add sha256 checksums for the 10.6.1 release
+ Update version to 10.6.2
+
+
+
Ilia Mirkin (8):
+
+ nv50/ir: propagate modifier to right arg when const-folding mad
+ nv50/ir: fix emission of address reg in 3rd source
+ nv50/ir: copy joinAt when splitting both before and after
+ mesa: reset the source packing when creating temp transfer image
+ nv50/ir: don't emit src2 in immediate form
+ mesa/prog: relative offsets into constbufs are not constant
+ nv50/ir: UCMP arguments are float, so make sure modifiers are applied
+ nvc0: turn sample counts off during blit
+
+
+
Kenneth Graunke (5):
+
+ i965/fs: Fix ir_txs in emit_texture_gen4_simd16().
+ i965: Reserve more batch space to accomodate Gen6 perfmonitors.
+ i965/vs: Fix matNxM vertex attributes where M != 4.
+ Revert "glsl: clone inputs and outputs during linking"
+ Revert "i965: Delete linked GLSL IR when using NIR."
+
+
+
Marek Olšák (3):
+
+ r600g: disable single-sample fast color clear due to hangs
+ radeonsi: fix a hang with DrawTransformFeedback on 4 SE chips
+ st/dri: don't set PIPE_BIND_SCANOUT for MSAA surfaces
+
+
+
Mario Kleiner (2):
+
+ nouveau: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.
+ winsys/radeon: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.
+
+
+
Matt Turner (2):
+
+ i965/fs: Don't mess up stride for uniform integer multiplication.
+ Revert SHA1 additions.
+
+
+
Michel Dänzer (1):
+
+ winsys/radeon: Unmap GPU VM address range when destroying BO
+
+
+
Mike Stroyan (2):
+
+ meta: Only change and restore viewport 0 in mesa meta mode
+ i965: allocate at least 1 BLEND_STATE element
+
+
+
Neil Roberts (4):
+
+ i965/skl: Set the pulls bary bit in 3DSTATE_PS_EXTRA
+ glsl: Add missing check for whether an expression is an add operation
+ glsl: Make sure not to dereference NULL
+ i965: Don't try to print the GLSL IR if it has been freed
+
+
+
Tapani Pälli (8):
+
+ glsl: clone inputs and outputs during linking
+ i965: Delete linked GLSL IR when using NIR.
+ glsl: Allow dynamic sampler array indexing with GLSL ES < 3.00
+ mesa/glsl: new compiler option EmitNoIndirectSampler
+ i965: use EmitNoIndirectSampler for gen < 7
+ i915: use EmitNoIndirectSampler
+ mesa/st: use EmitNoIndirectSampler if !ARB_gpu_shader5
+ glsl: validate sampler array indexing for 'constant-index-expression'
+
+
+
+
+
+
diff --git a/docs/relnotes/10.6.3.html b/docs/relnotes/10.6.3.html
new file mode 100644
index 00000000000..1622c87cde2
--- /dev/null
+++ b/docs/relnotes/10.6.3.html
@@ -0,0 +1,106 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 10.6.3 Release Notes / July 26, 2015
+
+
+Mesa 10.6.3 is a bug fix release which fixes bugs found since the 10.6.2 release.
+
+
+Mesa 10.6.3 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+c27e1e33798e69a6d2d2425aee8ac7b4c0b243066a65dd76cbb182ea31b1c7f2 mesa-10.6.3.tar.gz
+58592e07c350cd2e8969b73fa83048c657a39fe2f13f3b88f5e5818fe2e4676d mesa-10.6.3.tar.xz
+
+
+
+
New features
+
None
+
+
Bug fixes
+
+
This list is likely incomplete.
+
+
+Bug 90728 - dvd playback with vlc and vdpau causes segmentation fault
+
+Bug 91337 - OSMesaGetProcAdress("OSMesaPixelStore") returns nil
+
+
+
+
+
Changes
+
+
Brian Paul (1):
+
+ osmesa: fix OSMesaPixelsStore typo
+
+
+
Chad Versace (1):
+
+ mesa: Fix generation of git_sha1.h.tmp for gitlinks
+
+
+
Christian König (2):
+
+ vl: cleanup video buffer private when the decoder is destroyed
+ st/vdpau: fix mixer size checks
+
+
+
Emil Velikov (3):
+
+ docs: Add sha256 checksums for the 10.6.2 release
+ auxiliary/vl: use the correct screen index
+ Update version to 10.6.3
+
+
+
Francisco Jerez (1):
+
+ i965/gen9: Use custom MOCS entries set up by the kernel.
+
+
+
Ilia Mirkin (5):
+
+ nv50, nvc0: enable at least one color RT if alphatest is enabled
+ nvc0/ir: fix txq on indirect samplers
+ nvc0/ir: don't worry about sampler in txq handling
+ gm107/ir: fix indirect txq emission
+ nv50: fix max level clamping on G80
+
+
+
Kenneth Graunke (1):
+
+ program: Allow redundant OPTION ARB_fog_* directives.
+
+
+
Rob Clark (1):
+
+ xa: don't leak fences
+
+
+
+
+
+
diff --git a/docs/relnotes/10.6.4.html b/docs/relnotes/10.6.4.html
new file mode 100644
index 00000000000..168182ec52e
--- /dev/null
+++ b/docs/relnotes/10.6.4.html
@@ -0,0 +1,137 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 10.6.4 Release Notes / August 11, 2015
+
+
+Mesa 10.6.4 is a bug fix release which fixes bugs found since the 10.6.3 release.
+
+
+Mesa 10.6.4 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3. OpenGL
+3.3 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+4960bf17d8b5d6a6503c6954ec6cf480b5cd930797bac901c60bea192675f85e mesa-10.6.4.tar.gz
+8f5ac103f0f503de2f7a985b0df349bd4ecdfe7f51c714be146fa5a9a3c07b77 mesa-10.6.4.tar.xz
+
+
+
+
New features
+
None
+
+
Bug fixes
+
+
This list is likely incomplete.
+
+
+Bug 73512 - [clover] mesa.icd. should contain full path
+
+Bug 91290 - SIGSEGV glcpp/glcpp-parse.y:1077
+
+
+
+
+
Changes
+
+
Anuj Phogat (6):
+
+ mesa: Turn get_readpixels_transfer_ops() in to a global function
+ meta: Fix transfer operations check in meta pbo path for readpixels
+ meta: Abort meta pbo path if readpixels need signed-unsigned conversion
+ meta: Don't do fragment color clamping in _mesa_meta_pbo_GetTexSubImage
+ mesa: Add a helper function _mesa_need_luminance_to_rgb_conversion()
+ meta: Fix reading luminance texture as rgba in _mesa_meta_pbo_GetTexSubImage()
+
+
+
Ben Widawsky (1):
+
+ i965/skl: Add production thread counts and URB size
+
+
+
Eduardo Lima Mitev (3):
+
+ mesa: Fix errors values returned by glShaderBinary()
+ mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD
+ mesa: Fix error returned by glCopyTexImage2D() upon an invalid internal format
+
+
+
Emil Velikov (6):
+
+ docs: Add checksums for mesa 10.6.3 tarballs
+ configure.ac: do not set HAVE_DRI(23) when libdrm is missing
+ egl/wayland: libdrm is a hard requirement, treat it as such
+ winsys/radeon: don't leak the fd when it is 0
+ bugzilla_mesa.sh: sort the bugs list by number
+ Update version to 10.6.4
+
+
+
Francisco Jerez (1):
+
+ i965/fs: Fix fs_inst::regs_read() for sources in the ATTR file.
+
+
+
Frank Binns (2):
+
+ egl/dri: Add error info needed for EGL_EXT_image_dma_buf_import extension
+ egl: Add eglQuerySurface surface type check for EGL_LARGEST_PBUFFER attrib
+
+
+
Igor Gnatenko (1):
+
+ opencl: use versioned .so in mesa.icd
+
+
+
Ilia Mirkin (1):
+
+ nvc0: fix geometry program revalidation of clipping params
+
+
+
Kenneth Graunke (1):
+
+ glsl: Fix a bug where LHS swizzles of swizzles were too small.
+
+
+
Marek Olšák (6):
+
+ st/mesa: don't call st_validate_state in BlitFramebuffer
+ radeonsi: upload shader rodata after updating scratch relocations
+ st/mesa: don't ignore texture buffer state changes
+ radeonsi: rework how shader pointers to descriptors are set
+ radeonsi: completely rework updating descriptors without CP DMA
+ r600g: fix the CB_SHADER_MASK setup
+
+
+
Samuel Iglesias Gonsalvez (1):
+
+ glsl/glcpp: fix SIGSEGV when checking error condition for macro redefinition
+
+
+
Samuel Pitoiset (1):
+
+ nv50: avoid segfault with enabled but unbound vertex attrib
+
+
+
+
+
+
diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html
deleted file mode 100644
index e089889667d..00000000000
--- a/docs/relnotes/10.7.0.html
+++ /dev/null
@@ -1,61 +0,0 @@
-
-
-
-
- Mesa Release Notes
-
-
-
-
-
-
-
-
-
-
Mesa 10.7.0 Release Notes / TBD
-
-
-Mesa 10.7.0 is a new development release.
-People who are concerned with stability and reliability should stick
-with a previous release or wait for Mesa 10.7.1.
-
-
-Mesa 10.7.0 implements the OpenGL 3.3 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 3.3. OpenGL
-3.3 is only available if requested at context creation
-because compatibility contexts are not supported.
-
-
-
-
SHA256 checksums
-
-TBD.
-
-
-
-
New features
-
-
-Note: some of the new features are only available with certain drivers.
-
-
-
-GL_ARB_framebuffer_no_attachments on i965
-GL_ARB_shader_stencil_export on llvmpipe
-
-
-
Bug fixes
-
-TBD.
-
-
Changes
-
-TBD.
-
-
-
-
diff --git a/docs/relnotes/11.0.0.html b/docs/relnotes/11.0.0.html
new file mode 100644
index 00000000000..75967ac7eec
--- /dev/null
+++ b/docs/relnotes/11.0.0.html
@@ -0,0 +1,89 @@
+
+
+
+
+ Mesa Release Notes
+
+
+
+
+
+
+
+
+
+
Mesa 11.0.0 Release Notes / TBD
+
+
+Mesa 11.0.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 11.0.1.
+
+
+Mesa 11.0.0 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1. OpenGL
+4.1 is only available if requested at context creation
+because compatibility contexts are not supported.
+
+
+
+
SHA256 checksums
+
+TBD.
+
+
+
+
New features
+
+
+Note: some of the new features are only available with certain drivers.
+
+
+
+New hardware support for AMD GCN 1.2 GPUs: Tonga, Iceland, Carrizo, Fiji
+OpenGL 4.1 on radeonsi, nvc0
+GL_AMD_vertex_shader_viewport_index on radeonsi
+GL_ARB_conditional_render_inverted on r600, radeonsi
+GL_ARB_derivative_control on radeonsi
+GL_ARB_fragment_layer_viewport on radeonsi
+GL_ARB_framebuffer_no_attachments on i965
+GL_ARB_get_texture_sub_image for all drivers
+GL_ARB_gpu_shader5 on radeonsi
+GL_ARB_gpu_shader_fp64 on llvmpipe, radeonsi
+GL_ARB_shader_image_load_store on i965
+GL_ARB_shader_precision on radeonsi, nvc0
+GL_ARB_shader_stencil_export on llvmpipe
+GL_ARB_shader_subroutine on core profile for all drivers
+GL_ARB_tessellation_shader on nvc0, radeonsi
+GL_ARB_vertex_attrib_64bit on llvmpipe, radeonsi
+GL_ARB_viewport_array on radeonsi
+GL_EXT_depth_bounds_test on radeonsi, nv30, nv50, nvc0
+GL_NV_read_depth (GLES) on all drivers
+GL_NV_read_depth_stencil (GLES) on all drivers
+GL_NV_read_stencil (GLES) on all drivers
+GL_OES_texture_float on r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe
+GL_OES_texture_half_float on r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe
+GL_OES_texture_float_linear on r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe
+GL_OES_texture_half_float_linear on r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe
+GLX_ARB_create_context_robustness on r600, radeonsi
+EGL_EXT_create_context_robustness on r600, radeonsi
+EGL_KHR_gl_colorspace on r600, radeonsi, nv50, nvc0
+EGL_KHR_gl_texture_3D_image on r600, radeonsi, nv50, nvc0
+EGL 1.5 on r600, radeonsi, nv50, nvc0
+
+
+
Bug fixes
+
+TBD.
+
+
Changes
+
+TBD.
+
+
+
+
diff --git a/doxygen/.gitignore b/doxygen/.gitignore
index abf56ac682d..a5f3921b445 100644
--- a/doxygen/.gitignore
+++ b/doxygen/.gitignore
@@ -1,3 +1,4 @@
+*.db
*.tag
*.tmp
agpgart
diff --git a/doxygen/Makefile b/doxygen/Makefile
index 0a95a3516a2..01c2691cfe0 100644
--- a/doxygen/Makefile
+++ b/doxygen/Makefile
@@ -33,3 +33,4 @@ subset: $(SUBSET:.doxy=.tag)
clean:
-rm -rf $(FULL:.doxy=) $(SUBSET:.doxy=)
-rm -rf *.tag
+ -rm -rf *.db
diff --git a/include/EGL/eglplatform.h b/include/EGL/eglplatform.h
index 7802542ad0f..b376e642822 100644
--- a/include/EGL/eglplatform.h
+++ b/include/EGL/eglplatform.h
@@ -77,7 +77,7 @@ typedef HDC EGLNativeDisplayType;
typedef HBITMAP EGLNativePixmapType;
typedef HWND EGLNativeWindowType;
-#elif defined(__APPLE__) || defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */
+#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */
typedef int EGLNativeDisplayType;
typedef void *EGLNativeWindowType;
@@ -105,7 +105,7 @@ typedef struct ANativeWindow* EGLNativeWindowType;
typedef struct egl_native_pixmap_t* EGLNativePixmapType;
typedef void* EGLNativeDisplayType;
-#elif defined(__unix__)
+#elif defined(__unix__) || defined(__APPLE__)
#if defined(MESA_EGL_NO_X11_HEADERS)
diff --git a/include/GL/glext.h b/include/GL/glext.h
index a3873a613f9..e5f1d891ec5 100644
--- a/include/GL/glext.h
+++ b/include/GL/glext.h
@@ -3879,7 +3879,12 @@ GLAPI void APIENTRY glMinSampleShadingARB (GLfloat value);
#ifndef GL_ARB_shader_objects
#define GL_ARB_shader_objects 1
#ifdef __APPLE__
+#ifdef BUILDING_MESA
+/* Avoid uint <-> void* warnings */
+typedef unsigned long GLhandleARB;
+#else
typedef void *GLhandleARB;
+#endif
#else
typedef unsigned int GLhandleARB;
#endif
diff --git a/include/GL/glx.h b/include/GL/glx.h
index 78f5052b23a..1e4bb7d7176 100644
--- a/include/GL/glx.h
+++ b/include/GL/glx.h
@@ -368,18 +368,6 @@ extern Bool glXDrawableAttribARB(Display *dpy, GLXDrawable draw, const int *attr
#endif /* GLX_ARB_render_texture */
-/*
- * Remove this when glxext.h is updated.
- */
-#ifndef GLX_NV_float_buffer
-#define GLX_NV_float_buffer 1
-
-#define GLX_FLOAT_COMPONENTS_NV 0x20B0
-
-#endif /* GLX_NV_float_buffer */
-
-
-
/*
* #?. GLX_MESA_swap_frame_usage
*/
@@ -415,86 +403,6 @@ typedef int (*PFNGLXGETSWAPINTERVALMESAPROC)(void);
#endif /* GLX_MESA_swap_control */
-
-/*
- * #?. GLX_EXT_texture_from_pixmap
- * XXX not finished?
- */
-#ifndef GLX_EXT_texture_from_pixmap
-#define GLX_EXT_texture_from_pixmap 1
-
-#define GLX_BIND_TO_TEXTURE_RGB_EXT 0x20D0
-#define GLX_BIND_TO_TEXTURE_RGBA_EXT 0x20D1
-#define GLX_BIND_TO_MIPMAP_TEXTURE_EXT 0x20D2
-#define GLX_BIND_TO_TEXTURE_TARGETS_EXT 0x20D3
-#define GLX_Y_INVERTED_EXT 0x20D4
-
-#define GLX_TEXTURE_FORMAT_EXT 0x20D5
-#define GLX_TEXTURE_TARGET_EXT 0x20D6
-#define GLX_MIPMAP_TEXTURE_EXT 0x20D7
-
-#define GLX_TEXTURE_FORMAT_NONE_EXT 0x20D8
-#define GLX_TEXTURE_FORMAT_RGB_EXT 0x20D9
-#define GLX_TEXTURE_FORMAT_RGBA_EXT 0x20DA
-
-#define GLX_TEXTURE_1D_BIT_EXT 0x00000001
-#define GLX_TEXTURE_2D_BIT_EXT 0x00000002
-#define GLX_TEXTURE_RECTANGLE_BIT_EXT 0x00000004
-
-#define GLX_TEXTURE_1D_EXT 0x20DB
-#define GLX_TEXTURE_2D_EXT 0x20DC
-#define GLX_TEXTURE_RECTANGLE_EXT 0x20DD
-
-#define GLX_FRONT_LEFT_EXT 0x20DE
-#define GLX_FRONT_RIGHT_EXT 0x20DF
-#define GLX_BACK_LEFT_EXT 0x20E0
-#define GLX_BACK_RIGHT_EXT 0x20E1
-#define GLX_FRONT_EXT GLX_FRONT_LEFT_EXT
-#define GLX_BACK_EXT GLX_BACK_LEFT_EXT
-#define GLX_AUX0_EXT 0x20E2
-#define GLX_AUX1_EXT 0x20E3
-#define GLX_AUX2_EXT 0x20E4
-#define GLX_AUX3_EXT 0x20E5
-#define GLX_AUX4_EXT 0x20E6
-#define GLX_AUX5_EXT 0x20E7
-#define GLX_AUX6_EXT 0x20E8
-#define GLX_AUX7_EXT 0x20E9
-#define GLX_AUX8_EXT 0x20EA
-#define GLX_AUX9_EXT 0x20EB
-
-extern void glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer, const int *attrib_list);
-extern void glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer);
-
-#endif /* GLX_EXT_texture_from_pixmap */
-
-
-#ifndef GLX_MESA_query_renderer
-#define GLX_MESA_query_renderer 1
-
-#define GLX_RENDERER_VENDOR_ID_MESA 0x8183
-#define GLX_RENDERER_DEVICE_ID_MESA 0x8184
-#define GLX_RENDERER_VERSION_MESA 0x8185
-#define GLX_RENDERER_ACCELERATED_MESA 0x8186
-#define GLX_RENDERER_VIDEO_MEMORY_MESA 0x8187
-#define GLX_RENDERER_UNIFIED_MEMORY_ARCHITECTURE_MESA 0x8188
-#define GLX_RENDERER_PREFERRED_PROFILE_MESA 0x8189
-#define GLX_RENDERER_OPENGL_CORE_PROFILE_VERSION_MESA 0x818A
-#define GLX_RENDERER_OPENGL_COMPATIBILITY_PROFILE_VERSION_MESA 0x818B
-#define GLX_RENDERER_OPENGL_ES_PROFILE_VERSION_MESA 0x818C
-#define GLX_RENDERER_OPENGL_ES2_PROFILE_VERSION_MESA 0x818D
-#define GLX_RENDERER_ID_MESA 0x818E
-
-Bool glXQueryRendererIntegerMESA(Display *dpy, int screen, int renderer, int attribute, unsigned int *value);
-Bool glXQueryCurrentRendererIntegerMESA(int attribute, unsigned int *value);
-const char *glXQueryRendererStringMESA(Display *dpy, int screen, int renderer, int attribute);
-const char *glXQueryCurrentRendererStringMESA(int attribute);
-
-typedef Bool (*PFNGLXQUERYRENDERERINTEGERMESAPROC) (Display *dpy, int screen, int renderer, int attribute, unsigned int *value);
-typedef Bool (*PFNGLXQUERYCURRENTRENDERERINTEGERMESAPROC) (int attribute, unsigned int *value);
-typedef const char *(*PFNGLXQUERYRENDERERSTRINGMESAPROC) (Display *dpy, int screen, int renderer, int attribute);
-typedef const char *(*PFNGLXQUERYCURRENTRENDERERSTRINGMESAPROC) (int attribute);
-#endif /* GLX_MESA_query_renderer */
-
/*** Should these go here, or in another header? */
/*
** GLX Events
diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h
index c827bb640f3..a0f155a1f42 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -40,14 +40,7 @@
#ifndef DRI_INTERFACE_H
#define DRI_INTERFACE_H
-/* For archs with no drm.h */
-#if defined(__APPLE__) || defined(__CYGWIN__) || defined(__GNU__)
-#ifndef __NOT_HAVE_DRM_H
-#define __NOT_HAVE_DRM_H
-#endif
-#endif
-
-#ifndef __NOT_HAVE_DRM_H
+#ifdef HAVE_LIBDRM
#include
#else
typedef unsigned int drm_context_t;
@@ -1101,12 +1094,15 @@ struct __DRIdri2ExtensionRec {
/**
- * Four CC formats that matches with WL_DRM_FORMAT_* from wayland_drm.h
- * and GBM_FORMAT_* from gbm.h, used with createImageFromNames.
+ * FourCC formats that match WL_DRM_FORMAT_* from wayland_drm.h,
+ * GBM_FORMAT_* from gbm.h, and DRM_FORMAT_* from drm_fourcc.h. Used with
+ * createImageFromNames.
*
* \since 5
*/
+#define __DRI_IMAGE_FOURCC_R8 0x20203852
+#define __DRI_IMAGE_FOURCC_GR88 0x38385247
#define __DRI_IMAGE_FOURCC_RGB565 0x36314752
#define __DRI_IMAGE_FOURCC_ARGB8888 0x34325241
#define __DRI_IMAGE_FOURCC_XRGB8888 0x34325258
@@ -1141,6 +1137,8 @@ struct __DRIdri2ExtensionRec {
#define __DRI_IMAGE_COMPONENTS_Y_U_V 0x3003
#define __DRI_IMAGE_COMPONENTS_Y_UV 0x3004
#define __DRI_IMAGE_COMPONENTS_Y_XUXV 0x3005
+#define __DRI_IMAGE_COMPONENTS_R 0x3006
+#define __DRI_IMAGE_COMPONENTS_RG 0x3007
/**
@@ -1180,7 +1178,8 @@ enum __DRIChromaSiting {
};
/**
- * \name Reasons that __DRIimageExtensionRec::createImageFromTexture might fail
+ * \name Reasons that __DRIimageExtensionRec::createImageFromTexture or
+ * __DRIimageExtensionRec::createImageFromDmaBufs might fail
*/
/*@{*/
/** Success! */
@@ -1189,11 +1188,14 @@ enum __DRIChromaSiting {
/** Memory allocation failure */
#define __DRI_IMAGE_ERROR_BAD_ALLOC 1
-/** Client requested an invalid attribute for a texture object */
+/** Client requested an invalid attribute */
#define __DRI_IMAGE_ERROR_BAD_MATCH 2
/** Client requested an invalid texture object */
#define __DRI_IMAGE_ERROR_BAD_PARAMETER 3
+
+/** Client requested an invalid pitch and/or offset */
+#define __DRI_IMAGE_ERROR_BAD_ACCESS 4
/*@}*/
/**
@@ -1444,6 +1446,11 @@ typedef struct __DRIDriverVtableExtensionRec {
#define __DRI2_RENDERER_OPENGL_COMPATIBILITY_PROFILE_VERSION 0x0008
#define __DRI2_RENDERER_OPENGL_ES_PROFILE_VERSION 0x0009
#define __DRI2_RENDERER_OPENGL_ES2_PROFILE_VERSION 0x000a
+#define __DRI2_RENDERER_HAS_TEXTURE_3D 0x000b
+/* Whether there is sRGB format support for every supported 32-bit UNORM
+ * color format.
+ */
+#define __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB 0x000c
typedef struct __DRI2rendererQueryExtensionRec __DRI2rendererQueryExtension;
struct __DRI2rendererQueryExtensionRec {
diff --git a/include/c99_math.h b/include/c99_math.h
index 7ed7cc22176..8a67fb133d6 100644
--- a/include/c99_math.h
+++ b/include/c99_math.h
@@ -140,6 +140,18 @@ llrintf(float f)
return rounded;
}
+static inline float
+exp2f(float f)
+{
+ return powf(2.0f, f);
+}
+
+static inline double
+exp2(double d)
+{
+ return pow(2.0, d);
+}
+
#endif /* C99 */
diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h
index 8d757aaa767..8a425999429 100644
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -128,3 +128,6 @@ CHIPSET(0x22B0, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B1, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B2, chv, "Intel(R) HD Graphics (Cherryview)")
CHIPSET(0x22B3, chv, "Intel(R) HD Graphics (Cherryview)")
+CHIPSET(0x0A84, bxt, "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A84, bxt, "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x5A84, bxt, "Intel(R) HD Graphics (Broxton)")
diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
index cd5da99a6a6..52eada1d3d5 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -63,6 +63,7 @@ CHIPSET(0x6608, OLAND_6608, OLAND)
CHIPSET(0x6610, OLAND_6610, OLAND)
CHIPSET(0x6611, OLAND_6611, OLAND)
CHIPSET(0x6613, OLAND_6613, OLAND)
+CHIPSET(0x6617, OLAND_6617, OLAND)
CHIPSET(0x6620, OLAND_6620, OLAND)
CHIPSET(0x6621, OLAND_6621, OLAND)
CHIPSET(0x6623, OLAND_6623, OLAND)
@@ -156,3 +157,27 @@ CHIPSET(0x67B8, HAWAII_67B8, HAWAII)
CHIPSET(0x67B9, HAWAII_67B9, HAWAII)
CHIPSET(0x67BA, HAWAII_67BA, HAWAII)
CHIPSET(0x67BE, HAWAII_67BE, HAWAII)
+
+CHIPSET(0x6900, ICELAND_, ICELAND)
+CHIPSET(0x6901, ICELAND_, ICELAND)
+CHIPSET(0x6902, ICELAND_, ICELAND)
+CHIPSET(0x6903, ICELAND_, ICELAND)
+CHIPSET(0x6907, ICELAND_, ICELAND)
+
+CHIPSET(0x6920, TONGA_, TONGA)
+CHIPSET(0x6921, TONGA_, TONGA)
+CHIPSET(0x6928, TONGA_, TONGA)
+CHIPSET(0x6929, TONGA_, TONGA)
+CHIPSET(0x692B, TONGA_, TONGA)
+CHIPSET(0x692F, TONGA_, TONGA)
+CHIPSET(0x6930, TONGA_, TONGA)
+CHIPSET(0x6938, TONGA_, TONGA)
+CHIPSET(0x6939, TONGA_, TONGA)
+
+CHIPSET(0x9870, CARRIZO_, CARRIZO)
+CHIPSET(0x9874, CARRIZO_, CARRIZO)
+CHIPSET(0x9875, CARRIZO_, CARRIZO)
+CHIPSET(0x9876, CARRIZO_, CARRIZO)
+CHIPSET(0x9877, CARRIZO_, CARRIZO)
+
+CHIPSET(0x7300, FIJI_, FIJI)
diff --git a/src/Makefile.am b/src/Makefile.am
index d41a087ae1c..da638a811fb 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
SUBDIRS = . gtest util mapi/glapi/gen mapi
if NEED_OPENGL_COMMON
@@ -37,16 +35,12 @@ if HAVE_EGL_PLATFORM_WAYLAND
SUBDIRS += egl/wayland/wayland-egl egl/wayland/wayland-drm
endif
-if HAVE_EGL_DRIVER_DRI2
-SUBDIRS += egl/drivers/dri2
-endif
-
if HAVE_GBM
SUBDIRS += gbm
endif
if HAVE_EGL
-SUBDIRS += egl/main
+SUBDIRS += egl
endif
if HAVE_GALLIUM
@@ -54,8 +48,6 @@ SUBDIRS += gallium
endif
EXTRA_DIST = \
- egl/drivers/haiku \
- egl/docs \
getopt hgl SConscript
AM_CFLAGS = $(VISIBILITY_CFLAGS)
diff --git a/src/SConscript b/src/SConscript
index b0578e89258..106b87d4251 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -31,13 +31,8 @@ SConscript('mesa/SConscript')
if not env['embedded']:
if env['platform'] not in ('cygwin', 'darwin', 'freebsd', 'haiku', 'windows'):
SConscript('glx/SConscript')
- if env['platform'] not in ['darwin', 'haiku', 'sunos', 'windows']:
- if env['dri']:
- SConscript('egl/drivers/dri2/SConscript')
- SConscript('egl/main/SConscript')
if env['platform'] == 'haiku':
- SConscript('egl/drivers/haiku/SConscript')
- SConscript('egl/main/SConscript')
+ SConscript('egl/SConscript')
if env['gles']:
SConscript('mapi/shared-glapi/SConscript')
diff --git a/src/egl/main/Android.mk b/src/egl/Android.mk
similarity index 79%
rename from src/egl/main/Android.mk
rename to src/egl/Android.mk
index 0ba72953960..ebd67af34cc 100644
--- a/src/egl/main/Android.mk
+++ b/src/egl/Android.mk
@@ -27,21 +27,36 @@ LOCAL_PATH := $(call my-dir)
include $(LOCAL_PATH)/Makefile.sources
-SOURCES := \
- ${LIBEGL_C_FILES}
-
# ---------------------------------------
# Build libGLES_mesa
# ---------------------------------------
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := $(SOURCES)
+LOCAL_SRC_FILES := \
+ $(LIBEGL_C_FILES) \
+ $(dri2_backend_core_FILES) \
+ drivers/dri2/platform_android.c
LOCAL_CFLAGS := \
-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_ANDROID \
- -D_EGL_DRIVER_SEARCH_DIR=\"/system/lib/egl\" \
- -D_EGL_OS_UNIX=1
+ -D_EGL_BUILT_IN_DRIVER_DRI2 \
+ -DHAVE_ANDROID_PLATFORM
+
+ifeq ($(MESA_LOLLIPOP_BUILD),true)
+LOCAL_CFLAGS_arm := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
+LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
+LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
+else
+LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
+endif
+
+LOCAL_C_INCLUDES := \
+ $(MESA_TOP)/src/egl/main \
+ $(MESA_TOP)/src/egl/drivers/dri2 \
+
+LOCAL_STATIC_LIBRARIES := \
+ libmesa_loader
LOCAL_SHARED_LIBRARIES := \
libdl \
@@ -55,12 +70,11 @@ LOCAL_SHARED_LIBRARIES += libsync
endif
# add libdrm if there are hardware drivers
-ifneq ($(MESA_GPU_DRIVERS),swrast)
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DHAVE_LIBDRM
LOCAL_SHARED_LIBRARIES += libdrm
endif
-LOCAL_CFLAGS += -D_EGL_BUILT_IN_DRIVER_DRI2
-
ifeq ($(strip $(MESA_BUILD_CLASSIC)),true)
# require i915_dri and/or i965_dri
LOCAL_REQUIRED_MODULES += \
@@ -71,9 +85,6 @@ ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
LOCAL_REQUIRED_MODULES += gallium_dri
endif # MESA_BUILD_GALLIUM
-LOCAL_STATIC_LIBRARIES := \
- libmesa_egl_dri2 \
- libmesa_loader
LOCAL_MODULE := libGLES_mesa
ifeq ($(MESA_LOLLIPOP_BUILD),true)
diff --git a/src/egl/main/Makefile.am b/src/egl/Makefile.am
similarity index 69%
rename from src/egl/main/Makefile.am
rename to src/egl/Makefile.am
index 9030d272b53..5c2ba301ffb 100644
--- a/src/egl/main/Makefile.am
+++ b/src/egl/Makefile.am
@@ -23,18 +23,19 @@ include Makefile.sources
AM_CFLAGS = \
-I$(top_srcdir)/include \
+ -I$(top_srcdir)/src/egl/main \
-I$(top_srcdir)/src/gbm/main \
+ -I$(top_srcdir)/src \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
+ $(LIBDRM_CFLAGS) \
$(EGL_CFLAGS) \
- -D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \
- -D_EGL_DRIVER_SEARCH_DIR=\"$(libdir)/egl\" \
- -D_EGL_OS_UNIX=1
+ -D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM)
lib_LTLIBRARIES = libEGL.la
libEGL_la_SOURCES = \
- ${LIBEGL_C_FILES}
+ $(LIBEGL_C_FILES)
libEGL_la_LIBADD = \
$(EGL_LIB_DEPS)
@@ -45,10 +46,13 @@ libEGL_la_LDFLAGS = \
$(GC_SECTIONS) \
$(LD_NO_UNDEFINED)
+dri2_backend_FILES =
+
if HAVE_EGL_PLATFORM_X11
AM_CFLAGS += -DHAVE_X11_PLATFORM
AM_CFLAGS += $(XCB_DRI2_CFLAGS)
libEGL_la_LIBADD += $(XCB_DRI2_LIBS)
+dri2_backend_FILES += drivers/dri2/platform_x11.c
endif
if HAVE_EGL_PLATFORM_WAYLAND
@@ -56,26 +60,37 @@ AM_CFLAGS += -DHAVE_WAYLAND_PLATFORM
AM_CFLAGS += $(WAYLAND_CFLAGS)
libEGL_la_LIBADD += $(WAYLAND_LIBS)
libEGL_la_LIBADD += $(LIBDRM_LIBS)
-libEGL_la_LIBADD += ../wayland/wayland-drm/libwayland-drm.la
+libEGL_la_LIBADD += $(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la
+dri2_backend_FILES += drivers/dri2/platform_wayland.c
endif
if HAVE_EGL_PLATFORM_DRM
AM_CFLAGS += -DHAVE_DRM_PLATFORM
-libEGL_la_LIBADD += ../../gbm/libgbm.la
-endif
-
-if HAVE_EGL_PLATFORM_NULL
-AM_CFLAGS += -DHAVE_NULL_PLATFORM
+libEGL_la_LIBADD += $(top_builddir)/src/gbm/libgbm.la
+dri2_backend_FILES += drivers/dri2/platform_drm.c
endif
if HAVE_EGL_PLATFORM_SURFACELESS
AM_CFLAGS += -DHAVE_SURFACELESS_PLATFORM
+dri2_backend_FILES += drivers/dri2/platform_surfaceless.c
endif
if HAVE_EGL_DRIVER_DRI2
-AM_CFLAGS += -D_EGL_BUILT_IN_DRIVER_DRI2
-AM_CFLAGS += -DHAVE_XCB_DRI2
-libEGL_la_LIBADD += ../drivers/dri2/libegl_dri2.la
+AM_CFLAGS += \
+ -I$(top_srcdir)/src/loader \
+ -I$(top_srcdir)/src/egl/drivers/dri2 \
+ -I$(top_srcdir)/src/gbm/backends/dri \
+ -I$(top_srcdir)/src/egl/wayland/wayland-egl \
+ -I$(top_srcdir)/src/egl/wayland/wayland-drm \
+ -I$(top_builddir)/src/egl/wayland/wayland-drm \
+ -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
+ -D_EGL_BUILT_IN_DRIVER_DRI2
+
+libEGL_la_SOURCES += \
+ $(dri2_backend_core_FILES) \
+ $(dri2_backend_FILES)
+
+libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader.la
libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS)
endif
@@ -83,7 +98,7 @@ include $(top_srcdir)/install-lib-links.mk
pkgconfigdir = $(libdir)/pkgconfig
-pkgconfig_DATA = egl.pc
+pkgconfig_DATA = main/egl.pc
khrdir = $(includedir)/KHR
khr_HEADERS = $(top_srcdir)/include/KHR/khrplatform.h
@@ -97,6 +112,8 @@ egl_HEADERS = \
$(top_srcdir)/include/EGL/eglplatform.h
EXTRA_DIST = \
- egl.def \
- README.txt \
- SConscript
+ SConscript \
+ drivers/haiku \
+ docs \
+ main/egl.def \
+ main/README.txt
diff --git a/src/egl/Makefile.sources b/src/egl/Makefile.sources
new file mode 100644
index 00000000000..48db8518f8a
--- /dev/null
+++ b/src/egl/Makefile.sources
@@ -0,0 +1,34 @@
+LIBEGL_C_FILES := \
+ main/eglapi.c \
+ main/eglapi.h \
+ main/eglarray.c \
+ main/eglarray.h \
+ main/eglcompiler.h \
+ main/eglconfig.c \
+ main/eglconfig.h \
+ main/eglcontext.c \
+ main/eglcontext.h \
+ main/eglcurrent.c \
+ main/eglcurrent.h \
+ main/egldefines.h \
+ main/egldisplay.c \
+ main/egldisplay.h \
+ main/egldriver.c \
+ main/egldriver.h \
+ main/eglfallbacks.c \
+ main/eglglobals.c \
+ main/eglglobals.h \
+ main/eglimage.c \
+ main/eglimage.h \
+ main/egllog.c \
+ main/egllog.h \
+ main/eglsurface.c \
+ main/eglsurface.h \
+ main/eglsync.c \
+ main/eglsync.h \
+ main/egltypedefs.h
+
+dri2_backend_core_FILES := \
+ drivers/dri2/egl_dri2.c \
+ drivers/dri2/egl_dri2.h \
+ drivers/dri2/egl_dri2_fallbacks.h
diff --git a/src/egl/SConscript b/src/egl/SConscript
new file mode 100644
index 00000000000..1b2a4271ef7
--- /dev/null
+++ b/src/egl/SConscript
@@ -0,0 +1,34 @@
+#######################################################################
+# SConscript for EGL
+
+
+Import('*')
+
+env = env.Clone()
+
+env.Append(CPPPATH = [
+ '#/include',
+ '#/src/egl/main',
+ '#/src',
+])
+
+
+# parse Makefile.sources
+egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
+egl_sources.append(env.ParseSourceList('Makefile.sources', 'dri2_backend_core_FILES'))
+
+env.Append(CPPDEFINES = [
+ '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
+ '_EGL_BUILT_IN_DRIVER_HAIKU',
+ 'HAVE_HAIKU_PLATFORM',
+])
+egl_sources.append('drivers/haiku/egl_haiku.cpp')
+
+egl = env.SharedLibrary(
+ target = 'EGL',
+ source = egl_sources,
+)
+
+egl = env.InstallSharedLibrary(egl, version=(1, 0, 0))
+
+env.Alias('egl', egl)
diff --git a/src/egl/drivers/dri2/Android.mk b/src/egl/drivers/dri2/Android.mk
deleted file mode 100644
index 109e4d4a0d8..00000000000
--- a/src/egl/drivers/dri2/Android.mk
+++ /dev/null
@@ -1,64 +0,0 @@
-# Mesa 3-D graphics library
-#
-# Copyright (C) 2010-2011 Chia-I Wu
-# Copyright (C) 2010-2011 LunarG Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-# Android.mk for egl_dri2
-
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
- egl_dri2.c \
- platform_android.c
-
-LOCAL_CFLAGS := \
- -DHAVE_SHARED_GLAPI \
- -DHAVE_ANDROID_PLATFORM
-
-ifeq ($(MESA_LOLLIPOP_BUILD),true)
-LOCAL_CFLAGS_arm := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
-LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
-LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
-else
-LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
-endif
-
-LOCAL_C_INCLUDES := \
- $(MESA_TOP)/src/mapi \
- $(MESA_TOP)/src/egl/main \
- $(DRM_GRALLOC_TOP)
-
-LOCAL_STATIC_LIBRARIES := \
- libmesa_loader
-
-LOCAL_SHARED_LIBRARIES := libdrm
-
-ifeq ($(shell echo "$(MESA_ANDROID_VERSION) >= 4.2" | bc),1)
-LOCAL_SHARED_LIBRARIES += \
- libsync
-endif
-
-LOCAL_MODULE := libmesa_egl_dri2
-
-include $(MESA_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
diff --git a/src/egl/drivers/dri2/Makefile.am b/src/egl/drivers/dri2/Makefile.am
deleted file mode 100644
index 55be4a75ba5..00000000000
--- a/src/egl/drivers/dri2/Makefile.am
+++ /dev/null
@@ -1,73 +0,0 @@
-# Copyright © 2012 Intel Corporation
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-AM_CFLAGS = \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/src/egl/main \
- -I$(top_srcdir)/src/loader \
- -I$(top_srcdir)/src/gbm/main \
- -I$(top_srcdir)/src/gbm/backends/dri \
- -I$(top_srcdir)/src/egl/wayland/wayland-egl \
- -I$(top_srcdir)/src/egl/wayland/wayland-drm \
- -I$(top_builddir)/src/egl/wayland/wayland-drm \
- $(DEFINES) \
- $(VISIBILITY_CFLAGS) \
- $(LIBDRM_CFLAGS) \
- -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\"
-
-noinst_LTLIBRARIES = libegl_dri2.la
-
-libegl_dri2_la_SOURCES = \
- egl_dri2.c \
- egl_dri2.h \
- egl_dri2_fallbacks.h
-
-libegl_dri2_la_LIBADD = \
- $(top_builddir)/src/loader/libloader.la \
- $(EGL_LIB_DEPS)
-
-if HAVE_SHARED_GLAPI
-AM_CFLAGS += -DHAVE_SHARED_GLAPI
-endif
-
-if HAVE_EGL_PLATFORM_X11
-libegl_dri2_la_SOURCES += platform_x11.c
-AM_CFLAGS += -DHAVE_X11_PLATFORM
-AM_CFLAGS += $(XCB_DRI2_CFLAGS)
-endif
-
-if HAVE_EGL_PLATFORM_WAYLAND
-libegl_dri2_la_SOURCES += platform_wayland.c
-AM_CFLAGS += -DHAVE_WAYLAND_PLATFORM
-AM_CFLAGS += $(WAYLAND_CFLAGS)
-endif
-
-if HAVE_EGL_PLATFORM_DRM
-libegl_dri2_la_SOURCES += platform_drm.c
-AM_CFLAGS += -DHAVE_DRM_PLATFORM
-endif
-
-if HAVE_EGL_PLATFORM_SURFACELESS
-libegl_dri2_la_SOURCES += platform_surfaceless.c
-AM_CFLAGS += -DHAVE_SURFACELESS_PLATFORM
-endif
-
-EXTRA_DIST = SConscript
diff --git a/src/egl/drivers/dri2/SConscript b/src/egl/drivers/dri2/SConscript
deleted file mode 100644
index 5b03107cbb3..00000000000
--- a/src/egl/drivers/dri2/SConscript
+++ /dev/null
@@ -1,40 +0,0 @@
-Import('*')
-
-env = env.Clone()
-
-env.Append(CPPDEFINES = [
- 'DEFAULT_DRIVER_DIR=\\"\\"'
-])
-
-env.Append(CPPPATH = [
- '#/include',
- '#/src/egl/main',
- '#/src/loader',
-])
-
-sources = [
- 'egl_dri2.c',
-]
-
-if env['x11']:
- sources.append('platform_x11.c')
- env.Append(CPPDEFINES = [
- 'HAVE_X11_PLATFORM',
- ])
- #env.Append(CPPPATH = [
- # 'XCB_DRI2_CFLAGS',
- #])
-
-if env['drm']:
- env.PkgUseModules('DRM')
-
-env.Prepend(LIBS = [
- libloader,
-])
-
-egl_dri2 = env.ConvenienceLibrary(
- target = 'egl_dri2',
- source = sources,
-)
-
-Export('egl_dri2')
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index a1cbd437f53..461735fe9e3 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -28,6 +28,7 @@
#define WL_HIDE_DEPRECATED
#include
+#include
#include
#include
#include
@@ -51,7 +52,23 @@
#endif
#include "egl_dri2.h"
-#include "../util/u_atomic.h"
+#include "util/u_atomic.h"
+
+/* The kernel header drm_fourcc.h defines the DRM formats below. We duplicate
+ * some of the definitions here so that building Mesa won't require bleeding-edge
+ * kernel headers.
+ */
+#ifndef DRM_FORMAT_R8
+#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
+#endif
+
+#ifndef DRM_FORMAT_RG88
+#define DRM_FORMAT_RG88 fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */
+#endif
+
+#ifndef DRM_FORMAT_GR88
+#define DRM_FORMAT_GR88 fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */
+#endif
const __DRIuseInvalidateExtension use_invalidate = {
.base = { __DRI_USE_INVALIDATE, 1 }
@@ -109,6 +126,18 @@ EGLint dri2_to_egl_attribute_map[] = {
0, /* __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE */
};
+const __DRIconfig *
+dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
+ EGLenum colorspace)
+{
+ if (colorspace == EGL_GL_COLORSPACE_SRGB_KHR)
+ return surface_type == EGL_WINDOW_BIT ? conf->dri_srgb_double_config :
+ conf->dri_srgb_single_config;
+ else
+ return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config :
+ conf->dri_single_config;
+}
+
static EGLBoolean
dri2_match_config(const _EGLConfig *conf, const _EGLConfig *criteria)
{
@@ -130,6 +159,7 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
struct dri2_egl_display *dri2_dpy;
_EGLConfig base;
unsigned int attrib, value, double_buffer;
+ bool srgb = false;
EGLint key, bind_to_texture_rgb, bind_to_texture_rgba;
unsigned int dri_masks[4] = { 0, 0, 0, 0 };
_EGLConfig *matching_config;
@@ -139,7 +169,7 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
dri2_dpy = disp->DriverData;
_eglInitConfig(&base, disp, id);
-
+
i = 0;
double_buffer = 0;
bind_to_texture_rgb = 0;
@@ -155,7 +185,7 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
else
return NULL;
_eglSetConfigKey(&base, EGL_COLOR_BUFFER_TYPE, value);
- break;
+ break;
case __DRI_ATTRIB_CONFIG_CAVEAT:
if (value & __DRI_ATTRIB_NON_CONFORMANT_CONFIG)
@@ -204,6 +234,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
return NULL;
break;
+ case __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE:
+ srgb = value != 0;
+ break;
+
default:
key = dri2_to_egl_attribute_map[attrib];
if (key != 0)
@@ -249,28 +283,35 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
if (num_configs == 1) {
conf = (struct dri2_egl_config *) matching_config;
- if (double_buffer && !conf->dri_double_config)
+ if (double_buffer && srgb && !conf->dri_srgb_double_config)
+ conf->dri_srgb_double_config = dri_config;
+ else if (double_buffer && !srgb && !conf->dri_double_config)
conf->dri_double_config = dri_config;
- else if (!double_buffer && !conf->dri_single_config)
+ else if (!double_buffer && srgb && !conf->dri_srgb_single_config)
+ conf->dri_srgb_single_config = dri_config;
+ else if (!double_buffer && !srgb && !conf->dri_single_config)
conf->dri_single_config = dri_config;
else
/* a similar config type is already added (unlikely) => discard */
return NULL;
}
else if (num_configs == 0) {
- conf = malloc(sizeof *conf);
+ conf = calloc(1, sizeof *conf);
if (conf == NULL)
return NULL;
memcpy(&conf->base, &base, sizeof base);
if (double_buffer) {
- conf->dri_double_config = dri_config;
- conf->dri_single_config = NULL;
+ if (srgb)
+ conf->dri_srgb_double_config = dri_config;
+ else
+ conf->dri_double_config = dri_config;
} else {
- conf->dri_single_config = dri_config;
- conf->dri_double_config = NULL;
+ if (srgb)
+ conf->dri_srgb_single_config = dri_config;
+ else
+ conf->dri_single_config = dri_config;
}
- conf->base.SurfaceType = 0;
conf->base.ConfigID = config_id;
_eglLinkConfig(&conf->base);
@@ -365,7 +406,7 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy,
}
}
}
-
+
for (j = 0; matches[j].name; j++) {
field = ((char *) dri2_dpy + matches[j].offset);
if (*(const __DRIextension **) field == NULL) {
@@ -500,6 +541,19 @@ dri2_load_driver_swrast(_EGLDisplay *disp)
return EGL_TRUE;
}
+static unsigned
+dri2_renderer_query_integer(struct dri2_egl_display *dri2_dpy, int param)
+{
+ const __DRI2rendererQueryExtension *rendererQuery = dri2_dpy->rendererQuery;
+ unsigned int value = 0;
+
+ if (!rendererQuery ||
+ rendererQuery->queryInteger(dri2_dpy->dri_screen, param, &value) == -1)
+ return 0;
+
+ return value;
+}
+
void
dri2_setup_screen(_EGLDisplay *disp)
{
@@ -530,6 +584,10 @@ dri2_setup_screen(_EGLDisplay *disp)
disp->Extensions.KHR_surfaceless_context = EGL_TRUE;
disp->Extensions.MESA_configless_context = EGL_TRUE;
+ if (dri2_renderer_query_integer(dri2_dpy,
+ __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
+ disp->Extensions.KHR_gl_colorspace = EGL_TRUE;
+
if (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) {
disp->Extensions.KHR_create_context = EGL_TRUE;
@@ -567,6 +625,9 @@ dri2_setup_screen(_EGLDisplay *disp)
disp->Extensions.KHR_gl_texture_2D_image = EGL_TRUE;
disp->Extensions.KHR_gl_texture_cubemap_image = EGL_TRUE;
}
+ if (dri2_renderer_query_integer(dri2_dpy,
+ __DRI2_RENDERER_HAS_TEXTURE_3D))
+ disp->Extensions.KHR_gl_texture_3D_image = EGL_TRUE;
#ifdef HAVE_LIBDRM
if (dri2_dpy->image->base.version >= 8 &&
dri2_dpy->image->createImageFromDmaBufs) {
@@ -624,7 +685,7 @@ dri2_create_screen(_EGLDisplay *disp)
dri2_dpy->own_dri_screen = 1;
extensions = dri2_dpy->core->getExtensions(dri2_dpy->dri_screen);
-
+
if (dri2_dpy->dri2) {
if (!dri2_bind_extensions(dri2_dpy, dri2_core_extensions, extensions))
goto cleanup_dri_screen;
@@ -644,6 +705,9 @@ dri2_create_screen(_EGLDisplay *disp)
if (strcmp(extensions[i]->name, __DRI2_FENCE) == 0) {
dri2_dpy->fence = (__DRI2fenceExtension *) extensions[i];
}
+ if (strcmp(extensions[i]->name, __DRI2_RENDERER_QUERY) == 0) {
+ dri2_dpy->rendererQuery = (__DRI2rendererQueryExtension *) extensions[i];
+ }
}
dri2_setup_screen(disp);
@@ -1384,53 +1448,6 @@ dri2_create_image_khr_renderbuffer(_EGLDisplay *disp, _EGLContext *ctx,
return dri2_create_image_from_dri(disp, dri_image);
}
-#ifdef HAVE_LIBDRM
-static _EGLImage *
-dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
- EGLClientBuffer buffer, const EGLint *attr_list)
-{
- struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
- EGLint format, name, pitch, err;
- _EGLImageAttribs attrs;
- __DRIimage *dri_image;
-
- name = (EGLint) (uintptr_t) buffer;
-
- err = _eglParseImageAttribList(&attrs, disp, attr_list);
- if (err != EGL_SUCCESS)
- return NULL;
-
- if (attrs.Width <= 0 || attrs.Height <= 0 ||
- attrs.DRMBufferStrideMESA <= 0) {
- _eglError(EGL_BAD_PARAMETER,
- "bad width, height or stride");
- return NULL;
- }
-
- switch (attrs.DRMBufferFormatMESA) {
- case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
- format = __DRI_IMAGE_FORMAT_ARGB8888;
- pitch = attrs.DRMBufferStrideMESA;
- break;
- default:
- _eglError(EGL_BAD_PARAMETER,
- "dri2_create_image_khr: unsupported pixmap depth");
- return NULL;
- }
-
- dri_image =
- dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
- attrs.Width,
- attrs.Height,
- format,
- name,
- pitch,
- NULL);
-
- return dri2_create_image_from_dri(disp, dri_image);
-}
-#endif
-
#ifdef HAVE_WAYLAND_PLATFORM
/* This structure describes how a wl_buffer maps to one or more
@@ -1528,6 +1545,10 @@ dri2_create_image_khr_texture_error(int dri_error)
egl_error = EGL_BAD_PARAMETER;
break;
+ case __DRI_IMAGE_ERROR_BAD_ACCESS:
+ egl_error = EGL_BAD_ACCESS;
+ break;
+
default:
assert(0);
egl_error = EGL_BAD_MATCH;
@@ -1566,9 +1587,15 @@ dri2_create_image_khr_texture(_EGLDisplay *disp, _EGLContext *ctx,
gl_target = GL_TEXTURE_2D;
break;
case EGL_GL_TEXTURE_3D_KHR:
- depth = attrs.GLTextureZOffset;
- gl_target = GL_TEXTURE_3D;
- break;
+ if (disp->Extensions.KHR_gl_texture_3D_image) {
+ depth = attrs.GLTextureZOffset;
+ gl_target = GL_TEXTURE_3D;
+ break;
+ }
+ else {
+ _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
+ return EGL_NO_IMAGE_KHR;
+ }
case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR:
case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR:
case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR:
@@ -1621,6 +1648,51 @@ dri2_create_wayland_buffer_from_image(_EGLDriver *drv, _EGLDisplay *dpy,
}
#ifdef HAVE_LIBDRM
+static _EGLImage *
+dri2_create_image_mesa_drm_buffer(_EGLDisplay *disp, _EGLContext *ctx,
+ EGLClientBuffer buffer, const EGLint *attr_list)
+{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+ EGLint format, name, pitch, err;
+ _EGLImageAttribs attrs;
+ __DRIimage *dri_image;
+
+ name = (EGLint) (uintptr_t) buffer;
+
+ err = _eglParseImageAttribList(&attrs, disp, attr_list);
+ if (err != EGL_SUCCESS)
+ return NULL;
+
+ if (attrs.Width <= 0 || attrs.Height <= 0 ||
+ attrs.DRMBufferStrideMESA <= 0) {
+ _eglError(EGL_BAD_PARAMETER,
+ "bad width, height or stride");
+ return NULL;
+ }
+
+ switch (attrs.DRMBufferFormatMESA) {
+ case EGL_DRM_BUFFER_FORMAT_ARGB32_MESA:
+ format = __DRI_IMAGE_FORMAT_ARGB8888;
+ pitch = attrs.DRMBufferStrideMESA;
+ break;
+ default:
+ _eglError(EGL_BAD_PARAMETER,
+ "dri2_create_image_khr: unsupported pixmap depth");
+ return NULL;
+ }
+
+ dri_image =
+ dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
+ attrs.Width,
+ attrs.Height,
+ format,
+ name,
+ pitch,
+ NULL);
+
+ return dri2_create_image_from_dri(disp, dri_image);
+}
+
static EGLBoolean
dri2_check_dma_buf_attribs(const _EGLImageAttribs *attrs)
{
@@ -1673,6 +1745,9 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs)
unsigned i, plane_n;
switch (attrs->DMABufFourCC.Value) {
+ case DRM_FORMAT_R8:
+ case DRM_FORMAT_RG88:
+ case DRM_FORMAT_GR88:
case DRM_FORMAT_RGB332:
case DRM_FORMAT_BGR233:
case DRM_FORMAT_XRGB4444:
@@ -1850,59 +1925,6 @@ dri2_create_image_dma_buf(_EGLDisplay *disp, _EGLContext *ctx,
return res;
}
-#endif
-
-_EGLImage *
-dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
- _EGLContext *ctx, EGLenum target,
- EGLClientBuffer buffer, const EGLint *attr_list)
-{
- (void) drv;
-
- switch (target) {
- case EGL_GL_TEXTURE_2D_KHR:
- case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR:
- case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR:
- case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR:
- case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR:
- case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR:
- case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR:
- return dri2_create_image_khr_texture(disp, ctx, target, buffer, attr_list);
- case EGL_GL_RENDERBUFFER_KHR:
- return dri2_create_image_khr_renderbuffer(disp, ctx, buffer, attr_list);
-#ifdef HAVE_LIBDRM
- case EGL_DRM_BUFFER_MESA:
- return dri2_create_image_mesa_drm_buffer(disp, ctx, buffer, attr_list);
-#endif
-#ifdef HAVE_WAYLAND_PLATFORM
- case EGL_WAYLAND_BUFFER_WL:
- return dri2_create_image_wayland_wl_buffer(disp, ctx, buffer, attr_list);
-#endif
-#ifdef HAVE_LIBDRM
- case EGL_LINUX_DMA_BUF_EXT:
- return dri2_create_image_dma_buf(disp, ctx, buffer, attr_list);
-#endif
- default:
- _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
- return EGL_NO_IMAGE_KHR;
- }
-}
-
-static EGLBoolean
-dri2_destroy_image_khr(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *image)
-{
- struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
- struct dri2_egl_image *dri2_img = dri2_egl_image(image);
-
- (void) drv;
-
- dri2_dpy->image->destroyImage(dri2_img->dri_image);
- free(dri2_img);
-
- return EGL_TRUE;
-}
-
-#ifdef HAVE_LIBDRM
static _EGLImage *
dri2_create_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp,
const EGLint *attr_list)
@@ -1970,7 +1992,7 @@ dri2_create_drm_image_mesa(_EGLDriver *drv, _EGLDisplay *disp,
if (attrs.DRMBufferUseMESA & EGL_DRM_BUFFER_USE_CURSOR_MESA)
dri_use |= __DRI_IMAGE_USE_CURSOR;
- dri2_img->dri_image =
+ dri2_img->dri_image =
dri2_dpy->image->createImage(dri2_dpy->dri_screen,
attrs.Width, attrs.Height,
format, dri_use, dri2_img);
@@ -2062,8 +2084,65 @@ dri2_export_dma_buf_image_mesa(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *im
return EGL_TRUE;
}
+
#endif
+_EGLImage *
+dri2_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
+ _EGLContext *ctx, EGLenum target,
+ EGLClientBuffer buffer, const EGLint *attr_list)
+{
+ (void) drv;
+
+ switch (target) {
+ case EGL_GL_TEXTURE_2D_KHR:
+ case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR:
+ case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR:
+ case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR:
+ case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR:
+ case EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR:
+ case EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR:
+ return dri2_create_image_khr_texture(disp, ctx, target, buffer, attr_list);
+ case EGL_GL_TEXTURE_3D_KHR:
+ if (disp->Extensions.KHR_gl_texture_3D_image) {
+ return dri2_create_image_khr_texture(disp, ctx, target, buffer, attr_list);
+ }
+ else {
+ _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
+ return EGL_NO_IMAGE_KHR;
+ }
+ case EGL_GL_RENDERBUFFER_KHR:
+ return dri2_create_image_khr_renderbuffer(disp, ctx, buffer, attr_list);
+#ifdef HAVE_LIBDRM
+ case EGL_DRM_BUFFER_MESA:
+ return dri2_create_image_mesa_drm_buffer(disp, ctx, buffer, attr_list);
+ case EGL_LINUX_DMA_BUF_EXT:
+ return dri2_create_image_dma_buf(disp, ctx, buffer, attr_list);
+#endif
+#ifdef HAVE_WAYLAND_PLATFORM
+ case EGL_WAYLAND_BUFFER_WL:
+ return dri2_create_image_wayland_wl_buffer(disp, ctx, buffer, attr_list);
+#endif
+ default:
+ _eglError(EGL_BAD_PARAMETER, "dri2_create_image_khr");
+ return EGL_NO_IMAGE_KHR;
+ }
+}
+
+static EGLBoolean
+dri2_destroy_image_khr(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *image)
+{
+ struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+ struct dri2_egl_image *dri2_img = dri2_egl_image(image);
+
+ (void) drv;
+
+ dri2_dpy->image->destroyImage(dri2_img->dri_image);
+ free(dri2_img);
+
+ return EGL_TRUE;
+}
+
#ifdef HAVE_WAYLAND_PLATFORM
static void
@@ -2141,13 +2220,11 @@ dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay *disp,
wl_drm_callbacks.authenticate =
(int(*)(void *, uint32_t)) dri2_dpy->vtbl->authenticate;
-#ifdef HAVE_LIBDRM
if (drmGetCap(dri2_dpy->fd, DRM_CAP_PRIME, &cap) == 0 &&
cap == (DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT) &&
dri2_dpy->image->base.version >= 7 &&
dri2_dpy->image->createImageFromFds != NULL)
flags |= WAYLAND_DRM_PRIME;
-#endif
dri2_dpy->wl_server_drm =
wayland_drm_init(wl_dpy, dri2_dpy->device_name,
@@ -2351,18 +2428,12 @@ static EGLBoolean
dri2_load(_EGLDriver *drv)
{
struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
-#ifdef HAVE_SHARED_GLAPI
#ifdef HAVE_ANDROID_PLATFORM
const char *libname = "libglapi.so";
+#elif defined(__APPLE__)
+ const char *libname = "libglapi.0.dylib";
#else
const char *libname = "libglapi.so.0";
-#endif
-#else
- /*
- * Both libGL.so and libglapi.so are glapi providers. There is no way to
- * tell which one to load.
- */
- const char *libname = NULL;
#endif
void *handle;
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 9985c49f984..9aa2a8c1003 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -120,9 +120,9 @@ struct dri2_egl_display_vtbl {
EGLBoolean (*swap_buffers)(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLSurface *surf);
- EGLBoolean (*swap_buffers_with_damage)(_EGLDriver *drv, _EGLDisplay *dpy,
- _EGLSurface *surface,
- const EGLint *rects, EGLint n_rects);
+ EGLBoolean (*swap_buffers_with_damage)(_EGLDriver *drv, _EGLDisplay *dpy,
+ _EGLSurface *surface,
+ const EGLint *rects, EGLint n_rects);
EGLBoolean (*swap_buffers_region)(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLSurface *surf, EGLint numRects,
@@ -166,6 +166,7 @@ struct dri2_egl_display
const __DRIrobustnessExtension *robustness;
const __DRI2configQueryExtension *config;
const __DRI2fenceExtension *fence;
+ const __DRI2rendererQueryExtension *rendererQuery;
int fd;
int own_device;
@@ -285,6 +286,8 @@ struct dri2_egl_config
_EGLConfig base;
const __DRIconfig *dri_single_config;
const __DRIconfig *dri_double_config;
+ const __DRIconfig *dri_srgb_single_config;
+ const __DRIconfig *dri_srgb_double_config;
};
struct dri2_egl_image
@@ -357,4 +360,8 @@ dri2_initialize_surfaceless(_EGLDriver *drv, _EGLDisplay *disp);
void
dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw);
+const __DRIconfig *
+dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
+ EGLenum colorspace);
+
#endif /* EGL_DRI2_INCLUDED */
diff --git a/src/egl/drivers/dri2/platform_android.c b/src/egl/drivers/dri2/platform_android.c
index fed3073088a..4abe82f63a0 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -199,6 +199,7 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
struct dri2_egl_surface *dri2_surf;
struct ANativeWindow *window = native_window;
+ const __DRIconfig *config;
dri2_surf = calloc(1, sizeof *dri2_surf);
if (!dri2_surf) {
@@ -230,9 +231,11 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
window->query(window, NATIVE_WINDOW_HEIGHT, &dri2_surf->base.Height);
}
+ config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+ dri2_surf->base.GLColorspace);
+
dri2_surf->dri_drawable =
- (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen,
- dri2_conf->dri_double_config,
+ (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
dri2_surf);
if (dri2_surf->dri_drawable == NULL) {
_eglError(EGL_BAD_ALLOC, "dri2->createNewDrawable");
diff --git a/src/egl/drivers/dri2/platform_drm.c b/src/egl/drivers/dri2/platform_drm.c
index a62da4121fe..a439a3be6b6 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -115,8 +115,11 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
switch (type) {
case EGL_WINDOW_BIT:
- if (!window)
- return NULL;
+ if (!window) {
+ _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+ goto cleanup_surf;
+ }
+
surf = gbm_dri_surface(window);
dri2_surf->gbm_surf = surf;
dri2_surf->base.Width = surf->base.width;
@@ -128,10 +131,13 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
}
if (dri2_dpy->dri2) {
+ const __DRIconfig *config =
+ dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+ dri2_surf->base.GLColorspace);
+
dri2_surf->dri_drawable =
- (*dri2_dpy->dri2->createNewDrawable) (dri2_dpy->dri_screen,
- dri2_conf->dri_double_config,
- dri2_surf->gbm_surf);
+ (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
+ dri2_surf->gbm_surf);
} else {
assert(dri2_dpy->swrast != NULL);
diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index 1c985523862..dabaf1ebbd1 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -65,7 +65,7 @@ sync_callback(void *data, struct wl_callback *callback, uint32_t serial)
}
static const struct wl_callback_listener sync_listener = {
- sync_callback
+ .done = sync_callback
};
static int
@@ -104,8 +104,8 @@ wl_buffer_release(void *data, struct wl_buffer *buffer)
dri2_surf->color_buffers[i].locked = 0;
}
-static struct wl_buffer_listener wl_buffer_listener = {
- wl_buffer_release
+static const struct wl_buffer_listener wl_buffer_listener = {
+ .release = wl_buffer_release
};
static void
@@ -130,6 +130,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
struct wl_egl_window *window = native_window;
struct dri2_egl_surface *dri2_surf;
+ const __DRIconfig *config;
(void) drv;
@@ -138,7 +139,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
_eglError(EGL_BAD_ALLOC, "dri2_create_surface");
return NULL;
}
-
+
if (!_eglInitSurface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list))
goto cleanup_surf;
@@ -149,6 +150,11 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
else
dri2_surf->format = WL_DRM_FORMAT_ARGB8888;
+ if (!window) {
+ _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+ goto cleanup_surf;
+ }
+
dri2_surf->wl_win = window;
dri2_surf->wl_win->private = dri2_surf;
@@ -157,19 +163,19 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,
dri2_surf->base.Width = -1;
dri2_surf->base.Height = -1;
+ config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+ dri2_surf->base.GLColorspace);
+
dri2_surf->dri_drawable =
- (*dri2_dpy->dri2->createNewDrawable) (dri2_dpy->dri_screen,
- dri2_conf->dri_double_config,
- dri2_surf);
+ (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
+ dri2_surf);
if (dri2_surf->dri_drawable == NULL) {
_eglError(EGL_BAD_ALLOC, "dri2->createNewDrawable");
- goto cleanup_dri_drawable;
+ goto cleanup_surf;
}
return &dri2_surf->base;
- cleanup_dri_drawable:
- dri2_dpy->core->destroyDrawable(dri2_surf->dri_drawable);
cleanup_surf:
free(dri2_surf);
@@ -361,7 +367,7 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
}
if (dri2_surf->back->dri_image == NULL) {
- dri2_surf->back->dri_image =
+ dri2_surf->back->dri_image =
dri2_dpy->image->createImage(dri2_dpy->dri_screen,
dri2_surf->base.Width,
dri2_surf->base.Height,
@@ -595,7 +601,7 @@ wayland_throttle_callback(void *data,
}
static const struct wl_callback_listener throttle_listener = {
- wayland_throttle_callback
+ .done = wayland_throttle_callback
};
static void
@@ -839,22 +845,6 @@ bad_format:
return NULL;
}
-static char
-is_fd_render_node(int fd)
-{
- struct stat render;
-
- if (fstat(fd, &render))
- return 0;
-
- if (!S_ISCHR(render.st_mode))
- return 0;
-
- if (render.st_rdev & 0x80)
- return 1;
- return 0;
-}
-
static int
dri2_wl_authenticate(_EGLDisplay *disp, uint32_t id)
{
@@ -898,7 +888,7 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device)
return;
}
- if (is_fd_render_node(dri2_dpy->fd)) {
+ if (drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER) {
dri2_dpy->authenticated = 1;
} else {
drmGetMagic(dri2_dpy->fd, &magic);
@@ -941,10 +931,10 @@ drm_handle_authenticated(void *data, struct wl_drm *drm)
}
static const struct wl_drm_listener drm_listener = {
- drm_handle_device,
- drm_handle_format,
- drm_handle_authenticated,
- drm_handle_capabilities
+ .device = drm_handle_device,
+ .format = drm_handle_format,
+ .authenticated = drm_handle_authenticated,
+ .capabilities = drm_handle_capabilities
};
static void
@@ -969,8 +959,8 @@ registry_handle_global_remove(void *data, struct wl_registry *registry,
}
static const struct wl_registry_listener registry_listener_drm = {
- registry_handle_global_drm,
- registry_handle_global_remove
+ .global = registry_handle_global_drm,
+ .global_remove = registry_handle_global_remove
};
static EGLBoolean
@@ -1108,7 +1098,7 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
* will return a render-node when the requested gpu is different
* to the server, but also if the client asks for the same gpu than
* the server by requesting its pci-id */
- dri2_dpy->is_render_node = is_fd_render_node(dri2_dpy->fd);
+ dri2_dpy->is_render_node = drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER;
dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
if (dri2_dpy->driver_name == NULL) {
@@ -1220,7 +1210,7 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
wl_event_queue_destroy(dri2_dpy->wl_queue);
cleanup_dpy:
free(dri2_dpy);
-
+
return EGL_FALSE;
}
@@ -1726,7 +1716,7 @@ shm_handle_format(void *data, struct wl_shm *shm, uint32_t format)
}
static const struct wl_shm_listener shm_listener = {
- shm_handle_format
+ .format = shm_handle_format
};
static void
@@ -1743,8 +1733,8 @@ registry_handle_global_swrast(void *data, struct wl_registry *registry, uint32_t
}
static const struct wl_registry_listener registry_listener_swrast = {
- registry_handle_global_swrast,
- registry_handle_global_remove
+ .global = registry_handle_global_swrast,
+ .global_remove = registry_handle_global_remove
};
static struct dri2_egl_display_vtbl dri2_wl_swrast_display_vtbl = {
diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c
index 56c14288204..bf7d2bea4c1 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -56,7 +56,7 @@ swrastCreateDrawable(struct dri2_egl_display * dri2_dpy,
uint32_t mask;
const uint32_t function = GXcopy;
uint32_t valgc[2];
-
+
/* create GC's */
dri2_surf->gc = xcb_generate_id(dri2_dpy->conn);
mask = XCB_GC_FUNCTION;
@@ -226,7 +226,7 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
screen = get_xcb_screen(s, dri2_dpy->screen);
if (!screen) {
- _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+ _eglError(EGL_BAD_ALLOC, "failed to get xcb screen");
goto cleanup_surf;
}
@@ -235,16 +235,23 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
dri2_surf->drawable, screen->root,
dri2_surf->base.Width, dri2_surf->base.Height);
} else {
+ if (!drawable) {
+ if (type == EGL_WINDOW_BIT)
+ _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_create_surface");
+ else
+ _eglError(EGL_BAD_NATIVE_PIXMAP, "dri2_create_surface");
+ goto cleanup_surf;
+ }
dri2_surf->drawable = drawable;
}
if (dri2_dpy->dri2) {
- dri2_surf->dri_drawable =
- (*dri2_dpy->dri2->createNewDrawable) (dri2_dpy->dri_screen,
- type == EGL_WINDOW_BIT ?
- dri2_conf->dri_double_config :
- dri2_conf->dri_single_config,
- dri2_surf);
+ const __DRIconfig *config =
+ dri2_get_dri_config(dri2_conf, type, dri2_surf->base.GLColorspace);
+
+ dri2_surf->dri_drawable =
+ (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
+ dri2_surf);
} else {
assert(dri2_dpy->swrast);
dri2_surf->dri_drawable =
@@ -261,10 +268,18 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
if (type != EGL_PBUFFER_BIT) {
cookie = xcb_get_geometry (dri2_dpy->conn, dri2_surf->drawable);
reply = xcb_get_geometry_reply (dri2_dpy->conn, cookie, &error);
- if (reply == NULL || error != NULL) {
- _eglError(EGL_BAD_ALLOC, "xcb_get_geometry");
- free(error);
- goto cleanup_dri_drawable;
+ if (error != NULL) {
+ if (error->error_code == BadAlloc)
+ _eglError(EGL_BAD_ALLOC, "xcb_get_geometry");
+ else if (type == EGL_WINDOW_BIT)
+ _eglError(EGL_BAD_NATIVE_WINDOW, "xcb_get_geometry");
+ else
+ _eglError(EGL_BAD_NATIVE_PIXMAP, "xcb_get_geometry");
+ free(error);
+ goto cleanup_dri_drawable;
+ } else if (reply == NULL) {
+ _eglError(EGL_BAD_ALLOC, "xcb_get_geometry");
+ goto cleanup_dri_drawable;
}
dri2_surf->base.Width = reply->width;
@@ -274,7 +289,25 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
}
if (dri2_dpy->dri2) {
- xcb_dri2_create_drawable (dri2_dpy->conn, dri2_surf->drawable);
+ xcb_void_cookie_t cookie;
+ int conn_error;
+
+ cookie = xcb_dri2_create_drawable_checked(dri2_dpy->conn,
+ dri2_surf->drawable);
+ error = xcb_request_check(dri2_dpy->conn, cookie);
+ conn_error = xcb_connection_has_error(dri2_dpy->conn);
+ if (conn_error || error != NULL) {
+ if (type == EGL_PBUFFER_BIT || conn_error || error->error_code == BadAlloc)
+ _eglError(EGL_BAD_ALLOC, "xcb_dri2_create_drawable_checked");
+ else if (type == EGL_WINDOW_BIT)
+ _eglError(EGL_BAD_NATIVE_WINDOW,
+ "xcb_dri2_create_drawable_checked");
+ else
+ _eglError(EGL_BAD_NATIVE_PIXMAP,
+ "xcb_dri2_create_drawable_checked");
+ free(error);
+ goto cleanup_dri_drawable;
+ }
} else {
if (type == EGL_PBUFFER_BIT) {
dri2_surf->depth = _eglGetConfigKey(conf, EGL_BUFFER_SIZE);
@@ -515,7 +548,7 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
xcb_generic_error_t *error;
xcb_screen_iterator_t s;
xcb_screen_t *screen;
- char *driver_name, *device_name;
+ char *driver_name, *loader_driver_name, *device_name;
const xcb_query_extension_reply_t *extension;
xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_xfixes_id);
@@ -540,7 +573,7 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
screen = get_xcb_screen(s, dri2_dpy->screen);
if (!screen) {
- _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_x11_connect");
+ _eglLog(_EGL_WARNING, "DRI2: failed to get xcb screen");
return EGL_FALSE;
}
connect_cookie = xcb_dri2_connect_unchecked(dri2_dpy->conn, screen->root,
@@ -575,18 +608,38 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
return EGL_FALSE;
}
- driver_name = xcb_dri2_connect_driver_name (connect);
- dri2_dpy->driver_name =
- strndup(driver_name,
- xcb_dri2_connect_driver_name_length(connect));
-
device_name = xcb_dri2_connect_device_name (connect);
dri2_dpy->device_name =
strndup(device_name,
xcb_dri2_connect_device_name_length(connect));
+ dri2_dpy->fd = loader_open_device(dri2_dpy->device_name);
+ if (dri2_dpy->fd == -1) {
+ _eglLog(_EGL_WARNING,
+ "DRI2: could not open %s (%s)", dri2_dpy->device_name,
+ strerror(errno));
+ free(dri2_dpy->device_name);
+ free(connect);
+ return EGL_FALSE;
+ }
+
+ driver_name = xcb_dri2_connect_driver_name (connect);
+
+ /* If Mesa knows about the appropriate driver for this fd, then trust it.
+ * Otherwise, default to the server's value.
+ */
+ loader_driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
+ if (loader_driver_name) {
+ dri2_dpy->driver_name = loader_driver_name;
+ } else {
+ dri2_dpy->driver_name =
+ strndup(driver_name,
+ xcb_dri2_connect_driver_name_length(connect));
+ }
+
if (dri2_dpy->device_name == NULL || dri2_dpy->driver_name == NULL) {
+ close(dri2_dpy->fd);
free(dri2_dpy->device_name);
free(dri2_dpy->driver_name);
free(connect);
@@ -611,7 +664,7 @@ dri2_x11_authenticate(_EGLDisplay *disp, uint32_t id)
screen = get_xcb_screen(s, dri2_dpy->screen);
if (!screen) {
- _eglError(EGL_BAD_NATIVE_WINDOW, "dri2_x11_authenticate");
+ _eglLog(_EGL_WARNING, "DRI2: failed to get xcb screen");
return -1;
}
@@ -1099,7 +1152,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
dri2_dpy->screen = DefaultScreen(dpy);
}
- if (xcb_connection_has_error(dri2_dpy->conn)) {
+ if (!dri2_dpy->conn || xcb_connection_has_error(dri2_dpy->conn)) {
_eglLog(_EGL_WARNING, "DRI2: xcb_connect failed");
goto cleanup_dpy;
}
@@ -1125,10 +1178,8 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
if (!dri2_create_screen(disp))
goto cleanup_driver;
- if (dri2_dpy->conn) {
- if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
- goto cleanup_configs;
- }
+ if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+ goto cleanup_configs;
/* Fill vtbl last to prevent accidentally calling virtual function during
* initialization.
@@ -1218,31 +1269,19 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
dri2_dpy->screen = DefaultScreen(dpy);
}
- if (xcb_connection_has_error(dri2_dpy->conn)) {
+ if (!dri2_dpy->conn || xcb_connection_has_error(dri2_dpy->conn)) {
_eglLog(_EGL_WARNING, "DRI2: xcb_connect failed");
goto cleanup_dpy;
}
- if (dri2_dpy->conn) {
- if (!dri2_x11_connect(dri2_dpy))
- goto cleanup_conn;
- }
-
- if (!dri2_load_driver(disp))
+ if (!dri2_x11_connect(dri2_dpy))
goto cleanup_conn;
- dri2_dpy->fd = loader_open_device(dri2_dpy->device_name);
- if (dri2_dpy->fd == -1) {
- _eglLog(_EGL_WARNING,
- "DRI2: could not open %s (%s)", dri2_dpy->device_name,
- strerror(errno));
- goto cleanup_driver;
- }
+ if (!dri2_x11_local_authenticate(disp))
+ goto cleanup_fd;
- if (dri2_dpy->conn) {
- if (!dri2_x11_local_authenticate(disp))
- goto cleanup_fd;
- }
+ if (!dri2_load_driver(disp))
+ goto cleanup_fd;
if (dri2_dpy->dri2_minor >= 1) {
dri2_dpy->dri2_loader_extension.base.name = __DRI_DRI2_LOADER;
@@ -1267,7 +1306,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
dri2_dpy->invalidate_available = (dri2_dpy->dri2_minor >= 3);
if (!dri2_create_screen(disp))
- goto cleanup_fd;
+ goto cleanup_driver;
dri2_x11_setup_swap_interval(dri2_dpy);
@@ -1281,10 +1320,8 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
#endif
- if (dri2_dpy->conn) {
- if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
- goto cleanup_configs;
- }
+ if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+ goto cleanup_configs;
/* Fill vtbl last to prevent accidentally calling virtual function during
* initialization.
@@ -1296,10 +1333,10 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
cleanup_configs:
_eglCleanupDisplay(disp);
dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
- cleanup_fd:
- close(dri2_dpy->fd);
cleanup_driver:
dlclose(dri2_dpy->driver);
+ cleanup_fd:
+ close(dri2_dpy->fd);
cleanup_conn:
if (disp->PlatformDisplay == NULL)
xcb_disconnect(dri2_dpy->conn);
diff --git a/src/egl/drivers/haiku/SConscript b/src/egl/drivers/haiku/SConscript
deleted file mode 100644
index ec6020ece77..00000000000
--- a/src/egl/drivers/haiku/SConscript
+++ /dev/null
@@ -1,29 +0,0 @@
-Import('*')
-
-env = env.Clone()
-
-env.Append(CPPDEFINES = [
- 'DEFAULT_DRIVER_DIR=\\"\\"',
-])
-
-env.Append(CPPPATH = [
- '#/include',
- '#/src/egl/main',
-])
-
-sources = [
- 'egl_haiku.cpp'
-]
-
-if env['platform'] == 'haiku':
- env.Append(CPPDEFINES = [
- 'HAVE_HAIKU_PLATFORM',
- '_EGL_NATIVE_PLATFORM=haiku',
- ])
-
-egl_haiku = env.ConvenienceLibrary(
- target = 'egl_haiku',
- source = sources,
-)
-
-Export('egl_haiku')
diff --git a/src/egl/drivers/haiku/egl_haiku.cpp b/src/egl/drivers/haiku/egl_haiku.cpp
index 3d00e47c8e6..ef74f657b14 100644
--- a/src/egl/drivers/haiku/egl_haiku.cpp
+++ b/src/egl/drivers/haiku/egl_haiku.cpp
@@ -92,8 +92,11 @@ haiku_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
return NULL;
}
- if (!_eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT, conf, attrib_list))
- goto cleanup_surface;
+ if (!_eglInitSurface(&surface->surf, disp, EGL_WINDOW_BIT,
+ conf, attrib_list)) {
+ free(surface);
+ return NULL;
+ }
(&surface->surf)->SwapInterval = 1;
@@ -110,10 +113,6 @@ haiku_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
TRACE("Showing window\n");
win->Show();
return &surface->surf;
-
-cleanup_surface:
- free(surface);
- return NULL;
}
@@ -139,7 +138,7 @@ haiku_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
if (_eglPutSurface(surf)) {
// XXX: detach haiku_egl_surface::gl from the native window and destroy it
free(surf);
- }
+ }
return EGL_TRUE;
}
@@ -153,7 +152,7 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy)
conf = (struct haiku_egl_config*) calloc(1, sizeof (*conf));
if (!conf) {
_eglError(EGL_BAD_ALLOC, "haiku_add_configs_for_visuals");
- return NULL;
+ return EGL_FALSE;
}
_eglInitConfig(&conf->base, dpy, 1);
@@ -165,7 +164,7 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy)
_eglSetConfigKey(&conf->base, EGL_LUMINANCE_SIZE, 0);
_eglSetConfigKey(&conf->base, EGL_ALPHA_SIZE, 8);
_eglSetConfigKey(&conf->base, EGL_COLOR_BUFFER_TYPE, EGL_RGB_BUFFER);
- EGLint r = (_eglGetConfigKey(&conf->base, EGL_RED_SIZE)
+ EGLint r = (_eglGetConfigKey(&conf->base, EGL_RED_SIZE)
+ _eglGetConfigKey(&conf->base, EGL_GREEN_SIZE)
+ _eglGetConfigKey(&conf->base, EGL_BLUE_SIZE)
+ _eglGetConfigKey(&conf->base, EGL_ALPHA_SIZE));
@@ -195,7 +194,7 @@ haiku_add_configs_for_visuals(_EGLDisplay *dpy)
goto cleanup;
}
TRACE("Validated config\n");
-
+
_eglLinkConfig(&conf->base);
if (!_eglGetArraySize(dpy->Configs)) {
_eglLog(_EGL_WARNING, "Haiku: failed to create any config");
@@ -210,6 +209,7 @@ cleanup:
return EGL_FALSE;
}
+
extern "C"
EGLBoolean
init_haiku(_EGLDriver *drv, _EGLDisplay *dpy)
@@ -221,7 +221,7 @@ init_haiku(_EGLDriver *drv, _EGLDisplay *dpy)
return EGL_FALSE;
dpy->Version = 14;
-
+
TRACE("Initialization finished\n");
return EGL_TRUE;
@@ -271,7 +271,7 @@ haiku_destroy_context(_EGLDriver* drv, _EGLDisplay *disp, _EGLContext* ctx)
if (_eglPutContext(ctx)) {
// XXX: teardown the context ?
free(context);
- ctx = NULL
+ ctx = NULL;
}
return EGL_TRUE;
}
@@ -280,7 +280,7 @@ haiku_destroy_context(_EGLDriver* drv, _EGLDisplay *disp, _EGLContext* ctx)
extern "C"
EGLBoolean
haiku_make_current(_EGLDriver* drv, _EGLDisplay* dpy, _EGLSurface *dsurf,
- _EGLSurface *rsurf, _EGLContext *ctx)
+ _EGLSurface *rsurf, _EGLContext *ctx)
{
CALLED();
@@ -314,7 +314,7 @@ extern "C"
void
haiku_unload(_EGLDriver* drv)
{
-
+
}
diff --git a/src/egl/main/Makefile.sources b/src/egl/main/Makefile.sources
deleted file mode 100644
index e39a80f14a6..00000000000
--- a/src/egl/main/Makefile.sources
+++ /dev/null
@@ -1,31 +0,0 @@
-LIBEGL_C_FILES := \
- eglapi.c \
- eglapi.h \
- eglarray.c \
- eglarray.h \
- eglcompiler.h \
- eglconfig.c \
- eglconfig.h \
- eglcontext.c \
- eglcontext.h \
- eglcurrent.c \
- eglcurrent.h \
- egldefines.h \
- egldisplay.c \
- egldisplay.h \
- egldriver.c \
- egldriver.h \
- eglfallbacks.c \
- eglglobals.c \
- eglglobals.h \
- eglimage.c \
- eglimage.h \
- egllog.c \
- egllog.h \
- eglstring.c \
- eglstring.h \
- eglsurface.c \
- eglsurface.h \
- eglsync.c \
- eglsync.h \
- egltypedefs.h
diff --git a/src/egl/main/SConscript b/src/egl/main/SConscript
deleted file mode 100644
index c0012831bb9..00000000000
--- a/src/egl/main/SConscript
+++ /dev/null
@@ -1,52 +0,0 @@
-#######################################################################
-# SConscript for EGL
-
-
-Import('*')
-
-env = env.Clone()
-
-env.Append(CPPDEFINES = [
- '_EGL_DRIVER_SEARCH_DIR=\\"\\"',
-])
-
-if env['platform'] == 'haiku':
- env.Append(CPPDEFINES = [
- '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
- '_EGL_OS_UNIX',
- '_EGL_BUILT_IN_DRIVER_HAIKU',
- ])
- env.Prepend(LIBS = [
- egl_haiku,
- libloader,
- ])
-else:
- env.Append(CPPDEFINES = [
- '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_X11',
- '_EGL_OS_UNIX',
- ])
- if env['dri']:
- env.Prepend(LIBS = [
- egl_dri2,
- libloader,
- ])
- # Disallow undefined symbols
- if env['platform'] != 'darwin':
- env.Append(SHLINKFLAGS = ['-Wl,-z,defs'])
-
-env.Append(CPPPATH = [
- '#/include',
-])
-
-
-# parse Makefile.sources
-egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
-
-egl = env.SharedLibrary(
- target = 'EGL',
- source = egl_sources,
-)
-
-egl = env.InstallSharedLibrary(egl, version=(1, 0, 0))
-
-env.Alias('egl', egl)
diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c
index 105e919683a..323634e4511 100644
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -100,7 +100,6 @@
#include "eglconfig.h"
#include "eglimage.h"
#include "eglsync.h"
-#include "eglstring.h"
/**
@@ -381,48 +380,47 @@ _eglCreateExtensionsString(_EGLDisplay *dpy)
char *exts = dpy->ExtensionsString;
- _EGL_CHECK_EXTENSION(MESA_drm_display);
- _EGL_CHECK_EXTENSION(MESA_drm_image);
- _EGL_CHECK_EXTENSION(MESA_configless_context);
-
- _EGL_CHECK_EXTENSION(WL_bind_wayland_display);
- _EGL_CHECK_EXTENSION(WL_create_wayland_buffer_from_image);
-
- _EGL_CHECK_EXTENSION(KHR_image_base);
- _EGL_CHECK_EXTENSION(KHR_image_pixmap);
- if (dpy->Extensions.KHR_image_base && dpy->Extensions.KHR_image_pixmap)
- _eglAppendExtension(&exts, "EGL_KHR_image");
-
- _EGL_CHECK_EXTENSION(KHR_vg_parent_image);
- _EGL_CHECK_EXTENSION(KHR_get_all_proc_addresses);
- _EGL_CHECK_EXTENSION(KHR_gl_texture_2D_image);
- _EGL_CHECK_EXTENSION(KHR_gl_texture_cubemap_image);
- _EGL_CHECK_EXTENSION(KHR_gl_texture_3D_image);
- _EGL_CHECK_EXTENSION(KHR_gl_renderbuffer_image);
-
- _EGL_CHECK_EXTENSION(KHR_reusable_sync);
- _EGL_CHECK_EXTENSION(KHR_fence_sync);
- _EGL_CHECK_EXTENSION(KHR_wait_sync);
- _EGL_CHECK_EXTENSION(KHR_cl_event2);
-
- _EGL_CHECK_EXTENSION(KHR_surfaceless_context);
- _EGL_CHECK_EXTENSION(KHR_create_context);
-
- _EGL_CHECK_EXTENSION(NOK_swap_region);
- _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap);
-
+ /* Please keep these sorted alphabetically. */
_EGL_CHECK_EXTENSION(ANDROID_image_native_buffer);
_EGL_CHECK_EXTENSION(CHROMIUM_sync_control);
- _EGL_CHECK_EXTENSION(EXT_create_context_robustness);
_EGL_CHECK_EXTENSION(EXT_buffer_age);
- _EGL_CHECK_EXTENSION(EXT_swap_buffers_with_damage);
+ _EGL_CHECK_EXTENSION(EXT_create_context_robustness);
_EGL_CHECK_EXTENSION(EXT_image_dma_buf_import);
+ _EGL_CHECK_EXTENSION(EXT_swap_buffers_with_damage);
+
+ _EGL_CHECK_EXTENSION(KHR_cl_event2);
+ _EGL_CHECK_EXTENSION(KHR_create_context);
+ _EGL_CHECK_EXTENSION(KHR_fence_sync);
+ _EGL_CHECK_EXTENSION(KHR_get_all_proc_addresses);
+ _EGL_CHECK_EXTENSION(KHR_gl_colorspace);
+ _EGL_CHECK_EXTENSION(KHR_gl_renderbuffer_image);
+ _EGL_CHECK_EXTENSION(KHR_gl_texture_2D_image);
+ _EGL_CHECK_EXTENSION(KHR_gl_texture_3D_image);
+ _EGL_CHECK_EXTENSION(KHR_gl_texture_cubemap_image);
+ if (dpy->Extensions.KHR_image_base && dpy->Extensions.KHR_image_pixmap)
+ _eglAppendExtension(&exts, "EGL_KHR_image");
+ _EGL_CHECK_EXTENSION(KHR_image_base);
+ _EGL_CHECK_EXTENSION(KHR_image_pixmap);
+ _EGL_CHECK_EXTENSION(KHR_reusable_sync);
+ _EGL_CHECK_EXTENSION(KHR_surfaceless_context);
+ _EGL_CHECK_EXTENSION(KHR_vg_parent_image);
+ _EGL_CHECK_EXTENSION(KHR_wait_sync);
+
+ _EGL_CHECK_EXTENSION(MESA_configless_context);
+ _EGL_CHECK_EXTENSION(MESA_drm_display);
+ _EGL_CHECK_EXTENSION(MESA_drm_image);
+ _EGL_CHECK_EXTENSION(MESA_image_dma_buf_export);
+
+ _EGL_CHECK_EXTENSION(NOK_swap_region);
+ _EGL_CHECK_EXTENSION(NOK_texture_from_pixmap);
_EGL_CHECK_EXTENSION(NV_post_sub_buffer);
- _EGL_CHECK_EXTENSION(MESA_image_dma_buf_export);
+ _EGL_CHECK_EXTENSION(WL_bind_wayland_display);
+ _EGL_CHECK_EXTENSION(WL_create_wayland_buffer_from_image);
+
#undef _EGL_CHECK_EXTENSION
}
@@ -507,7 +505,7 @@ eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor)
_eglComputeVersion(disp);
_eglCreateExtensionsString(disp);
_eglCreateAPIsString(disp);
- _eglsnprintf(disp->VersionString, sizeof(disp->VersionString),
+ snprintf(disp->VersionString, sizeof(disp->VersionString),
"%d.%d (%s)", disp->Version / 10, disp->Version % 10,
disp->Driver->Name);
}
@@ -1015,8 +1013,6 @@ eglSwapBuffers(EGLDisplay dpy, EGLSurface surface)
}
-#ifdef EGL_EXT_swap_buffers_with_damage
-
static EGLBoolean EGLAPIENTRY
eglSwapBuffersWithDamageEXT(EGLDisplay dpy, EGLSurface surface,
EGLint *rects, EGLint n_rects)
@@ -1042,8 +1038,6 @@ eglSwapBuffersWithDamageEXT(EGLDisplay dpy, EGLSurface surface,
RETURN_EGL_EVAL(disp, ret);
}
-#endif /* EGL_EXT_swap_buffers_with_damage */
-
EGLBoolean EGLAPIENTRY
eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target)
{
@@ -1204,8 +1198,6 @@ eglGetError(void)
}
-#ifdef EGL_MESA_drm_display
-
static EGLDisplay EGLAPIENTRY
eglGetDRMDisplayMESA(int fd)
{
@@ -1213,8 +1205,6 @@ eglGetDRMDisplayMESA(int fd)
return _eglGetDisplayHandle(dpy);
}
-#endif /* EGL_MESA_drm_display */
-
/**
** EGL 1.2
**/
@@ -1580,8 +1570,6 @@ eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *valu
}
-#ifdef EGL_NOK_swap_region
-
static EGLBoolean EGLAPIENTRY
eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface,
EGLint numRects, const EGLint *rects)
@@ -1607,10 +1595,6 @@ eglSwapBuffersRegionNOK(EGLDisplay dpy, EGLSurface surface,
RETURN_EGL_EVAL(disp, ret);
}
-#endif /* EGL_NOK_swap_region */
-
-
-#ifdef EGL_MESA_drm_image
static EGLImage EGLAPIENTRY
eglCreateDRMImageMESA(EGLDisplay dpy, const EGLint *attr_list)
@@ -1650,9 +1634,7 @@ eglExportDRMImageMESA(EGLDisplay dpy, EGLImage image,
RETURN_EGL_EVAL(disp, ret);
}
-#endif
-#ifdef EGL_WL_bind_wayland_display
struct wl_display;
static EGLBoolean EGLAPIENTRY
@@ -1709,9 +1691,8 @@ eglQueryWaylandBufferWL(EGLDisplay dpy, struct wl_resource *buffer,
RETURN_EGL_EVAL(disp, ret);
}
-#endif
-#ifdef EGL_WL_create_wayland_buffer_from_image
+
static struct wl_buffer * EGLAPIENTRY
eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImage image)
{
@@ -1732,7 +1713,6 @@ eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImage image)
RETURN_EGL_EVAL(disp, ret);
}
-#endif
static EGLBoolean EGLAPIENTRY
eglPostSubBufferNV(EGLDisplay dpy, EGLSurface surface,
@@ -1775,7 +1755,6 @@ eglGetSyncValuesCHROMIUM(EGLDisplay display, EGLSurface surface,
RETURN_EGL_EVAL(disp, ret);
}
-#ifdef EGL_MESA_image_dma_buf_export
static EGLBoolean EGLAPIENTRY
eglExportDMABUFImageQueryMESA(EGLDisplay dpy, EGLImage image,
EGLint *fourcc, EGLint *nplanes,
@@ -1817,7 +1796,6 @@ eglExportDMABUFImageMESA(EGLDisplay dpy, EGLImage image,
RETURN_EGL_EVAL(disp, ret);
}
-#endif
__eglMustCastToProperFunctionPointerType EGLAPIENTRY
eglGetProcAddress(const char *procname)
@@ -1874,9 +1852,7 @@ eglGetProcAddress(const char *procname)
{ "eglGetPlatformDisplay", (_EGLProc) eglGetPlatformDisplay },
{ "eglCreatePlatformWindowSurface", (_EGLProc) eglCreatePlatformWindowSurface },
{ "eglCreatePlatformPixmapSurface", (_EGLProc) eglCreatePlatformPixmapSurface },
-#ifdef EGL_MESA_drm_display
{ "eglGetDRMDisplayMESA", (_EGLProc) eglGetDRMDisplayMESA },
-#endif
{ "eglCreateImageKHR", (_EGLProc) eglCreateImageKHR },
{ "eglDestroyImageKHR", (_EGLProc) eglDestroyImage },
{ "eglCreateSyncKHR", (_EGLProc) eglCreateSyncKHR },
@@ -1886,33 +1862,21 @@ eglGetProcAddress(const char *procname)
{ "eglWaitSyncKHR", (_EGLProc) eglWaitSyncKHR },
{ "eglSignalSyncKHR", (_EGLProc) eglSignalSyncKHR },
{ "eglGetSyncAttribKHR", (_EGLProc) eglGetSyncAttribKHR },
-#ifdef EGL_NOK_swap_region
{ "eglSwapBuffersRegionNOK", (_EGLProc) eglSwapBuffersRegionNOK },
-#endif
-#ifdef EGL_MESA_drm_image
{ "eglCreateDRMImageMESA", (_EGLProc) eglCreateDRMImageMESA },
{ "eglExportDRMImageMESA", (_EGLProc) eglExportDRMImageMESA },
-#endif
-#ifdef EGL_WL_bind_wayland_display
{ "eglBindWaylandDisplayWL", (_EGLProc) eglBindWaylandDisplayWL },
{ "eglUnbindWaylandDisplayWL", (_EGLProc) eglUnbindWaylandDisplayWL },
{ "eglQueryWaylandBufferWL", (_EGLProc) eglQueryWaylandBufferWL },
-#endif
-#ifdef EGL_WL_create_wayland_buffer_from_image
{ "eglCreateWaylandBufferFromImageWL", (_EGLProc) eglCreateWaylandBufferFromImageWL },
-#endif
{ "eglPostSubBufferNV", (_EGLProc) eglPostSubBufferNV },
-#ifdef EGL_EXT_swap_buffers_with_damage
{ "eglSwapBuffersWithDamageEXT", (_EGLProc) eglSwapBuffersWithDamageEXT },
-#endif
{ "eglGetPlatformDisplayEXT", (_EGLProc) eglGetPlatformDisplayEXT },
{ "eglCreatePlatformWindowSurfaceEXT", (_EGLProc) eglCreatePlatformWindowSurfaceEXT },
{ "eglCreatePlatformPixmapSurfaceEXT", (_EGLProc) eglCreatePlatformPixmapSurfaceEXT },
{ "eglGetSyncValuesCHROMIUM", (_EGLProc) eglGetSyncValuesCHROMIUM },
-#ifdef EGL_MESA_image_dma_buf_export
{ "eglExportDMABUFImageQueryMESA", (_EGLProc) eglExportDMABUFImageQueryMESA },
{ "eglExportDMABUFImageMESA", (_EGLProc) eglExportDMABUFImageMESA },
-#endif
{ NULL, NULL }
};
EGLint i;
diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h
index 4e0378d0d5f..6c54c7c410d 100644
--- a/src/egl/main/eglapi.h
+++ b/src/egl/main/eglapi.h
@@ -99,41 +99,29 @@ typedef EGLBoolean (*SignalSyncKHR_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSyn
typedef EGLBoolean (*GetSyncAttrib_t)(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync, EGLint attribute, EGLAttrib *value);
-#ifdef EGL_NOK_swap_region
typedef EGLBoolean (*SwapBuffersRegionNOK_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, EGLint numRects, const EGLint *rects);
-#endif
-#ifdef EGL_MESA_drm_image
typedef _EGLImage *(*CreateDRMImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, const EGLint *attr_list);
typedef EGLBoolean (*ExportDRMImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *name, EGLint *handle, EGLint *stride);
-#endif
-#ifdef EGL_WL_bind_wayland_display
struct wl_display;
typedef EGLBoolean (*BindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp, struct wl_display *display);
typedef EGLBoolean (*UnbindWaylandDisplayWL_t)(_EGLDriver *drv, _EGLDisplay *disp, struct wl_display *display);
typedef EGLBoolean (*QueryWaylandBufferWL_t)(_EGLDriver *drv, _EGLDisplay *displ, struct wl_resource *buffer, EGLint attribute, EGLint *value);
-#endif
-#ifdef EGL_WL_create_wayland_buffer_from_image
typedef struct wl_buffer * (*CreateWaylandBufferFromImageWL_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img);
-#endif
typedef EGLBoolean (*PostSubBufferNV_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surface, EGLint x, EGLint y, EGLint width, EGLint height);
typedef EGLint (*QueryBufferAge_t)(_EGLDriver *drv,
_EGLDisplay *dpy, _EGLSurface *surface);
-#ifdef EGL_EXT_swap_buffers_with_damage
typedef EGLBoolean (*SwapBuffersWithDamageEXT_t) (_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface, const EGLint *rects, EGLint n_rects);
-#endif
typedef EGLBoolean (*GetSyncValuesCHROMIUM_t) (_EGLDisplay *dpy, _EGLSurface *surface, EGLuint64KHR *ust, EGLuint64KHR *msc, EGLuint64KHR *sbc);
-#ifdef EGL_MESA_image_dma_buf_export
typedef EGLBoolean (*ExportDMABUFImageQueryMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fourcc, EGLint *nplanes, EGLuint64KHR *modifiers);
typedef EGLBoolean (*ExportDMABUFImageMESA_t)(_EGLDriver *drv, _EGLDisplay *disp, _EGLImage *img, EGLint *fds, EGLint *strides, EGLint *offsets);
-#endif
/**
* The API dispatcher jumps through these functions
@@ -180,38 +168,26 @@ struct _egl_api
SignalSyncKHR_t SignalSyncKHR;
GetSyncAttrib_t GetSyncAttrib;
-#ifdef EGL_NOK_swap_region
SwapBuffersRegionNOK_t SwapBuffersRegionNOK;
-#endif
-#ifdef EGL_MESA_drm_image
CreateDRMImageMESA_t CreateDRMImageMESA;
ExportDRMImageMESA_t ExportDRMImageMESA;
-#endif
-#ifdef EGL_WL_bind_wayland_display
BindWaylandDisplayWL_t BindWaylandDisplayWL;
UnbindWaylandDisplayWL_t UnbindWaylandDisplayWL;
QueryWaylandBufferWL_t QueryWaylandBufferWL;
-#endif
-#ifdef EGL_WL_create_wayland_buffer_from_image
CreateWaylandBufferFromImageWL_t CreateWaylandBufferFromImageWL;
-#endif
-#ifdef EGL_EXT_swap_buffers_with_damage
SwapBuffersWithDamageEXT_t SwapBuffersWithDamageEXT;
-#endif /* EGL_EXT_swap_buffers_with_damage */
PostSubBufferNV_t PostSubBufferNV;
QueryBufferAge_t QueryBufferAge;
GetSyncValuesCHROMIUM_t GetSyncValuesCHROMIUM;
-#ifdef EGL_MESA_image_dma_buf_export
ExportDMABUFImageQueryMESA_t ExportDMABUFImageQueryMESA;
ExportDMABUFImageMESA_t ExportDMABUFImageMESA;
-#endif
};
diff --git a/src/egl/main/eglarray.c b/src/egl/main/eglarray.c
index 3ccc8a649f0..d2f39af49a6 100644
--- a/src/egl/main/eglarray.c
+++ b/src/egl/main/eglarray.c
@@ -197,6 +197,9 @@ _eglFlattenArray(_EGLArray *array, void *buffer, EGLint elem_size, EGLint size,
count = array->Size;
if (buffer) {
+ /* treat a negative size as 0 */
+ if (size < 0)
+ size = 0;
/* do not exceed buffer size */
if (count > size)
count = size;
diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c
index cf65c69b7b4..c445d9b0c92 100644
--- a/src/egl/main/eglconfig.c
+++ b/src/egl/main/eglconfig.c
@@ -83,7 +83,8 @@ _eglLinkConfig(_EGLConfig *conf)
_EGLDisplay *dpy = conf->Display;
/* sanity check */
- assert(dpy && conf->ConfigID > 0);
+ assert(dpy);
+ assert(conf->ConfigID > 0);
if (!dpy->Configs) {
dpy->Configs = _eglCreateArray("Config", 16);
diff --git a/src/egl/main/eglcontext.c b/src/egl/main/eglcontext.c
index e767f4b1abe..588f48921f2 100644
--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -101,11 +101,42 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
switch (attr) {
case EGL_CONTEXT_CLIENT_VERSION:
+ /* The EGL 1.4 spec says:
+ *
+ * "attribute EGL_CONTEXT_CLIENT_VERSION is only valid when the
+ * current rendering API is EGL_OPENGL_ES_API"
+ *
+ * The EGL_KHR_create_context spec says:
+ *
+ * "EGL_CONTEXT_MAJOR_VERSION_KHR 0x3098
+ * (this token is an alias for EGL_CONTEXT_CLIENT_VERSION)"
+ *
+ * "The values for attributes EGL_CONTEXT_MAJOR_VERSION_KHR and
+ * EGL_CONTEXT_MINOR_VERSION_KHR specify the requested client API
+ * version. They are only meaningful for OpenGL and OpenGL ES
+ * contexts, and specifying them for other types of contexts will
+ * generate an error."
+ */
+ if ((api != EGL_OPENGL_ES_API &&
+ (!dpy->Extensions.KHR_create_context || api != EGL_OPENGL_API))) {
+ err = EGL_BAD_ATTRIBUTE;
+ break;
+ }
+
ctx->ClientMajorVersion = val;
break;
case EGL_CONTEXT_MINOR_VERSION_KHR:
- if (!dpy->Extensions.KHR_create_context) {
+ /* The EGL_KHR_create_context spec says:
+ *
+ * "The values for attributes EGL_CONTEXT_MAJOR_VERSION_KHR and
+ * EGL_CONTEXT_MINOR_VERSION_KHR specify the requested client API
+ * version. They are only meaningful for OpenGL and OpenGL ES
+ * contexts, and specifying them for other types of contexts will
+ * generate an error."
+ */
+ if (!dpy->Extensions.KHR_create_context ||
+ (api != EGL_OPENGL_ES_API && api != EGL_OPENGL_API)) {
err = EGL_BAD_ATTRIBUTE;
break;
}
diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c
index 24a0c7e61a7..f6db03ab50c 100644
--- a/src/egl/main/egldisplay.c
+++ b/src/egl/main/egldisplay.c
@@ -65,11 +65,9 @@ static const struct {
_EGLPlatformType platform;
const char *name;
} egl_platforms[_EGL_NUM_PLATFORMS] = {
- { _EGL_PLATFORM_WINDOWS, "gdi" },
{ _EGL_PLATFORM_X11, "x11" },
{ _EGL_PLATFORM_WAYLAND, "wayland" },
{ _EGL_PLATFORM_DRM, "drm" },
- { _EGL_PLATFORM_NULL, "null" },
{ _EGL_PLATFORM_ANDROID, "android" },
{ _EGL_PLATFORM_HAIKU, "haiku" },
{ _EGL_PLATFORM_SURFACELESS, "surfaceless" },
diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h
index 0b50a36a098..6c64980cf20 100644
--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -44,11 +44,9 @@ extern "C" {
#endif
enum _egl_platform_type {
- _EGL_PLATFORM_WINDOWS,
_EGL_PLATFORM_X11,
_EGL_PLATFORM_WAYLAND,
_EGL_PLATFORM_DRM,
- _EGL_PLATFORM_NULL,
_EGL_PLATFORM_ANDROID,
_EGL_PLATFORM_HAIKU,
_EGL_PLATFORM_SURFACELESS,
@@ -91,46 +89,44 @@ struct _egl_resource
*/
struct _egl_extensions
{
- EGLBoolean MESA_drm_display;
- EGLBoolean MESA_drm_image;
- EGLBoolean MESA_configless_context;
-
- EGLBoolean WL_bind_wayland_display;
- EGLBoolean WL_create_wayland_buffer_from_image;
-
- EGLBoolean KHR_image_base;
- EGLBoolean KHR_image_pixmap;
- EGLBoolean KHR_vg_parent_image;
- EGLBoolean KHR_get_all_proc_addresses;
- EGLBoolean KHR_gl_colorspace;
- EGLBoolean KHR_gl_texture_2D_image;
- EGLBoolean KHR_gl_texture_cubemap_image;
- EGLBoolean KHR_gl_texture_3D_image;
- EGLBoolean KHR_gl_renderbuffer_image;
-
- EGLBoolean KHR_reusable_sync;
- EGLBoolean KHR_fence_sync;
- EGLBoolean KHR_wait_sync;
- EGLBoolean KHR_cl_event2;
-
- EGLBoolean KHR_surfaceless_context;
- EGLBoolean KHR_create_context;
-
- EGLBoolean NOK_swap_region;
- EGLBoolean NOK_texture_from_pixmap;
-
+ /* Please keep these sorted alphabetically. */
EGLBoolean ANDROID_image_native_buffer;
EGLBoolean CHROMIUM_sync_control;
+ EGLBoolean EXT_buffer_age;
+ EGLBoolean EXT_create_context_robustness;
+ EGLBoolean EXT_image_dma_buf_import;
+ EGLBoolean EXT_swap_buffers_with_damage;
+
+ EGLBoolean KHR_cl_event2;
+ EGLBoolean KHR_create_context;
+ EGLBoolean KHR_fence_sync;
+ EGLBoolean KHR_get_all_proc_addresses;
+ EGLBoolean KHR_gl_colorspace;
+ EGLBoolean KHR_gl_renderbuffer_image;
+ EGLBoolean KHR_gl_texture_2D_image;
+ EGLBoolean KHR_gl_texture_3D_image;
+ EGLBoolean KHR_gl_texture_cubemap_image;
+ EGLBoolean KHR_image_base;
+ EGLBoolean KHR_image_pixmap;
+ EGLBoolean KHR_reusable_sync;
+ EGLBoolean KHR_surfaceless_context;
+ EGLBoolean KHR_vg_parent_image;
+ EGLBoolean KHR_wait_sync;
+
+ EGLBoolean MESA_configless_context;
+ EGLBoolean MESA_drm_display;
+ EGLBoolean MESA_drm_image;
+ EGLBoolean MESA_image_dma_buf_export;
+
+ EGLBoolean NOK_swap_region;
+ EGLBoolean NOK_texture_from_pixmap;
+
EGLBoolean NV_post_sub_buffer;
- EGLBoolean EXT_create_context_robustness;
- EGLBoolean EXT_buffer_age;
- EGLBoolean EXT_swap_buffers_with_damage;
- EGLBoolean EXT_image_dma_buf_import;
-
- EGLBoolean MESA_image_dma_buf_export;
+ EGLBoolean WL_bind_wayland_display;
+ EGLBoolean WL_create_wayland_buffer_from_image;
};
diff --git a/src/egl/main/egldriver.c b/src/egl/main/egldriver.c
index 6ef79d96502..b9b21dec5ea 100644
--- a/src/egl/main/egldriver.c
+++ b/src/egl/main/egldriver.c
@@ -39,7 +39,6 @@
#include
#include "c11/threads.h"
-#include "eglstring.h"
#include "egldefines.h"
#include "egldisplay.h"
#include "egldriver.h"
@@ -97,15 +96,10 @@ _eglLoadModule(_EGLModule *mod)
static void
_eglUnloadModule(_EGLModule *mod)
{
-#if defined(_EGL_OS_UNIX)
/* destroy the driver */
if (mod->Driver && mod->Driver->Unload)
mod->Driver->Unload(mod->Driver);
-#elif defined(_EGL_OS_WINDOWS)
- /* XXX Windows unloads DLLs before atexit */
-#endif
-
mod->Driver = NULL;
}
@@ -135,7 +129,7 @@ _eglAddModule(const char *name)
/* allocate a new one */
mod = calloc(1, sizeof(*mod));
if (mod) {
- mod->Name = _eglstrdup(name);
+ mod->Name = strdup(name);
if (!mod->Name) {
free(mod);
mod = NULL;
diff --git a/src/egl/main/eglfallbacks.c b/src/egl/main/eglfallbacks.c
index 3c3701f4ae9..65daf8fd0f5 100644
--- a/src/egl/main/eglfallbacks.c
+++ b/src/egl/main/eglfallbacks.c
@@ -93,17 +93,11 @@ _eglInitDriverFallbacks(_EGLDriver *drv)
drv->API.SignalSyncKHR = NULL;
drv->API.GetSyncAttrib = _eglGetSyncAttrib;
-#ifdef EGL_MESA_drm_image
drv->API.CreateDRMImageMESA = NULL;
drv->API.ExportDRMImageMESA = NULL;
-#endif
-#ifdef EGL_NOK_swap_region
drv->API.SwapBuffersRegionNOK = NULL;
-#endif
-#ifdef EGL_MESA_image_dma_buf_export
drv->API.ExportDMABUFImageQueryMESA = NULL;
drv->API.ExportDMABUFImageMESA = NULL;
-#endif
}
diff --git a/src/egl/main/eglglobals.c b/src/egl/main/eglglobals.c
index 884cff0c36b..938d9537891 100644
--- a/src/egl/main/eglglobals.c
+++ b/src/egl/main/eglglobals.c
@@ -53,10 +53,10 @@ struct _egl_global _eglGlobal =
/* ClientExtensionsString */
"EGL_EXT_client_extensions"
" EGL_EXT_platform_base"
- " EGL_EXT_platform_x11"
" EGL_EXT_platform_wayland"
- " EGL_MESA_platform_gbm"
+ " EGL_EXT_platform_x11"
" EGL_KHR_client_get_all_proc_addresses"
+ " EGL_MESA_platform_gbm"
};
diff --git a/src/egl/main/egllog.c b/src/egl/main/egllog.c
index 1877d8bfd10..956946532cd 100644
--- a/src/egl/main/egllog.c
+++ b/src/egl/main/egllog.c
@@ -38,10 +38,11 @@
#include
#include
#include
+#include
+#include
#include "c11/threads.h"
#include "egllog.h"
-#include "eglstring.h"
#define MAXSTRING 1000
#define FALLBACK_LOG_LEVEL _EGL_WARNING
@@ -146,7 +147,7 @@ _eglInitLogger(void)
log_env = getenv("EGL_LOG_LEVEL");
if (log_env) {
for (i = 0; level_strings[i]; i++) {
- if (_eglstrcasecmp(log_env, level_strings[i]) == 0) {
+ if (strcasecmp(log_env, level_strings[i]) == 0) {
level = i;
break;
}
diff --git a/src/egl/main/eglstring.c b/src/egl/main/eglstring.c
deleted file mode 100644
index 8b4c491ac64..00000000000
--- a/src/egl/main/eglstring.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 VMware, Inc.
- * Copyright 2009-2010 Chia-I Wu
- * Copyright 2010-2011 LunarG, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * String utils.
- */
-
-#include
-#include
-#include "eglstring.h"
-
-
-char *
-_eglstrdup(const char *s)
-{
- if (s) {
- size_t l = strlen(s);
- char *s2 = malloc(l + 1);
- if (s2)
- strcpy(s2, s);
- return s2;
- }
- return NULL;
-}
-
-
-
diff --git a/src/egl/main/eglstring.h b/src/egl/main/eglstring.h
deleted file mode 100644
index 16baa477714..00000000000
--- a/src/egl/main/eglstring.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2008 VMware, Inc.
- * Copyright 2009-2010 Chia-I Wu
- * Copyright 2010-2011 LunarG, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef EGLSTRING_INCLUDED
-#define EGLSTRING_INCLUDED
-
-#include
-#include
-
-#ifdef _EGL_OS_WINDOWS
-#define _eglstrcasecmp _stricmp
-#define _eglsnprintf _snprintf
-#else
-#include // for strcasecmp
-#define _eglstrcasecmp strcasecmp
-#define _eglsnprintf snprintf
-#endif
-
-extern char *
-_eglstrdup(const char *s);
-
-
-#endif /* EGLSTRING_INCLUDED */
diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c
index 76c60e940dc..4fa43f3e2b1 100644
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -84,6 +84,22 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list)
switch (attr) {
/* common attributes */
+ case EGL_GL_COLORSPACE_KHR:
+ if (!dpy->Extensions.KHR_gl_colorspace) {
+ err = EGL_BAD_ATTRIBUTE;
+ break;
+ }
+ switch (val) {
+ case EGL_GL_COLORSPACE_SRGB_KHR:
+ case EGL_GL_COLORSPACE_LINEAR_KHR:
+ break;
+ default:
+ err = EGL_BAD_ATTRIBUTE;
+ }
+ if (err != EGL_SUCCESS)
+ break;
+ surf->GLColorspace = val;
+ break;
case EGL_VG_COLORSPACE:
switch (val) {
case EGL_VG_COLORSPACE_sRGB:
@@ -272,6 +288,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
surf->RenderBuffer = renderBuffer;
surf->VGAlphaFormat = EGL_VG_ALPHA_FORMAT_NONPRE;
surf->VGColorspace = EGL_VG_COLORSPACE_sRGB;
+ surf->GLColorspace = EGL_GL_COLORSPACE_LINEAR_KHR;
surf->MipmapLevel = 0;
surf->MultisampleResolve = EGL_MULTISAMPLE_RESOLVE_DEFAULT;
@@ -309,7 +326,8 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
*value = surface->Config->ConfigID;
break;
case EGL_LARGEST_PBUFFER:
- *value = surface->LargestPbuffer;
+ if (surface->Type == EGL_PBUFFER_BIT)
+ *value = surface->LargestPbuffer;
break;
case EGL_TEXTURE_FORMAT:
/* texture attributes: only for pbuffers, no error otherwise */
@@ -352,6 +370,13 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
case EGL_VG_COLORSPACE:
*value = surface->VGColorspace;
break;
+ case EGL_GL_COLORSPACE_KHR:
+ if (!dpy->Extensions.KHR_gl_colorspace) {
+ _eglError(EGL_BAD_ATTRIBUTE, "eglQuerySurface");
+ return EGL_FALSE;
+ }
+ *value = surface->GLColorspace;
+ break;
case EGL_POST_SUB_BUFFER_SUPPORTED_NV:
*value = surface->PostSubBufferSupportedNV;
break;
diff --git a/src/egl/main/eglsurface.h b/src/egl/main/eglsurface.h
index 74c429a9628..fc799ee43dc 100644
--- a/src/egl/main/eglsurface.h
+++ b/src/egl/main/eglsurface.h
@@ -65,6 +65,7 @@ struct _egl_surface
EGLenum RenderBuffer;
EGLenum VGAlphaFormat;
EGLenum VGColorspace;
+ EGLenum GLColorspace;
/* attributes set by eglSurfaceAttrib */
EGLint MipmapLevel;
diff --git a/src/gallium/Android.mk b/src/gallium/Android.mk
index b946681840c..39e064e9538 100644
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -34,7 +34,7 @@ SUBDIRS := auxiliary
# swrast
ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),)
-SUBDIRS += winsys/sw/dri winsys/sw/kms-dri drivers/softpipe
+SUBDIRS += winsys/sw/dri drivers/softpipe
endif
# freedreno
@@ -72,6 +72,7 @@ SUBDIRS += drivers/r600
endif
ifneq ($(filter radeonsi, $(MESA_GPU_DRIVERS)),)
SUBDIRS += drivers/radeonsi
+SUBDIRS += winsys/amdgpu/drm
endif
endif
endif
diff --git a/src/gallium/Automake.inc b/src/gallium/Automake.inc
index 95aae50d64b..ee07ab6c8f9 100644
--- a/src/gallium/Automake.inc
+++ b/src/gallium/Automake.inc
@@ -67,10 +67,3 @@ if HAVE_DRISW
GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
endif
-
-if NEED_WINSYS_XLIB
-GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
- $(top_builddir)/src/gallium/winsys/sw/xlib/libws_xlib.la \
- -lX11 -lXext -lXfixes \
- $(LIBDRM_LIBS)
-endif
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index ede6e21233a..e2c1090aa26 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -58,6 +58,7 @@ endif
## radeonsi
if HAVE_GALLIUM_RADEONSI
SUBDIRS += drivers/radeonsi
+SUBDIRS += winsys/amdgpu/drm
endif
## the radeon winsys - linked in by r300, r600 and radeonsi
diff --git a/src/gallium/README.portability b/src/gallium/README.portability
index adecf4bb798..cf6cc36afbb 100644
--- a/src/gallium/README.portability
+++ b/src/gallium/README.portability
@@ -13,8 +13,6 @@ headers in general, should strictly follow these guidelines to ensure
* Include the p_compiler.h.
-* Don't use the 'inline' keyword, use the INLINE macro in p_compiler.h instead.
-
* Cast explicitly when converting to integer types of smaller sizes.
* Cast explicitly when converting between float, double and integral types.
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index eeb1c780fcd..fa5fa6e8734 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -46,7 +46,6 @@ if env['platform'] == 'haiku':
if env['dri']:
SConscript([
'winsys/sw/dri/SConscript',
- 'winsys/sw/kms-dri/SConscript',
'winsys/svga/drm/SConscript',
])
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 89c7a13e913..04f77d002c8 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
if HAVE_LOADER_GALLIUM
SUBDIRS := pipe-loader
endif
@@ -10,6 +8,7 @@ include $(top_srcdir)/src/gallium/Automake.inc
noinst_LTLIBRARIES = libgallium.la
AM_CFLAGS = \
+ -I$(top_srcdir)/src/loader \
-I$(top_builddir)/src/glsl/nir \
-I$(top_srcdir)/src/gallium/auxiliary/util \
$(GALLIUM_CFLAGS) \
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 62e6b94cab8..3616d885b47 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -274,7 +274,6 @@ C_SOURCES := \
util/u_simple_shaders.h \
util/u_slab.c \
util/u_slab.h \
- util/u_snprintf.c \
util/u_split_prim.h \
util/u_sse.h \
util/u_staging.c \
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index dd56e4a154e..d36f1fbd717 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -80,7 +80,7 @@ unsigned cso_construct_key(void *item, int item_size)
return hash_key((item), item_size);
}
-static INLINE struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
+static inline struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
{
struct cso_hash *hash;
hash = sc->hashes[type];
@@ -127,7 +127,7 @@ static void delete_velements(void *state, void *data)
FREE(state);
}
-static INLINE void delete_cso(void *state, enum cso_cache_type type)
+static inline void delete_cso(void *state, enum cso_cache_type type)
{
switch (type) {
case CSO_BLEND:
@@ -152,7 +152,7 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
}
-static INLINE void sanitize_hash(struct cso_cache *sc,
+static inline void sanitize_hash(struct cso_cache *sc,
struct cso_hash *hash,
enum cso_cache_type type,
int max_size)
@@ -162,7 +162,7 @@ static INLINE void sanitize_hash(struct cso_cache *sc,
}
-static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
+static inline void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
int max_size, void *user_data)
{
/* if we're approach the maximum size, remove fourth of the entries
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 744b00cbd92..00686d2af41 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -56,22 +56,8 @@
*/
struct sampler_info
{
- struct {
- void *samplers[PIPE_MAX_SAMPLERS];
- unsigned nr_samplers;
- } hw;
-
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
-
- void *samplers_saved[PIPE_MAX_SAMPLERS];
- unsigned nr_samplers_saved;
-
- struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
- unsigned nr_views;
-
- struct pipe_sampler_view *views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
- unsigned nr_views_saved;
};
@@ -85,6 +71,15 @@ struct cso_context {
boolean has_tessellation;
boolean has_streamout;
+ struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ unsigned nr_fragment_views;
+
+ struct pipe_sampler_view *fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ unsigned nr_fragment_views_saved;
+
+ void *fragment_samplers_saved[PIPE_MAX_SAMPLERS];
+ unsigned nr_fragment_samplers_saved;
+
struct sampler_info samplers[PIPE_SHADER_TYPES];
struct pipe_vertex_buffer aux_vertex_buffer_current;
@@ -116,9 +111,6 @@ struct cso_context {
uint render_condition_mode, render_condition_mode_saved;
boolean render_condition_cond, render_condition_cond_saved;
- struct pipe_clip_state clip;
- struct pipe_clip_state clip_saved;
-
struct pipe_framebuffer_state fb, fb_saved;
struct pipe_viewport_state vp, vp_saved;
struct pipe_blend_color blend_color;
@@ -192,7 +184,7 @@ static boolean delete_vertex_elements(struct cso_context *ctx,
}
-static INLINE boolean delete_cso(struct cso_context *ctx,
+static inline boolean delete_cso(struct cso_context *ctx,
void *state, enum cso_cache_type type)
{
switch (type) {
@@ -213,7 +205,7 @@ static INLINE boolean delete_cso(struct cso_context *ctx,
return FALSE;
}
-static INLINE void
+static inline void
sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
int max_size, void *user_data)
{
@@ -297,7 +289,7 @@ out:
*/
void cso_destroy_context( struct cso_context *ctx )
{
- unsigned i, shader;
+ unsigned i;
if (ctx->pipe) {
ctx->pipe->set_index_buffer(ctx->pipe, NULL);
@@ -347,13 +339,9 @@ void cso_destroy_context( struct cso_context *ctx )
ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, NULL);
}
- /* free sampler views for each shader stage */
- for (shader = 0; shader < Elements(ctx->samplers); shader++) {
- struct sampler_info *info = &ctx->samplers[shader];
- for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
- pipe_sampler_view_reference(&info->views[i], NULL);
- pipe_sampler_view_reference(&info->views_saved[i], NULL);
- }
+ for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+ pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
+ pipe_sampler_view_reference(&ctx->fragment_views_saved[i], NULL);
}
util_unreference_framebuffer_state(&ctx->fb);
@@ -919,47 +907,6 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
ctx->tesseval_shader_saved = NULL;
}
-/* clip state */
-
-static INLINE void
-clip_state_cpy(struct pipe_clip_state *dst,
- const struct pipe_clip_state *src)
-{
- memcpy(dst->ucp, src->ucp, sizeof(dst->ucp));
-}
-
-static INLINE int
-clip_state_cmp(const struct pipe_clip_state *a,
- const struct pipe_clip_state *b)
-{
- return memcmp(a->ucp, b->ucp, sizeof(a->ucp));
-}
-
-void
-cso_set_clip(struct cso_context *ctx,
- const struct pipe_clip_state *clip)
-{
- if (clip_state_cmp(&ctx->clip, clip)) {
- clip_state_cpy(&ctx->clip, clip);
- ctx->pipe->set_clip_state(ctx->pipe, clip);
- }
-}
-
-void
-cso_save_clip(struct cso_context *ctx)
-{
- clip_state_cpy(&ctx->clip_saved, &ctx->clip);
-}
-
-void
-cso_restore_clip(struct cso_context *ctx)
-{
- if (clip_state_cmp(&ctx->clip, &ctx->clip_saved)) {
- clip_state_cpy(&ctx->clip, &ctx->clip_saved);
- ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
- }
-}
-
enum pipe_error
cso_set_vertex_elements(struct cso_context *ctx,
unsigned count,
@@ -1122,11 +1069,9 @@ unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx)
/**************** fragment/vertex sampler view state *************************/
-static enum pipe_error
-single_sampler(struct cso_context *ctx,
- struct sampler_info *info,
- unsigned idx,
- const struct pipe_sampler_state *templ)
+enum pipe_error
+cso_single_sampler(struct cso_context *ctx, unsigned shader_stage,
+ unsigned idx, const struct pipe_sampler_state *templ)
{
void *handle = NULL;
@@ -1162,24 +1107,13 @@ single_sampler(struct cso_context *ctx,
}
}
- info->samplers[idx] = handle;
-
+ ctx->samplers[shader_stage].samplers[idx] = handle;
return PIPE_OK;
}
-enum pipe_error
-cso_single_sampler(struct cso_context *ctx,
- unsigned shader_stage,
- unsigned idx,
- const struct pipe_sampler_state *templ)
-{
- return single_sampler(ctx, &ctx->samplers[shader_stage], idx, templ);
-}
-
-
-static void
-single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
+void
+cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
{
struct sampler_info *info = &ctx->samplers[shader_stage];
unsigned i;
@@ -1191,33 +1125,8 @@ single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
}
info->nr_samplers = i;
-
- if (info->hw.nr_samplers != info->nr_samplers ||
- memcmp(info->hw.samplers,
- info->samplers,
- info->nr_samplers * sizeof(void *)) != 0)
- {
- memcpy(info->hw.samplers,
- info->samplers,
- info->nr_samplers * sizeof(void *));
-
- /* set remaining slots/pointers to null */
- for (i = info->nr_samplers; i < info->hw.nr_samplers; i++)
- info->samplers[i] = NULL;
-
- ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0,
- MAX2(info->nr_samplers,
- info->hw.nr_samplers),
- info->samplers);
-
- info->hw.nr_samplers = info->nr_samplers;
- }
-}
-
-void
-cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
-{
- single_sampler_done(ctx, shader_stage);
+ ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0, i,
+ info->samplers);
}
@@ -1240,38 +1149,42 @@ cso_set_samplers(struct cso_context *ctx,
*/
for (i = 0; i < nr; i++) {
- temp = single_sampler(ctx, info, i, templates[i]);
+ temp = cso_single_sampler(ctx, shader_stage, i, templates[i]);
if (temp != PIPE_OK)
error = temp;
}
for ( ; i < info->nr_samplers; i++) {
- temp = single_sampler(ctx, info, i, NULL);
+ temp = cso_single_sampler(ctx, shader_stage, i, NULL);
if (temp != PIPE_OK)
error = temp;
}
- single_sampler_done(ctx, shader_stage);
+ cso_single_sampler_done(ctx, shader_stage);
return error;
}
void
-cso_save_samplers(struct cso_context *ctx, unsigned shader_stage)
+cso_save_fragment_samplers(struct cso_context *ctx)
{
- struct sampler_info *info = &ctx->samplers[shader_stage];
- info->nr_samplers_saved = info->nr_samplers;
- memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers));
+ struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
+
+ ctx->nr_fragment_samplers_saved = info->nr_samplers;
+ memcpy(ctx->fragment_samplers_saved, info->samplers,
+ sizeof(info->samplers));
}
void
-cso_restore_samplers(struct cso_context *ctx, unsigned shader_stage)
+cso_restore_fragment_samplers(struct cso_context *ctx)
{
- struct sampler_info *info = &ctx->samplers[shader_stage];
- info->nr_samplers = info->nr_samplers_saved;
- memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers));
- single_sampler_done(ctx, shader_stage);
+ struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
+
+ info->nr_samplers = ctx->nr_fragment_samplers_saved;
+ memcpy(info->samplers, ctx->fragment_samplers_saved,
+ sizeof(info->samplers));
+ cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
}
@@ -1281,71 +1194,74 @@ cso_set_sampler_views(struct cso_context *ctx,
unsigned count,
struct pipe_sampler_view **views)
{
- struct sampler_info *info = &ctx->samplers[shader_stage];
- unsigned i;
- boolean any_change = FALSE;
+ if (shader_stage == PIPE_SHADER_FRAGMENT) {
+ unsigned i;
+ boolean any_change = FALSE;
- /* reference new views */
- for (i = 0; i < count; i++) {
- any_change |= info->views[i] != views[i];
- pipe_sampler_view_reference(&info->views[i], views[i]);
- }
- /* unref extra old views, if any */
- for (; i < info->nr_views; i++) {
- any_change |= info->views[i] != NULL;
- pipe_sampler_view_reference(&info->views[i], NULL);
- }
+ /* reference new views */
+ for (i = 0; i < count; i++) {
+ any_change |= ctx->fragment_views[i] != views[i];
+ pipe_sampler_view_reference(&ctx->fragment_views[i], views[i]);
+ }
+ /* unref extra old views, if any */
+ for (; i < ctx->nr_fragment_views; i++) {
+ any_change |= ctx->fragment_views[i] != NULL;
+ pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
+ }
- /* bind the new sampler views */
- if (any_change) {
- ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0,
- MAX2(info->nr_views, count),
- info->views);
- }
+ /* bind the new sampler views */
+ if (any_change) {
+ ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0,
+ MAX2(ctx->nr_fragment_views, count),
+ ctx->fragment_views);
+ }
- info->nr_views = count;
+ ctx->nr_fragment_views = count;
+ }
+ else
+ ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0, count, views);
}
void
-cso_save_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+cso_save_fragment_sampler_views(struct cso_context *ctx)
{
- struct sampler_info *info = &ctx->samplers[shader_stage];
unsigned i;
- info->nr_views_saved = info->nr_views;
+ ctx->nr_fragment_views_saved = ctx->nr_fragment_views;
- for (i = 0; i < info->nr_views; i++) {
- assert(!info->views_saved[i]);
- pipe_sampler_view_reference(&info->views_saved[i], info->views[i]);
+ for (i = 0; i < ctx->nr_fragment_views; i++) {
+ assert(!ctx->fragment_views_saved[i]);
+ pipe_sampler_view_reference(&ctx->fragment_views_saved[i],
+ ctx->fragment_views[i]);
}
}
void
-cso_restore_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+cso_restore_fragment_sampler_views(struct cso_context *ctx)
{
- struct sampler_info *info = &ctx->samplers[shader_stage];
- unsigned i, nr_saved = info->nr_views_saved;
+ unsigned i, nr_saved = ctx->nr_fragment_views_saved;
unsigned num;
for (i = 0; i < nr_saved; i++) {
- pipe_sampler_view_reference(&info->views[i], NULL);
+ pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
/* move the reference from one pointer to another */
- info->views[i] = info->views_saved[i];
- info->views_saved[i] = NULL;
+ ctx->fragment_views[i] = ctx->fragment_views_saved[i];
+ ctx->fragment_views_saved[i] = NULL;
}
- for (; i < info->nr_views; i++) {
- pipe_sampler_view_reference(&info->views[i], NULL);
+ for (; i < ctx->nr_fragment_views; i++) {
+ pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
}
- num = MAX2(info->nr_views, nr_saved);
+ num = MAX2(ctx->nr_fragment_views, nr_saved);
/* bind the old/saved sampler views */
- ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0, num, info->views);
+ ctx->pipe->set_sampler_views(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, num,
+ ctx->fragment_views);
- info->nr_views = nr_saved;
- info->nr_views_saved = 0;
+ ctx->nr_fragment_views = nr_saved;
+ ctx->nr_fragment_views_saved = 0;
}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index cc50b60c6cd..f0a27390d17 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -72,19 +72,17 @@ cso_set_samplers(struct cso_context *cso,
const struct pipe_sampler_state **states);
void
-cso_save_samplers(struct cso_context *cso, unsigned shader_stage);
+cso_save_fragment_samplers(struct cso_context *cso);
void
-cso_restore_samplers(struct cso_context *cso, unsigned shader_stage);
+cso_restore_fragment_samplers(struct cso_context *cso);
/* Alternate interface to support state trackers that like to modify
* samplers one at a time:
*/
enum pipe_error
-cso_single_sampler(struct cso_context *cso,
- unsigned shader_stage,
- unsigned count,
- const struct pipe_sampler_state *states);
+cso_single_sampler(struct cso_context *cso, unsigned shader_stage,
+ unsigned idx, const struct pipe_sampler_state *states);
void
cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
@@ -188,19 +186,6 @@ void cso_save_render_condition(struct cso_context *cso);
void cso_restore_render_condition(struct cso_context *cso);
-/* clip state */
-
-void
-cso_set_clip(struct cso_context *cso,
- const struct pipe_clip_state *clip);
-
-void
-cso_save_clip(struct cso_context *cso);
-
-void
-cso_restore_clip(struct cso_context *cso);
-
-
/* sampler view state */
void
@@ -210,10 +195,10 @@ cso_set_sampler_views(struct cso_context *cso,
struct pipe_sampler_view **views);
void
-cso_save_sampler_views(struct cso_context *cso, unsigned shader_stage);
+cso_save_fragment_sampler_views(struct cso_context *ctx);
void
-cso_restore_sampler_views(struct cso_context *cso, unsigned shader_stage);
+cso_restore_fragment_sampler_views(struct cso_context *ctx);
/* constant buffers */
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index a1564f93292..c827a68ea0a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -45,7 +45,7 @@
/* fixme: move it from here */
#define MAX_PRIMITIVES 64
-static INLINE int
+static inline int
draw_gs_get_input_index(int semantic, int index,
const struct tgsi_shader_info *input_info)
{
@@ -66,7 +66,7 @@ draw_gs_get_input_index(int semantic, int index,
* the number of elements in the SOA vector. This ensures that the
* throughput is optimized for the given vector instruction set.
*/
-static INLINE boolean
+static inline boolean
draw_gs_should_flush(struct draw_geometry_shader *shader)
{
return (shader->fetched_prim_count == shader->vector_length);
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 90a31bc6ac0..b1e1bcbee04 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -72,7 +72,7 @@ struct draw_gs_llvm_iface {
LLVMValueRef input;
};
-static INLINE const struct draw_gs_llvm_iface *
+static inline const struct draw_gs_llvm_iface *
draw_gs_llvm_iface(const struct lp_build_tgsi_gs_iface *iface)
{
return (const struct draw_gs_llvm_iface *)iface;
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index d48ed721593..d153c166ead 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -350,7 +350,7 @@ struct draw_gs_llvm_variant_key
PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state))
-static INLINE size_t
+static inline size_t
draw_llvm_variant_key_size(unsigned nr_vertex_elements,
unsigned nr_samplers)
{
@@ -360,7 +360,7 @@ draw_llvm_variant_key_size(unsigned nr_vertex_elements,
}
-static INLINE size_t
+static inline size_t
draw_gs_llvm_variant_key_size(unsigned nr_samplers)
{
return (sizeof(struct draw_gs_llvm_variant_key) +
@@ -368,7 +368,7 @@ draw_gs_llvm_variant_key_size(unsigned nr_samplers)
}
-static INLINE struct draw_sampler_static_state *
+static inline struct draw_sampler_static_state *
draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
{
return (struct draw_sampler_static_state *)
@@ -476,13 +476,13 @@ struct draw_llvm {
};
-static INLINE struct llvm_vertex_shader *
+static inline struct llvm_vertex_shader *
llvm_vertex_shader(struct draw_vertex_shader *vs)
{
return (struct llvm_vertex_shader *)vs;
}
-static INLINE struct llvm_geometry_shader *
+static inline struct llvm_geometry_shader *
llvm_geometry_shader(struct draw_geometry_shader *gs)
{
return (struct llvm_geometry_shader *)gs;
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
index 35273330d13..e69dcbded0e 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -115,7 +115,7 @@ void draw_unfilled_prepare_outputs(struct draw_context *context,
* \param idx index into stage's tmp[] array to put the copy (dest)
* \return pointer to the copied vertex
*/
-static INLINE struct vertex_header *
+static inline struct vertex_header *
dup_vert( struct draw_stage *stage,
const struct vertex_header *vert,
unsigned idx )
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 936046ea5f5..85d24b7a6a1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -511,7 +511,7 @@ bind_aaline_fragment_shader(struct aaline_stage *aaline)
-static INLINE struct aaline_stage *
+static inline struct aaline_stage *
aaline_stage( struct draw_stage *stage )
{
return (struct aaline_stage *) stage;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 7feb49ae934..3918923296d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -427,7 +427,7 @@ bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
-static INLINE struct aapoint_stage *
+static inline struct aapoint_stage *
aapoint_stage( struct draw_stage *stage )
{
return (struct aapoint_stage *) stage;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index e1e7dcc6f63..c22758bc702 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -70,12 +70,12 @@ struct clip_stage {
/** Cast wrapper */
-static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
+static inline struct clip_stage *clip_stage( struct draw_stage *stage )
{
return (struct clip_stage *)stage;
}
-static INLINE unsigned
+static inline unsigned
draw_viewport_index(struct draw_context *draw,
const struct vertex_header *leading_vertex)
{
@@ -210,7 +210,7 @@ static void interp( const struct clip_stage *clip,
* true, otherwise returns false.
* Triangle is considered null/empty if it's area is qual to zero.
*/
-static INLINE boolean
+static inline boolean
is_tri_null(struct draw_context *draw, const struct prim_header *header)
{
const unsigned pos_attr = draw_current_shader_position_output(draw);
@@ -322,7 +322,7 @@ static void emit_poly( struct draw_stage *stage,
}
-static INLINE float
+static inline float
dot4(const float *a, const float *b)
{
return (a[0] * b[0] +
@@ -336,7 +336,7 @@ dot4(const float *a, const float *b)
* it first checks if the shader provided a clip distance, otherwise
* it works out the value using the clipvertex
*/
-static INLINE float getclipdist(const struct clip_stage *clipper,
+static inline float getclipdist(const struct clip_stage *clipper,
struct vertex_header *vert,
int plane_idx)
{
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index fa344089a8a..fc8293bd128 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -46,12 +46,12 @@ struct cull_stage {
};
-static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
+static inline struct cull_stage *cull_stage( struct draw_stage *stage )
{
return (struct cull_stage *)stage;
}
-static INLINE boolean
+static inline boolean
cull_distance_is_out(float dist)
{
return (dist < 0.0f) || util_is_inf_or_nan(dist);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index 59e33b472f4..0ea740861d6 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -47,7 +47,7 @@ struct flat_stage
};
-static INLINE struct flat_stage *
+static inline struct flat_stage *
flat_stage(struct draw_stage *stage)
{
return (struct flat_stage *) stage;
@@ -55,7 +55,7 @@ flat_stage(struct draw_stage *stage)
/** Copy all the constant attributes from 'src' vertex to 'dst' vertex */
-static INLINE void copy_flats( struct draw_stage *stage,
+static inline void copy_flats( struct draw_stage *stage,
struct vertex_header *dst,
const struct vertex_header *src )
{
@@ -70,7 +70,7 @@ static INLINE void copy_flats( struct draw_stage *stage,
/** Copy all the color attributes from src vertex to dst0 & dst1 vertices */
-static INLINE void copy_flats2( struct draw_stage *stage,
+static inline void copy_flats2( struct draw_stage *stage,
struct vertex_header *dst0,
struct vertex_header *dst1,
const struct vertex_header *src )
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index b25dd21fd4d..5e0d8ce793d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -49,7 +49,7 @@ struct offset_stage {
-static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
+static inline struct offset_stage *offset_stage( struct draw_stage *stage )
{
return (struct offset_stage *) stage;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 445f195e59c..186b4cb4935 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -462,7 +462,7 @@ bind_pstip_fragment_shader(struct pstip_stage *pstip)
}
-static INLINE struct pstip_stage *
+static inline struct pstip_stage *
pstip_stage( struct draw_stage *stage )
{
return (struct pstip_stage *) stage;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 476c011b9a0..381aa41530b 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -53,7 +53,7 @@ struct stipple_stage {
};
-static INLINE struct stipple_stage *
+static inline struct stipple_stage *
stipple_stage(struct draw_stage *stage)
{
return (struct stipple_stage *) stage;
@@ -108,7 +108,7 @@ emit_segment(struct draw_stage *stage, struct prim_header *header,
}
-static INLINE unsigned
+static inline unsigned
stipple_test(int counter, ushort pattern, int factor)
{
int b = (counter / factor) & 0xf;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 8148f6b4569..7f958d9b985 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -43,7 +43,7 @@ struct twoside_stage {
};
-static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage )
+static inline struct twoside_stage *twoside_stage( struct draw_stage *stage )
{
return (struct twoside_stage *)stage;
}
@@ -51,7 +51,7 @@ static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage )
/**
* Copy back color(s) to front color(s).
*/
-static INLINE struct vertex_header *
+static inline struct vertex_header *
copy_bfc( struct twoside_stage *twoside,
const struct vertex_header *v,
unsigned idx )
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index 51fbdb97ae8..8e6435cdbb4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -53,7 +53,7 @@ struct unfilled_stage {
};
-static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
+static inline struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
{
return (struct unfilled_stage *)stage;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index e0e32dd9bbe..5cc866d7eee 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -85,7 +85,7 @@ struct vbuf_stage {
/**
* Basically a cast wrapper.
*/
-static INLINE struct vbuf_stage *
+static inline struct vbuf_stage *
vbuf_stage( struct draw_stage *stage )
{
assert(stage);
@@ -97,7 +97,7 @@ static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
-static INLINE boolean
+static inline boolean
overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
{
unsigned long used = (unsigned long) ((char *)ptr - (char *)map);
@@ -105,7 +105,7 @@ overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
}
-static INLINE void
+static inline void
check_space( struct vbuf_stage *vbuf, unsigned nr )
{
if (vbuf->nr_vertices + nr > vbuf->max_vertices ||
@@ -126,7 +126,7 @@ check_space( struct vbuf_stage *vbuf, unsigned nr )
* have a couple of slots at the beginning (1-dword header, 4-dword
* clip pos) that we ignore here. We only use the vertex->data[] fields.
*/
-static INLINE ushort
+static inline ushort
emit_vertex( struct vbuf_stage *vbuf,
struct vertex_header *vertex )
{
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index 6c57d5c1e3e..38ac11a9adf 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -45,7 +45,7 @@ struct wideline_stage {
-static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage )
+static inline struct wideline_stage *wideline_stage( struct draw_stage *stage )
{
return (struct wideline_stage *)stage;
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 05beba8cd97..348b0e93bbc 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -83,7 +83,7 @@ struct widepoint_stage {
-static INLINE struct widepoint_stage *
+static inline struct widepoint_stage *
widepoint_stage( struct draw_stage *stage )
{
return (struct widepoint_stage *)stage;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 7b893cb2692..0ad94bb031f 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -494,7 +494,7 @@ void draw_update_viewport_flags(struct draw_context *draw);
* Return index of the given viewport clamping it
* to be between 0 <= and < PIPE_MAX_VIEWPORTS
*/
-static INLINE unsigned
+static inline unsigned
draw_clamp_viewport_idx(int idx)
{
return ((PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0);
@@ -505,7 +505,7 @@ draw_clamp_viewport_idx(int idx)
* overflows then it returns the value from
* the overflow_value variable.
*/
-static INLINE unsigned
+static inline unsigned
draw_overflow_uadd(unsigned a, unsigned b,
unsigned overflow_value)
{
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 5af845ff938..ffec863ae6f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -54,7 +54,7 @@ struct fetch_pipeline_middle_end {
/** cast wrapper */
-static INLINE struct fetch_pipeline_middle_end *
+static inline struct fetch_pipeline_middle_end *
fetch_pipeline_middle_end(struct draw_pt_middle_end *middle)
{
return (struct fetch_pipeline_middle_end *) middle;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index d17d6959b44..e42c4af0e70 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -60,7 +60,7 @@ struct llvm_middle_end {
/** cast wrapper */
-static INLINE struct llvm_middle_end *
+static inline struct llvm_middle_end *
llvm_middle_end(struct draw_pt_middle_end *middle)
{
return (struct llvm_middle_end *) middle;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 71a7d3918e9..f0d5e0f5656 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -53,7 +53,7 @@ struct pt_post_vs {
const struct draw_prim_info *prim_info );
};
-static INLINE void
+static inline void
initialize_vertex_header(struct vertex_header *header)
{
header->clipmask = 0;
@@ -62,7 +62,7 @@ initialize_vertex_header(struct vertex_header *header)
header->vertex_id = UNDEFINED_VERTEX_ID;
}
-static INLINE float
+static inline float
dot4(const float *a, const float *b)
{
return (a[0]*b[0] +
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index 91e67c0840d..20de26fd08a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -65,7 +65,7 @@ draw_so_info(const struct draw_context *draw)
return state;
}
-static INLINE boolean
+static inline boolean
draw_has_so(const struct draw_context *draw)
{
const struct pipe_stream_output_info *state = draw_so_info(draw);
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
index 8098adea61f..8d448f92a26 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -84,7 +84,7 @@ vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
/**
* Add a fetch element and add it to the draw elements.
*/
-static INLINE void
+static inline void
vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch, unsigned ofbias)
{
unsigned hash;
@@ -111,7 +111,7 @@ vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch, unsigned ofbias
* The value is checked for overflows (both integer overflows
* and the elements array overflow).
*/
-static INLINE unsigned
+static inline unsigned
vsplit_get_base_idx(struct vsplit_frontend *vsplit,
unsigned start, unsigned fetch, unsigned *ofbit)
{
@@ -137,7 +137,7 @@ vsplit_get_base_idx(struct vsplit_frontend *vsplit,
* index, plus the element bias, clamped to maximum elememt
* index if that addition overflows.
*/
-static INLINE unsigned
+static inline unsigned
vsplit_get_bias_idx(struct vsplit_frontend *vsplit,
int idx, int bias, unsigned *ofbias)
{
@@ -170,7 +170,7 @@ vsplit_get_bias_idx(struct vsplit_frontend *vsplit,
elt_idx = vsplit_get_base_idx(vsplit, start, fetch, &ofbit); \
elt_idx = vsplit_get_bias_idx(vsplit, ofbit ? 0 : DRAW_GET_IDX(elts, elt_idx), elt_bias, &ofbias)
-static INLINE void
+static inline void
vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
unsigned start, unsigned fetch, int elt_bias)
{
@@ -179,7 +179,7 @@ vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
vsplit_add_cache(vsplit, elt_idx, ofbias);
}
-static INLINE void
+static inline void
vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts,
unsigned start, unsigned fetch, int elt_bias)
{
@@ -193,7 +193,7 @@ vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts,
* Add a fetch element and add it to the draw elements. The fetch element is
* in full range (uint).
*/
-static INLINE void
+static inline void
vsplit_add_cache_uint(struct vsplit_frontend *vsplit, const uint *elts,
unsigned start, unsigned fetch, int elt_bias)
{
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
index 0f7a3cdc012..0afabb01398 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -129,7 +129,7 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
* When spoken is TRUE, ispoken replaces istart; When close is TRUE, iclose is
* appended.
*/
-static INLINE void
+static inline void
CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
unsigned flags,
unsigned istart, unsigned icount,
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index b4178d6a6c5..ee11d2f9276 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -91,13 +91,13 @@ struct vertex_info
} attrib[PIPE_MAX_SHADER_OUTPUTS];
};
-static INLINE size_t
+static inline size_t
draw_vinfo_size( const struct vertex_info *a )
{
return offsetof(const struct vertex_info, attrib[a->num_attribs]);
}
-static INLINE int
+static inline int
draw_vinfo_compare( const struct vertex_info *a,
const struct vertex_info *b )
{
@@ -105,7 +105,7 @@ draw_vinfo_compare( const struct vertex_info *a,
return memcmp( a, b, sizea );
}
-static INLINE void
+static inline void
draw_vinfo_copy( struct vertex_info *dst,
const struct vertex_info *src )
{
@@ -121,7 +121,7 @@ draw_vinfo_copy( struct vertex_info *dst,
* corresponds to this attribute.
* \return slot in which the attribute was added
*/
-static INLINE uint
+static inline uint
draw_emit_vertex_attr(struct vertex_info *vinfo,
enum attrib_emit emit,
enum interp_mode interp, /* only used by softpipe??? */
@@ -150,7 +150,7 @@ void draw_dump_emitted_vertex(const struct vertex_info *vinfo,
const uint8_t *data);
-static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit)
+static inline enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit)
{
switch (emit) {
case EMIT_OMIT:
@@ -174,7 +174,7 @@ static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit
}
}
-static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit)
+static inline unsigned draw_translate_vinfo_size(enum attrib_emit emit)
{
switch (emit) {
case EMIT_OMIT:
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 1d54e7ef298..24b29e70dd9 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -191,12 +191,12 @@ draw_vs_create_variant_generic( struct draw_vertex_shader *vs,
-static INLINE int draw_vs_variant_keysize( const struct draw_vs_variant_key *key )
+static inline int draw_vs_variant_keysize( const struct draw_vs_variant_key *key )
{
return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_variant_element);
}
-static INLINE int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a,
+static inline int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a,
const struct draw_vs_variant_key *b )
{
int keysize = draw_vs_variant_keysize(a);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 9daa93eec3e..50ae192325b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1135,7 +1135,7 @@ lp_build_div(struct lp_build_context *bld,
*
* @sa http://www.stereopsis.com/doubleblend.html
*/
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_lerp_simple(struct lp_build_context *bld,
LLVMValueRef x,
LLVMValueRef v0,
@@ -1674,7 +1674,7 @@ enum lp_build_round_mode
* NOTE: In the SSE4.1's nearest mode, if two values are equally close, the
* result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0.
*/
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_round_sse41(struct lp_build_context *bld,
LLVMValueRef a,
enum lp_build_round_mode mode)
@@ -1717,7 +1717,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
args[2] = LLVMConstInt(i32t, mode, 0);
res = lp_build_intrinsic(builder, intrinsic,
- vec_type, args, Elements(args));
+ vec_type, args, Elements(args), 0);
res = LLVMBuildExtractElement(builder, res, index0, "");
}
@@ -1761,7 +1761,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
}
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_iround_nearest_sse2(struct lp_build_context *bld,
LLVMValueRef a)
{
@@ -1817,7 +1817,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld,
/*
*/
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_round_altivec(struct lp_build_context *bld,
LLVMValueRef a,
enum lp_build_round_mode mode)
@@ -1851,7 +1851,7 @@ lp_build_round_altivec(struct lp_build_context *bld,
return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
}
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_round_arch(struct lp_build_context *bld,
LLVMValueRef a,
enum lp_build_round_mode mode)
@@ -1997,6 +1997,12 @@ lp_build_floor(struct lp_build_context *bld,
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMTypeRef vec_type = bld->vec_type;
+ if (type.width != 32) {
+ char intrinsic[32];
+ util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", type.length, type.width);
+ return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+ }
+
assert(type.width == 32); /* might want to handle doubles at some point */
inttype = type;
@@ -2066,6 +2072,12 @@ lp_build_ceil(struct lp_build_context *bld,
LLVMTypeRef int_vec_type = bld->int_vec_type;
LLVMTypeRef vec_type = bld->vec_type;
+ if (type.width != 32) {
+ char intrinsic[32];
+ util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", type.length, type.width);
+ return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+ }
+
assert(type.width == 32); /* might want to handle doubles at some point */
inttype = type;
@@ -2427,7 +2439,7 @@ lp_build_sqrt(struct lp_build_context *bld,
* - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
* - http://softwarecommunity.intel.com/articles/eng/1818.htm
*/
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_rcp_refine(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef rcp_a)
@@ -2512,7 +2524,7 @@ lp_build_rcp(struct lp_build_context *bld,
*
* See also Intel 64 and IA-32 Architectures Optimization Manual.
*/
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_rsqrt_refine(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef rsqrt_a)
@@ -3535,7 +3547,7 @@ lp_build_fpstate_get(struct gallivm_state *gallivm)
lp_build_intrinsic(builder,
"llvm.x86.sse.stmxcsr",
LLVMVoidTypeInContext(gallivm->context),
- &mxcsr_ptr8, 1);
+ &mxcsr_ptr8, 1, 0);
return mxcsr_ptr;
}
return 0;
@@ -3582,6 +3594,6 @@ lp_build_fpstate_set(struct gallivm_state *gallivm,
lp_build_intrinsic(builder,
"llvm.x86.sse.ldmxcsr",
LLVMVoidTypeInContext(gallivm->context),
- &mxcsr_ptr, 1);
+ &mxcsr_ptr, 1, 0);
}
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index b17c41931f4..a4c3bf0977a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -120,14 +120,14 @@ lp_build_const_mask_aos_swizzled(struct gallivm_state *gallivm,
const unsigned char *swizzle);
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_const_int32(struct gallivm_state *gallivm, int i)
{
return LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), i, 0);
}
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_const_float(struct gallivm_state *gallivm, float x)
{
return LLVMConstReal(LLVMFloatTypeInContext(gallivm->context), x);
@@ -135,7 +135,7 @@ lp_build_const_float(struct gallivm_state *gallivm, float x)
/** Return constant-valued pointer to int */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_const_int_pointer(struct gallivm_state *gallivm, const void *ptr)
{
LLVMTypeRef int_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index 405e6486f7a..7283e2f162f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -61,6 +61,7 @@ lp_check_alignment(const void *ptr, unsigned alignment)
return ((uintptr_t)ptr & (alignment - 1)) == 0;
}
+#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBEDDED)
class raw_debug_ostream :
public llvm::raw_ostream
@@ -91,6 +92,7 @@ raw_debug_ostream::write_impl(const char *Ptr, size_t Size)
}
}
+#endif
extern "C" const char *
lp_get_module_id(LLVMModuleRef module)
@@ -123,7 +125,7 @@ lp_debug_dump_value(LLVMValueRef value)
* - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
*/
static size_t
-disassemble(const void* func, llvm::raw_ostream & Out)
+disassemble(const void* func)
{
const uint8_t *bytes = (const uint8_t *)func;
@@ -141,7 +143,8 @@ disassemble(const void* func, llvm::raw_ostream & Out)
char outline[1024];
if (!D) {
- Out << "error: couldn't create disassembler for triple " << Triple << "\n";
+ _debug_printf("error: couldn't create disassembler for triple %s\n",
+ Triple.c_str());
return 0;
}
@@ -155,13 +158,13 @@ disassemble(const void* func, llvm::raw_ostream & Out)
* so that between runs.
*/
- Out << llvm::format("%6lu:\t", (unsigned long)pc);
+ _debug_printf("%6lu:\t", (unsigned long)pc);
Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline,
sizeof outline);
if (!Size) {
- Out << "invalid\n";
+ _debug_printf("invalid\n");
pc += 1;
break;
}
@@ -173,10 +176,10 @@ disassemble(const void* func, llvm::raw_ostream & Out)
if (0) {
unsigned i;
for (i = 0; i < Size; ++i) {
- Out << llvm::format("%02x ", bytes[pc + i]);
+ _debug_printf("%02x ", bytes[pc + i]);
}
for (; i < 16; ++i) {
- Out << " ";
+ _debug_printf(" ");
}
}
@@ -184,9 +187,9 @@ disassemble(const void* func, llvm::raw_ostream & Out)
* Print the instruction.
*/
- Out << outline;
+ _debug_printf("%s", outline);
- Out << "\n";
+ _debug_printf("\n");
/*
* Stop disassembling on return statements, if there is no record of a
@@ -206,13 +209,12 @@ disassemble(const void* func, llvm::raw_ostream & Out)
pc += Size;
if (pc >= extent) {
- Out << "disassembly larger than " << extent << "bytes, aborting\n";
+ _debug_printf("disassembly larger than %llu bytes, aborting\n", (unsigned long long)extent);
break;
}
}
- Out << "\n";
- Out.flush();
+ _debug_printf("\n");
LLVMDisasmDispose(D);
@@ -229,9 +231,8 @@ disassemble(const void* func, llvm::raw_ostream & Out)
extern "C" void
lp_disassemble(LLVMValueRef func, const void *code) {
- raw_debug_ostream Out;
- Out << LLVMGetValueName(func) << ":\n";
- disassemble(code, Out);
+ _debug_printf("%s:\n", LLVMGetValueName(func));
+ disassemble(code);
}
@@ -273,7 +274,7 @@ lp_profile(LLVMValueRef func, const void *code)
unsigned long addr = (uintptr_t)code;
llvm::raw_fd_ostream Out(perf_asm_fd, false);
Out << symbol << ":\n";
- unsigned long size = disassemble(code, Out);
+ unsigned long size = disassemble(code);
fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
fflush(perf_map_file);
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 321e09d56b9..375ba6cb5ff 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -59,7 +59,7 @@ extern unsigned gallivm_debug;
#endif
-static INLINE void
+static inline void
lp_build_name(LLVMValueRef val, const char *format, ...)
{
#ifdef DEBUG
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index efe71704c3a..ddf3ad1dfc6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -95,7 +95,7 @@ lp_build_format_swizzle_aos(const struct util_format_description *desc,
/**
* Whether the format matches the vector type, apart of swizzles.
*/
-static INLINE boolean
+static inline boolean
format_matches_type(const struct util_format_description *desc,
struct lp_type type)
{
@@ -146,7 +146,7 @@ format_matches_type(const struct util_format_description *desc,
*
* @return XYZW in a float[4] or ubyte[4] or ushort[4] vector.
*/
-static INLINE LLVMValueRef
+static inline LLVMValueRef
lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
const struct util_format_description *desc,
LLVMValueRef packed)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
index 4f5a45c6a3d..fa0e8b656bb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -212,7 +212,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm,
}
-static INLINE void
+static inline void
yuv_to_rgb_soa(struct gallivm_state *gallivm,
unsigned n,
LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 384ea864081..017d0752060 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -106,7 +106,6 @@ enum LLVM_CodeGenOpt_Level {
static boolean
create_pass_manager(struct gallivm_state *gallivm)
{
- char *td_str;
assert(!gallivm->passmgr);
assert(gallivm->target);
@@ -122,10 +121,29 @@ create_pass_manager(struct gallivm_state *gallivm)
// Old versions of LLVM get the DataLayout from the pass manager.
LLVMAddTargetData(gallivm->target, gallivm->passmgr);
- // New ones from the Module.
- td_str = LLVMCopyStringRepOfTargetData(gallivm->target);
- LLVMSetDataLayout(gallivm->module, td_str);
- free(td_str);
+ /* Setting the module's DataLayout to an empty string will cause the
+ * ExecutionEngine to copy the DataLayout string from its target
+ * machine to the module. As of LLVM 3.8 the module and the execution
+ * engine are required to have the same DataLayout.
+ *
+ * TODO: This is just a temporary work-around. The correct solution is
+ * for gallivm_init_state() to create a TargetMachine and pull the
+ * DataLayout from there. Currently, the TargetMachine used by llvmpipe
+ * is being implicitly created by the EngineBuilder in
+ * lp_build_create_jit_compiler_for_module()
+ */
+
+#if HAVE_LLVM < 0x0308
+ {
+ char *td_str;
+ // New ones from the Module.
+ td_str = LLVMCopyStringRepOfTargetData(gallivm->target);
+ LLVMSetDataLayout(gallivm->module, td_str);
+ free(td_str);
+ }
+#else
+ LLVMSetDataLayout(gallivm->module, "");
+#endif
if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 2bf1211bcd7..30f4863ec44 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -81,7 +81,8 @@ lp_build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
- unsigned num_args)
+ unsigned num_args,
+ LLVMAttribute attr)
{
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
LLVMValueRef function;
@@ -99,6 +100,9 @@ lp_build_intrinsic(LLVMBuilderRef builder,
}
function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
+
+ if (attr)
+ LLVMAddFunctionAttr(function, attr);
}
return LLVMBuildCall(builder, function, args, num_args, "");
@@ -111,7 +115,7 @@ lp_build_intrinsic_unary(LLVMBuilderRef builder,
LLVMTypeRef ret_type,
LLVMValueRef a)
{
- return lp_build_intrinsic(builder, name, ret_type, &a, 1);
+ return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
}
@@ -127,7 +131,7 @@ lp_build_intrinsic_binary(LLVMBuilderRef builder,
args[0] = a;
args[1] = b;
- return lp_build_intrinsic(builder, name, ret_type, args, 2);
+ return lp_build_intrinsic(builder, name, ret_type, args, 2, 0);
}
@@ -242,7 +246,7 @@ lp_build_intrinsic_map(struct gallivm_state *gallivm,
LLVMValueRef res_elem;
for(j = 0; j < num_args; ++j)
arg_elems[j] = LLVMBuildExtractElement(builder, args[j], index, "");
- res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args);
+ res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args, 0);
res = LLVMBuildInsertElement(builder, res, res_elem, index, "");
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index 38c5c29c980..a54b367961a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -59,7 +59,8 @@ lp_build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
- unsigned num_args);
+ unsigned num_args,
+ LLVMAttribute attr);
LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index db503514881..571c615f9f8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -88,7 +88,7 @@
* actually try to allocate the maximum and run out of memory and crash. So
* stick with something reasonable here.
*/
-static INLINE int
+static inline int
gallivm_get_shader_param(enum pipe_shader_cap param)
{
switch(param) {
@@ -100,7 +100,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return LP_MAX_TGSI_NESTING;
case PIPE_SHADER_CAP_MAX_INPUTS:
- return PIPE_MAX_SHADER_INPUTS;
+ return 32;
case PIPE_SHADER_CAP_MAX_OUTPUTS:
return 32;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
@@ -132,6 +132,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
return 1;
case PIPE_SHADER_CAP_DOUBLES:
+ return 1;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 80b53e5c3f8..19d30d0d63c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -395,7 +395,7 @@ lp_build_select(struct lp_build_context *bld,
args[2] = mask;
res = lp_build_intrinsic(builder, intrinsic,
- arg_type, args, Elements(args));
+ arg_type, args, Elements(args), 0);
if (arg_type != bld->vec_type) {
res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 640b7e0d7e0..eba758da6ae 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -371,7 +371,7 @@ struct lp_build_sample_context
* We only support a few wrap modes in lp_build_sample_wrap_linear_int() at
* this time. Return whether the given mode is supported by that function.
*/
-static INLINE boolean
+static inline boolean
lp_is_simple_wrap_mode(unsigned mode)
{
switch (mode) {
@@ -384,7 +384,7 @@ lp_is_simple_wrap_mode(unsigned mode)
}
-static INLINE void
+static inline void
apply_sampler_swizzle(struct lp_build_sample_context *bld,
LLVMValueRef *texel)
{
@@ -402,7 +402,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld,
* not really dimension as such, this indicates the amount of
* "normal" texture coords subject to minification, wrapping etc.
*/
-static INLINE unsigned
+static inline unsigned
texture_dims(enum pipe_texture_target tex)
{
switch (tex) {
@@ -424,7 +424,7 @@ texture_dims(enum pipe_texture_target tex)
}
}
-static INLINE boolean
+static inline boolean
has_layer_coord(enum pipe_texture_target tex)
{
switch (tex) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index e391d8a4301..c4ae30461cb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -104,7 +104,7 @@ lp_build_tgsi_intrinsic(
struct lp_build_context * base = &bld_base->base;
emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder, action->intr_name,
- emit_data->dst_type, emit_data->args, emit_data->arg_count);
+ emit_data->dst_type, emit_data->args, emit_data->arg_count, 0);
}
LLVMValueRef
@@ -175,13 +175,52 @@ void lp_build_fetch_args(
unsigned src;
for (src = 0; src < emit_data->info->num_src; src++) {
emit_data->args[src] = lp_build_emit_fetch(bld_base, emit_data->inst, src,
- emit_data->chan);
+ emit_data->src_chan);
}
emit_data->arg_count = emit_data->info->num_src;
lp_build_action_set_dst_type(emit_data, bld_base,
emit_data->inst->Instruction.Opcode);
}
+/**
+ * Map a destination channel to the source channel to fetch from.
+ * With doubles, src and dst channels aren't 1:1; the src/dst types
+ * inferred for the opcode decide the mapping (-1 means skip the channel):
+ * 1. if neither is double then src == dst;
+ * 2. if dst is double
+ * - dst y and w are not stored: return -1;
+ * - if src is double then src == dst;
+ * - else (e.g. f2d) dst x reads src x and dst z reads src y.
+ * 3. if dst is single and src is double
+ * - dst x,z read src x (the xy pair);
+ * - dst y,w read src z (the zw pair).
+ */
+static int get_src_chan_idx(unsigned opcode,
+ int dst_chan_index)
+{
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(opcode);
+ enum tgsi_opcode_type stype = tgsi_opcode_infer_src_type(opcode);
+
+ if (dtype != TGSI_TYPE_DOUBLE && stype != TGSI_TYPE_DOUBLE)
+ return dst_chan_index;
+ if (dtype == TGSI_TYPE_DOUBLE) {
+ if (dst_chan_index == 1 || dst_chan_index == 3)
+ return -1;
+ if (stype == TGSI_TYPE_DOUBLE)
+ return dst_chan_index;
+ if (dst_chan_index == 0)
+ return 0;
+ if (dst_chan_index == 2)
+ return 1;
+ } else {
+ if (dst_chan_index == 0 || dst_chan_index == 2)
+ return 0;
+ if (dst_chan_index == 1 || dst_chan_index == 3)
+ return 2;
+ }
+ return -1;
+}
+
/* XXX: COMMENT
* It should be assumed that this function ignores writemasks
*/
@@ -197,7 +236,6 @@ lp_build_tgsi_inst_llvm(
struct lp_build_emit_data emit_data;
unsigned chan_index;
LLVMValueRef val;
-
bld_base->pc++;
if (bld_base->emit_debug) {
@@ -240,7 +278,12 @@ lp_build_tgsi_inst_llvm(
/* Emit the instructions */
if (info->output_mode == TGSI_OUTPUT_COMPONENTWISE && bld_base->soa) {
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+ int src_index = get_src_chan_idx(inst->Instruction.Opcode, chan_index);
+ /* ignore channels 1/3 in double dst */
+ if (src_index == -1)
+ continue;
emit_data.chan = chan_index;
+ emit_data.src_chan = src_index;
if (!action->fetch_args) {
lp_build_fetch_args(bld_base, &emit_data);
} else {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 967373ccdae..2ca9c6194b3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -338,6 +338,7 @@ struct lp_build_tgsi_context
struct lp_build_context uint_bld;
struct lp_build_context int_bld;
+ struct lp_build_context dbl_bld;
/** This array stores functions that are used to transform TGSI opcodes to
* LLVM instructions.
*/
@@ -349,6 +350,9 @@ struct lp_build_tgsi_context
struct lp_build_tgsi_action sqrt_action;
+ struct lp_build_tgsi_action drsq_action;
+
+ struct lp_build_tgsi_action dsqrt_action;
const struct tgsi_shader_info *info;
lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
@@ -558,13 +562,13 @@ struct lp_build_tgsi_aos_context
};
-static INLINE struct lp_build_tgsi_soa_context *
+static inline struct lp_build_tgsi_soa_context *
lp_soa_context(struct lp_build_tgsi_context *bld_base)
{
return (struct lp_build_tgsi_soa_context *)bld_base;
}
-static INLINE struct lp_build_tgsi_aos_context *
+static inline struct lp_build_tgsi_aos_context *
lp_aos_context(struct lp_build_tgsi_context *bld_base)
{
return (struct lp_build_tgsi_aos_context *)bld_base;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 9cb42b237b7..0ad78b0ace2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -894,6 +894,125 @@ const struct lp_build_tgsi_action xpd_action = {
xpd_emit /* emit */
};
+/* TGSI_OPCODE_D2F: FPTrunc args[0] to bld_base->base.vec_type */
+static void
+d2f_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] =
+ LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
+ emit_data->args[0],
+ bld_base->base.vec_type, "");
+}
+
+/* TGSI_OPCODE_D2I: signed FP-to-int of args[0] to base.int_vec_type */
+static void
+d2i_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] =
+ LLVMBuildFPToSI(bld_base->base.gallivm->builder,
+ emit_data->args[0],
+ bld_base->base.int_vec_type, "");
+}
+
+/* TGSI_OPCODE_D2U: unsigned FP-to-int of args[0] to base.int_vec_type */
+static void
+d2u_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] =
+ LLVMBuildFPToUI(bld_base->base.gallivm->builder,
+ emit_data->args[0],
+ bld_base->base.int_vec_type, "");
+}
+
+/* TGSI_OPCODE_F2D: FPExt args[0] to dbl_bld.vec_type */
+static void
+f2d_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] =
+ LLVMBuildFPExt(bld_base->base.gallivm->builder,
+ emit_data->args[0],
+ bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_U2D: unsigned int-to-FP of args[0] to dbl_bld.vec_type */
+static void
+u2d_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] =
+ LLVMBuildUIToFP(bld_base->base.gallivm->builder,
+ emit_data->args[0],
+ bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_I2D: signed int-to-FP of args[0] to dbl_bld.vec_type */
+static void
+i2d_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSIToFP(bld_base->base.gallivm->builder,
+ emit_data->args[0],
+ bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_DMAD: args[0] * args[1] + args[2], built as DMUL then DADD */
+static void
+dmad_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef tmp;
+ tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL,
+ emit_data->args[0],
+ emit_data->args[1]);
+ emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
+ TGSI_OPCODE_DADD, tmp, emit_data->args[2]);
+}
+
+/* TGSI_OPCODE_DRCP: 1.0 / args[0] using a dbl_bld-typed constant vector */
+static void drcp_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef one;
+ one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f);
+ emit_data->output[emit_data->chan] = LLVMBuildFDiv(
+ bld_base->base.gallivm->builder,
+ one, emit_data->args[0], "");
+}
+
+/* TGSI_OPCODE_DFRAC: args[0] - floor(args[0]), floor built with dbl_bld */
+static void dfrac_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMValueRef tmp;
+ tmp = lp_build_floor(&bld_base->dbl_bld,
+ emit_data->args[0]);
+ emit_data->output[emit_data->chan] = LLVMBuildFSub(bld_base->base.gallivm->builder,
+ emit_data->args[0], tmp, "");
+}
+
void
lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
{
@@ -948,6 +1067,25 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
+
+ bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit;
+ bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit;
+ bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit;
+ bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit;
+
+ bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit;
+ bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit;
+ bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit;
+
+ bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
+ bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
+ bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
+
+ bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
+
+ bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
+ bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
+
}
/* CPU Only default actions */
@@ -1792,6 +1930,107 @@ xor_emit_cpu(
emit_data->args[1]);
}
+/* TGSI_OPCODE_DABS (CPU Only): lp_build_abs of args[0] using the dbl_bld context */
+static void
+dabs_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld,
+ emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_DNEG (CPU Only): negation emitted as dbl_bld.zero - args[0] */
+static void
+dneg_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld,
+ bld_base->dbl_bld.zero,
+ emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_DSET Helper (CPU Only): compares args[0] with args[1] using pipe_func */
+static void
+dset_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data,
+ unsigned pipe_func)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func,
+ emit_data->args[0], emit_data->args[1]);
+ /* arguments were 64 bit but the result is stored as 32 bit: truncate to int_bld's int vector type */
+ cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
+ emit_data->output[emit_data->chan] = cond;
+}
+
+/* TGSI_OPCODE_DSEQ (CPU Only): dset_emit_cpu with PIPE_FUNC_EQUAL */
+static void
+dseq_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
+}
+
+/* TGSI_OPCODE_DSGE (CPU Only): dset_emit_cpu with PIPE_FUNC_GEQUAL */
+static void
+dsge_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
+}
+
+/* TGSI_OPCODE_DSLT (CPU Only): dset_emit_cpu with PIPE_FUNC_LESS */
+static void
+dslt_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
+}
+
+/* TGSI_OPCODE_DSNE (CPU Only): dset_emit_cpu with PIPE_FUNC_NOTEQUAL */
+static void
+dsne_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
+}
+
+/* Double reciprocal square root (CPU Only): lp_build_rsqrt of args[0] on dbl_bld */
+static void
+drecip_sqrt_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld,
+ emit_data->args[0]);
+}
+
+/* Double square root (CPU Only): lp_build_sqrt of args[0] on dbl_bld */
+static void
+dsqrt_emit_cpu(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
+ emit_data->args[0]);
+}
+
void
lp_set_default_actions_cpu(
struct lp_build_tgsi_context * bld_base)
@@ -1864,4 +2103,14 @@ lp_set_default_actions_cpu(
bld_base->op_actions[TGSI_OPCODE_XOR].emit = xor_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
+
+ bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
+ bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
+
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
index fc7fdbdd231..463d44eb450 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
@@ -71,6 +71,11 @@ struct lp_build_emit_data {
*/
unsigned chan;
+ /**
+ * This is used to specify the src channel to read from for doubles.
+ */
+ unsigned src_chan;
+
/** The lp_build_tgsi_action::emit 'executes' the opcode and writes the
* results to this array.
*/
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
index 55acea83799..906a1745551 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -462,7 +462,7 @@ analyse_instruction(struct analysis_context *ctx,
}
-static INLINE void
+static inline void
dump_info(const struct tgsi_token *tokens,
struct lp_tgsi_info *info)
{
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 268379e7d13..fae604e2f9c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -106,7 +106,7 @@ emit_dump_reg(struct gallivm_state *gallivm,
* Return the context for the current function.
* (always 'main', if shader doesn't do any function calls)
*/
-static INLINE struct function_ctx *
+static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
assert(mask->function_stack_size > 0);
@@ -120,7 +120,7 @@ func_ctx(struct lp_exec_mask *mask)
* no loop inside the current function, but we were inside
* a loop inside another function, from which this one was called.
*/
-static INLINE boolean
+static inline boolean
mask_has_loop(struct lp_exec_mask *mask)
{
int i;
@@ -138,7 +138,7 @@ mask_has_loop(struct lp_exec_mask *mask)
* no switch in the current function, but we were inside
* a switch inside another function, from which this one was called.
*/
-static INLINE boolean
+static inline boolean
mask_has_switch(struct lp_exec_mask *mask)
{
int i;
@@ -156,7 +156,7 @@ mask_has_switch(struct lp_exec_mask *mask)
* no conditional in the current function, but we were inside
* a conditional inside another function, from which this one was called.
*/
-static INLINE boolean
+static inline boolean
mask_has_cond(struct lp_exec_mask *mask)
{
int i;
@@ -947,15 +947,20 @@ static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
LLVMValueRef base_ptr,
LLVMValueRef indexes,
- LLVMValueRef overflow_mask)
+ LLVMValueRef overflow_mask,
+ LLVMValueRef indexes2)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
struct lp_build_context *bld = &bld_base->base;
- LLVMValueRef res = bld->undef;
+ LLVMValueRef res;
unsigned i;
+ if (indexes2)
+ res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
+ else
+ res = bld->undef;
/*
* overflow_mask is a vector telling us which channels
* in the vector overflowed. We use the overflow behavior for
@@ -976,26 +981,47 @@ build_gather(struct lp_build_tgsi_context *bld_base,
* control flow.
*/
indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
+ if (indexes2)
+ indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
}
/*
* Loop over elements of index_vec, load scalar value, insert it into 'res'.
*/
- for (i = 0; i < bld->type.length; i++) {
- LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
- LLVMValueRef index = LLVMBuildExtractElement(builder,
- indexes, ii, "");
+ for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
+ LLVMValueRef si, di;
+ LLVMValueRef index;
LLVMValueRef scalar_ptr, scalar;
+ di = lp_build_const_int32(bld->gallivm, i);
+ if (indexes2)
+ si = lp_build_const_int32(bld->gallivm, i >> 1);
+ else
+ si = di;
+
+ if (indexes2 && (i & 1)) {
+ index = LLVMBuildExtractElement(builder,
+ indexes2, si, "");
+ } else {
+ index = LLVMBuildExtractElement(builder,
+ indexes, si, "");
+ }
scalar_ptr = LLVMBuildGEP(builder, base_ptr,
&index, 1, "gather_ptr");
scalar = LLVMBuildLoad(builder, scalar_ptr, "");
- res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
+ res = LLVMBuildInsertElement(builder, res, scalar, di, "");
}
if (overflow_mask) {
- res = lp_build_select(bld, overflow_mask, bld->zero, res);
+ if (indexes2) {
+ res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
+ overflow_mask = LLVMBuildSExt(builder, overflow_mask,
+ bld_base->dbl_bld.int_vec_type, "");
+ res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
+ bld_base->dbl_bld.zero, res);
+ } else
+ res = lp_build_select(bld, overflow_mask, bld->zero, res);
}
return res;
@@ -1139,8 +1165,10 @@ stype_to_fetch(struct lp_build_tgsi_context * bld_base,
case TGSI_TYPE_SIGNED:
bld_fetch = &bld_base->int_bld;
break;
- case TGSI_TYPE_VOID:
case TGSI_TYPE_DOUBLE:
+ bld_fetch = &bld_base->dbl_bld;
+ break;
+ case TGSI_TYPE_VOID:
default:
assert(0);
bld_fetch = NULL;
@@ -1216,6 +1244,7 @@ emit_fetch_constant(
lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
LLVMValueRef index_vec; /* index into the const buffer */
LLVMValueRef overflow_mask;
+ LLVMValueRef index_vec2 = NULL;
indirect_index = get_indirect_index(bld,
reg->Register.File,
@@ -1235,22 +1264,33 @@ emit_fetch_constant(
index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+ if (stype == TGSI_TYPE_DOUBLE) {
+ LLVMValueRef swizzle_vec2;
+ swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
+ index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
+ }
/* Gather values from the constant buffer */
- res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask);
+ res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
}
else {
LLVMValueRef index; /* index into the const buffer */
LLVMValueRef scalar, scalar_ptr;
-
+ struct lp_build_context *bld_broad = &bld_base->base;
index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
&index, 1, "");
+ if (stype == TGSI_TYPE_DOUBLE) {
+ LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
+ bld_broad = &bld_base->dbl_bld;
+ }
scalar = LLVMBuildLoad(builder, scalar_ptr, "");
- res = lp_build_broadcast_scalar(&bld_base->base, scalar);
+ res = lp_build_broadcast_scalar(bld_broad, scalar);
}
- if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
+ if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
@@ -1258,6 +1298,39 @@ emit_fetch_constant(
return res;
}
+/**
+ * Fetch double values from two separate channels.
+ * Doubles are stored split across two channels, like xy and zw.
+ * This function interleaves the two channel vectors with one shuffle
+ * (element i of input, then element i of input2), giving a vector of
+ * 2 * base.type.length elements (asserted to be at most 16), and then
+ * bitcasts the result to the vec_type of the build context for stype.
+ */
+static LLVMValueRef
+emit_fetch_double(
+ struct lp_build_tgsi_context * bld_base,
+ enum tgsi_opcode_type stype,
+ LLVMValueRef input,
+ LLVMValueRef input2)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef res;
+ struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
+ int i;
+ LLVMValueRef shuffles[16];
+ int len = bld_base->base.type.length * 2;
+ assert(len <= 16);
+
+ for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+ shuffles[i] = lp_build_const_int32(gallivm, i / 2);
+ shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+ }
+ res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
+
+ return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
+}
+
static LLVMValueRef
emit_fetch_immediate(
struct lp_build_tgsi_context * bld_base,
@@ -1281,7 +1354,7 @@ emit_fetch_immediate(
if (reg->Register.Indirect) {
LLVMValueRef indirect_index;
LLVMValueRef index_vec; /* index into the immediate register array */
-
+ LLVMValueRef index_vec2 = NULL;
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
@@ -1296,25 +1369,46 @@ emit_fetch_immediate(
indirect_index,
swizzle,
FALSE);
-
+ if (stype == TGSI_TYPE_DOUBLE)
+ index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle + 1,
+ FALSE);
/* Gather values from the immediate register array */
- res = build_gather(bld_base, imms_array, index_vec, NULL);
+ res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
} else {
LLVMValueRef lindex = lp_build_const_int32(gallivm,
reg->Register.Index * 4 + swizzle);
LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
bld->imms_array, &lindex, 1, "");
res = LLVMBuildLoad(builder, imms_ptr, "");
+
+ if (stype == TGSI_TYPE_DOUBLE) {
+ LLVMValueRef lindex1;
+ LLVMValueRef imms_ptr2;
+ LLVMValueRef res2;
+
+ lindex1 = lp_build_const_int32(gallivm,
+ reg->Register.Index * 4 + swizzle + 1);
+ imms_ptr2 = LLVMBuildGEP(builder,
+ bld->imms_array, &lindex1, 1, "");
+ res2 = LLVMBuildLoad(builder, imms_ptr2, "");
+ res = emit_fetch_double(bld_base, stype, res, res2);
+ }
}
}
else {
res = bld->immediates[reg->Register.Index][swizzle];
+ if (stype == TGSI_TYPE_DOUBLE)
+ res = emit_fetch_double(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
}
if (stype == TGSI_TYPE_UNSIGNED) {
res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
} else if (stype == TGSI_TYPE_SIGNED) {
res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+ } else if (stype == TGSI_TYPE_DOUBLE) {
+ res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
}
return res;
}
@@ -1334,6 +1428,7 @@ emit_fetch_input(
if (reg->Register.Indirect) {
LLVMValueRef indirect_index;
LLVMValueRef index_vec; /* index into the input reg array */
+ LLVMValueRef index_vec2 = NULL;
LLVMValueRef inputs_array;
LLVMTypeRef fptr_type;
@@ -1346,23 +1441,43 @@ emit_fetch_input(
indirect_index,
swizzle,
TRUE);
-
+ if (stype == TGSI_TYPE_DOUBLE) {
+ index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle + 1,
+ TRUE);
+ }
/* cast inputs_array pointer to float* */
fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
/* Gather values from the input register array */
- res = build_gather(bld_base, inputs_array, index_vec, NULL);
+ res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
} else {
if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
LLVMValueRef lindex = lp_build_const_int32(gallivm,
reg->Register.Index * 4 + swizzle);
- LLVMValueRef input_ptr = LLVMBuildGEP(builder,
- bld->inputs_array, &lindex, 1, "");
+ LLVMValueRef input_ptr = LLVMBuildGEP(builder,
+ bld->inputs_array, &lindex, 1, "");
+
res = LLVMBuildLoad(builder, input_ptr, "");
+ if (stype == TGSI_TYPE_DOUBLE) {
+ LLVMValueRef lindex1;
+ LLVMValueRef input_ptr2;
+ LLVMValueRef res2;
+
+ lindex1 = lp_build_const_int32(gallivm,
+ reg->Register.Index * 4 + swizzle + 1);
+ input_ptr2 = LLVMBuildGEP(builder,
+ bld->inputs_array, &lindex1, 1, "");
+ res2 = LLVMBuildLoad(builder, input_ptr2, "");
+ res = emit_fetch_double(bld_base, stype, res, res2);
+ }
}
else {
res = bld->inputs[reg->Register.Index][swizzle];
+ if (stype == TGSI_TYPE_DOUBLE)
+ res = emit_fetch_double(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
}
}
@@ -1372,6 +1487,8 @@ emit_fetch_input(
res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
} else if (stype == TGSI_TYPE_SIGNED) {
res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+ } else if (stype == TGSI_TYPE_DOUBLE) {
+ res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
}
return res;
@@ -1413,7 +1530,7 @@ emit_fetch_gs_input(
} else {
attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
}
-
+
if (reg->Dimension.Indirect) {
vertex_index = get_indirect_index(bld,
reg->Register.File,
@@ -1436,6 +1553,8 @@ emit_fetch_gs_input(
res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
} else if (stype == TGSI_TYPE_SIGNED) {
res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+ } else if (stype == TGSI_TYPE_DOUBLE) {
+ res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
}
return res;
@@ -1455,7 +1574,7 @@ emit_fetch_temporary(
if (reg->Register.Indirect) {
LLVMValueRef indirect_index;
- LLVMValueRef index_vec; /* index into the temp reg array */
+ LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
LLVMValueRef temps_array;
LLVMTypeRef fptr_type;
@@ -1468,21 +1587,35 @@ emit_fetch_temporary(
indirect_index,
swizzle,
TRUE);
+ if (stype == TGSI_TYPE_DOUBLE) {
+ index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle + 1,
+ TRUE);
+ }
/* cast temps_array pointer to float* */
fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
/* Gather values from the temporary register array */
- res = build_gather(bld_base, temps_array, index_vec, NULL);
+ res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
}
else {
LLVMValueRef temp_ptr;
temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
res = LLVMBuildLoad(builder, temp_ptr, "");
+
+ if (stype == TGSI_TYPE_DOUBLE) {
+ LLVMValueRef temp_ptr2, res2;
+
+ temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
+ res2 = LLVMBuildLoad(builder, temp_ptr2, "");
+ res = emit_fetch_double(bld_base, stype, res, res2);
+ }
}
- if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
+ if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
@@ -1648,6 +1781,50 @@ emit_fetch_predicate(
}
}
+/**
+ * Store a vector of doubles into two per-channel float vectors,
+ * i.e.
+ * value is d0, d1, d2, d3 etc.
+ * each double has high and low pieces x, y
+ * so gets stored into the separate channels as:
+ * chan_ptr = d0.x, d1.x, d2.x, d3.x
+ * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
+ */
+static void
+emit_store_double_chan(struct lp_build_tgsi_context *bld_base,
+ int dtype,
+ LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
+ LLVMValueRef pred,
+ LLVMValueRef value)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *float_bld = &bld_base->base;
+ int i;
+ LLVMValueRef temp, temp2;
+ LLVMValueRef shuffles[8];
+ LLVMValueRef shuffles2[8];
+ assert(bld_base->base.type.length <= 8); /* shuffles[] and shuffles2[] hold 8 entries each */
+ for (i = 0; i < bld_base->base.type.length; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, i * 2);
+ shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+ }
+
+ temp = LLVMBuildShuffleVector(builder, value,
+ LLVMGetUndef(LLVMTypeOf(value)),
+ LLVMConstVector(shuffles,
+ bld_base->base.type.length),
+ "");
+ temp2 = LLVMBuildShuffleVector(builder, value,
+ LLVMGetUndef(LLVMTypeOf(value)),
+ LLVMConstVector(shuffles2,
+ bld_base->base.type.length),
+ "");
+
+ lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr);
+ lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, chan_ptr2);
+}
/**
* Register store.
@@ -1683,6 +1860,11 @@ emit_store_chan(
}
if (reg->Register.Indirect) {
+ /*
+ * Currently the mesa/st doesn't generate indirect stores
+ * to doubles, it normally uses MOV to do indirect stores.
+ */
+ assert(dtype != TGSI_TYPE_DOUBLE);
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
@@ -1721,13 +1903,23 @@ emit_store_chan(
else {
LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
chan_index);
- lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
+
+ if (dtype == TGSI_TYPE_DOUBLE) {
+ LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
+ chan_index + 1);
+ emit_store_double_chan(bld_base, dtype, out_ptr, out_ptr2,
+ pred, value);
+ } else
+ lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
}
break;
case TGSI_FILE_TEMPORARY:
/* Temporaries are always stored as floats */
- value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+ if (dtype != TGSI_TYPE_DOUBLE)
+ value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+ else
+ value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
if (reg->Register.Indirect) {
LLVMValueRef index_vec; /* indexes into the temp registers */
@@ -1749,7 +1941,16 @@ emit_store_chan(
else {
LLVMValueRef temp_ptr;
temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
- lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
+
+ if (dtype == TGSI_TYPE_DOUBLE) {
+ LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
+ reg->Register.Index,
+ chan_index + 1);
+ emit_store_double_chan(bld_base, dtype, temp_ptr, temp_ptr2,
+ pred, value);
+ }
+ else
+ lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
}
break;
@@ -1818,13 +2019,16 @@ emit_store(
{
unsigned chan_index;
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
-
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
if(info->num_dst) {
LLVMValueRef pred[TGSI_NUM_CHANNELS];
emit_fetch_predicate( bld, inst, pred );
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+
+ if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
+ continue;
emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
}
}
@@ -2823,6 +3027,7 @@ void lp_emit_immediate_soa(
lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
break;
+ case TGSI_IMM_FLOAT64:
case TGSI_IMM_UINT32:
for( i = 0; i < size; ++i ) {
LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
@@ -2857,8 +3062,7 @@ void lp_emit_immediate_soa(
} else {
/* simply copy the immediate values into the next immediates[] slot */
unsigned i;
- const uint size = imm->Immediate.NrTokens - 1;
- assert(size <= 4);
+ assert(imm->Immediate.NrTokens - 1 <= 4);
assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
for(i = 0; i < 4; ++i )
@@ -3674,6 +3878,12 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
+ {
+ struct lp_type dbl_type;
+ dbl_type = type;
+ dbl_type.width *= 2;
+ lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
+ }
bld.mask = mask;
bld.inputs = inputs;
bld.outputs = outputs;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 191cf92d2d1..7fb449fd03f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -173,7 +173,7 @@ struct lp_build_context
*
* e.g. With PIPE_FORMAT_R32G32B32A32_FLOAT returns an lp_type with float[4]
*/
-static INLINE void
+static inline void
lp_type_from_format_desc(struct lp_type* type, const struct util_format_description *format_desc)
{
assert(format_desc->is_array);
@@ -189,14 +189,14 @@ lp_type_from_format_desc(struct lp_type* type, const struct util_format_descript
}
-static INLINE void
+static inline void
lp_type_from_format(struct lp_type* type, enum pipe_format format)
{
lp_type_from_format_desc(type, util_format_description(format));
}
-static INLINE unsigned
+static inline unsigned
lp_type_width(struct lp_type type)
{
return type.width * type.length;
@@ -204,7 +204,7 @@ lp_type_width(struct lp_type type)
/** Create scalar float type */
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_float(unsigned width)
{
struct lp_type res_type;
@@ -220,7 +220,7 @@ lp_type_float(unsigned width)
/** Create vector of float type */
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_float_vec(unsigned width, unsigned total_width)
{
struct lp_type res_type;
@@ -236,7 +236,7 @@ lp_type_float_vec(unsigned width, unsigned total_width)
/** Create scalar int type */
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_int(unsigned width)
{
struct lp_type res_type;
@@ -251,7 +251,7 @@ lp_type_int(unsigned width)
/** Create vector int type */
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_int_vec(unsigned width, unsigned total_width)
{
struct lp_type res_type;
@@ -266,7 +266,7 @@ lp_type_int_vec(unsigned width, unsigned total_width)
/** Create scalar uint type */
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_uint(unsigned width)
{
struct lp_type res_type;
@@ -280,7 +280,7 @@ lp_type_uint(unsigned width)
/** Create vector uint type */
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_uint_vec(unsigned width, unsigned total_width)
{
struct lp_type res_type;
@@ -293,7 +293,7 @@ lp_type_uint_vec(unsigned width, unsigned total_width)
}
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_unorm(unsigned width, unsigned total_width)
{
struct lp_type res_type;
@@ -307,7 +307,7 @@ lp_type_unorm(unsigned width, unsigned total_width)
}
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_fixed(unsigned width, unsigned total_width)
{
struct lp_type res_type;
@@ -322,7 +322,7 @@ lp_type_fixed(unsigned width, unsigned total_width)
}
-static INLINE struct lp_type
+static inline struct lp_type
lp_type_ufixed(unsigned width, unsigned total_width)
{
struct lp_type res_type;
@@ -364,7 +364,7 @@ LLVMTypeRef
lp_build_int_vec_type(struct gallivm_state *gallivm, struct lp_type type);
-static INLINE struct lp_type
+static inline struct lp_type
lp_float32_vec4_type(void)
{
struct lp_type type;
@@ -380,7 +380,7 @@ lp_float32_vec4_type(void)
}
-static INLINE struct lp_type
+static inline struct lp_type
lp_int32_vec4_type(void)
{
struct lp_type type;
@@ -396,7 +396,7 @@ lp_int32_vec4_type(void)
}
-static INLINE struct lp_type
+static inline struct lp_type
lp_unorm8_vec4_type(void)
{
struct lp_type type;
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index 6a124f7d716..95eed2698bc 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -231,18 +231,53 @@ hud_draw_string(struct hud_context *hud, unsigned x, unsigned y,
}
static void
-number_to_human_readable(uint64_t num, boolean is_in_bytes, char *out)
+number_to_human_readable(uint64_t num, uint64_t max_value,
+ enum pipe_driver_query_type type, char *out)
{
static const char *byte_units[] =
- {"", " KB", " MB", " GB", " TB", " PB", " EB"};
+ {" B", " KB", " MB", " GB", " TB", " PB", " EB"};
static const char *metric_units[] =
{"", " k", " M", " G", " T", " P", " E"};
- const char **units = is_in_bytes ? byte_units : metric_units;
- double divisor = is_in_bytes ? 1024 : 1000;
- int unit = 0;
+ static const char *time_units[] =
+ {" us", " ms", " s"}; /* based on microseconds */
+ static const char *hz_units[] =
+ {" Hz", " KHz", " MHz", " GHz"};
+ static const char *percent_units[] = {"%"};
+
+ const char **units;
+ unsigned max_unit;
+ double divisor = (type == PIPE_DRIVER_QUERY_TYPE_BYTES) ? 1024 : 1000;
+ unsigned unit = 0;
double d = num;
- while (d > divisor) {
+ switch (type) {
+ case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+ max_unit = ARRAY_SIZE(time_units)-1;
+ units = time_units;
+ break;
+ case PIPE_DRIVER_QUERY_TYPE_PERCENTAGE:
+ max_unit = ARRAY_SIZE(percent_units)-1;
+ units = percent_units;
+ break;
+ case PIPE_DRIVER_QUERY_TYPE_BYTES:
+ max_unit = ARRAY_SIZE(byte_units)-1;
+ units = byte_units;
+ break;
+ case PIPE_DRIVER_QUERY_TYPE_HZ:
+ max_unit = ARRAY_SIZE(hz_units)-1;
+ units = hz_units;
+ break;
+ default:
+ if (max_value == 100) {
+ max_unit = ARRAY_SIZE(percent_units)-1;
+ units = percent_units;
+ } else {
+ max_unit = ARRAY_SIZE(metric_units)-1;
+ units = metric_units;
+ }
+ }
+
+ while (d > divisor && unit < max_unit) {
d /= divisor;
unit++;
}
@@ -300,9 +335,9 @@ hud_pane_accumulate_vertices(struct hud_context *hud,
unsigned y = pane->inner_y1 + pane->inner_height * (5 - i) / 5 -
hud->font.glyph_height / 2;
- number_to_human_readable(pane->max_value * i / 5,
- pane->uses_byte_units, str);
- hud_draw_string(hud, x, y, str);
+ number_to_human_readable(pane->max_value * i / 5, pane->max_value,
+ pane->type, str);
+ hud_draw_string(hud, x, y, "%s", str);
}
/* draw info below the pane */
@@ -311,8 +346,8 @@ hud_pane_accumulate_vertices(struct hud_context *hud,
unsigned x = pane->x1 + 2;
unsigned y = pane->y2 + 2 + i*hud->font.glyph_height;
- number_to_human_readable(gr->current_value,
- pane->uses_byte_units, str);
+ number_to_human_readable(gr->current_value, pane->max_value,
+ pane->type, str);
hud_draw_string(hud, x, y, " %s: %s", gr->name, str);
i++;
}
@@ -417,8 +452,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
cso_save_blend(cso);
cso_save_depth_stencil_alpha(cso);
cso_save_fragment_shader(cso);
- cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
- cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
+ cso_save_fragment_sampler_views(cso);
+ cso_save_fragment_samplers(cso);
cso_save_rasterizer(cso);
cso_save_viewport(cso);
cso_save_stream_outputs(cso);
@@ -547,8 +582,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
cso_restore_blend(cso);
cso_restore_depth_stencil_alpha(cso);
cso_restore_fragment_shader(cso);
- cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
- cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
+ cso_restore_fragment_sampler_views(cso);
+ cso_restore_fragment_samplers(cso);
cso_restore_rasterizer(cso);
cso_restore_viewport(cso);
cso_restore_stream_outputs(cso);
@@ -869,12 +904,16 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
hud_pipe_query_install(pane, hud->pipe, "samples-passed",
- PIPE_QUERY_OCCLUSION_COUNTER, 0, 0, FALSE);
+ PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
+ PIPE_DRIVER_QUERY_TYPE_UINT64,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
}
else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
- PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0, FALSE);
+ PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
+ PIPE_DRIVER_QUERY_TYPE_UINT64,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
}
else {
boolean processed = FALSE;
@@ -901,7 +940,8 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
if (i < Elements(pipeline_statistics_names)) {
hud_pipe_query_install(pane, hud->pipe, name,
PIPE_QUERY_PIPELINE_STATISTICS, i,
- 0, FALSE);
+ 0, PIPE_DRIVER_QUERY_TYPE_UINT64,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
processed = TRUE;
}
}
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c
index 603aba7e8cd..f14305ea835 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -43,6 +43,7 @@ struct query_info {
struct pipe_context *pipe;
unsigned query_type;
unsigned result_index; /* unit depends on query_type */
+ enum pipe_driver_query_result_type result_type;
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
@@ -62,7 +63,8 @@ query_new_value(struct hud_graph *gr)
uint64_t now = os_time_get();
if (info->last_time) {
- pipe->end_query(pipe, info->query[info->head]);
+ if (info->query[info->head])
+ pipe->end_query(pipe, info->query[info->head]);
/* read query results */
while (1) {
@@ -70,7 +72,7 @@ query_new_value(struct hud_graph *gr)
union pipe_query_result result;
uint64_t *res64 = (uint64_t *)&result;
- if (pipe->get_query_result(pipe, query, FALSE, &result)) {
+ if (query && pipe->get_query_result(pipe, query, FALSE, &result)) {
info->results_cumulative += res64[info->result_index];
info->num_results++;
@@ -88,7 +90,8 @@ query_new_value(struct hud_graph *gr)
"gallium_hud: all queries are busy after %i frames, "
"can't add another query\n",
NUM_QUERIES);
- pipe->destroy_query(pipe, info->query[info->head]);
+ if (info->query[info->head])
+ pipe->destroy_query(pipe, info->query[info->head]);
info->query[info->head] =
pipe->create_query(pipe, info->query_type, 0);
}
@@ -106,22 +109,33 @@ query_new_value(struct hud_graph *gr)
}
if (info->num_results && info->last_time + gr->pane->period <= now) {
- /* compute the average value across all frames */
- hud_graph_add_value(gr, info->results_cumulative / info->num_results);
+ uint64_t value;
+
+ switch (info->result_type) {
+ default:
+ case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
+ value = info->results_cumulative / info->num_results;
+ break;
+ case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
+ value = info->results_cumulative;
+ break;
+ }
+
+ hud_graph_add_value(gr, value);
info->last_time = now;
info->results_cumulative = 0;
info->num_results = 0;
}
-
- pipe->begin_query(pipe, info->query[info->head]);
}
else {
/* initialize */
info->last_time = now;
info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
- pipe->begin_query(pipe, info->query[info->head]);
}
+
+ if (info->query[info->head])
+ pipe->begin_query(pipe, info->query[info->head]);
}
static void
@@ -148,7 +162,8 @@ void
hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
const char *name, unsigned query_type,
unsigned result_index,
- uint64_t max_value, boolean uses_byte_units)
+ uint64_t max_value, enum pipe_driver_query_type type,
+ enum pipe_driver_query_result_type result_type)
{
struct hud_graph *gr;
struct query_info *info;
@@ -172,12 +187,12 @@ hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
info->pipe = pipe;
info->query_type = query_type;
info->result_index = result_index;
+ info->result_type = result_type;
hud_pane_add_graph(pane, gr);
if (pane->max_value < max_value)
hud_pane_set_max_value(pane, max_value);
- if (uses_byte_units)
- pane->uses_byte_units = TRUE;
+ pane->type = type;
}
boolean
@@ -187,7 +202,6 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
struct pipe_screen *screen = pipe->screen;
struct pipe_driver_query_info query;
unsigned num_queries, i;
- boolean uses_byte_units;
boolean found = FALSE;
if (!screen->get_driver_query_info)
@@ -206,9 +220,8 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
if (!found)
return FALSE;
- uses_byte_units = query.type == PIPE_DRIVER_QUERY_TYPE_BYTES;
hud_pipe_query_install(pane, pipe, query.name, query.query_type, 0,
- query.max_value.u64, uses_byte_units);
+ query.max_value.u64, query.type, query.result_type);
return TRUE;
}
diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h
index 632926b87f5..01caf7b8b2c 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -66,7 +66,7 @@ struct hud_pane {
uint64_t ceiling;
unsigned dyn_ceil_last_ran;
boolean dyn_ceiling;
- boolean uses_byte_units;
+ enum pipe_driver_query_type type;
uint64_t period; /* in microseconds */
struct list_head graph_list;
@@ -89,7 +89,9 @@ void hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index);
void hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
const char *name, unsigned query_type,
unsigned result_index,
- uint64_t max_value, boolean uses_byte_units);
+ uint64_t max_value,
+ enum pipe_driver_query_type type,
+ enum pipe_driver_query_result_type result_type);
boolean hud_driver_query_install(struct hud_pane *pane,
struct pipe_context *pipe, const char *name);
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 061f39ac6f3..93dfb803389 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -184,7 +184,8 @@ ttn_emit_declaration(struct ttn_compile *c)
c->samp_types[decl->Range.First + i] = type;
}
} else {
- nir_variable *var;
+ bool is_array = (array_size > 1);
+
assert(file == TGSI_FILE_INPUT ||
file == TGSI_FILE_OUTPUT ||
file == TGSI_FILE_CONSTANT);
@@ -193,76 +194,99 @@ ttn_emit_declaration(struct ttn_compile *c)
if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension)
return;
- var = rzalloc(b->shader, nir_variable);
- var->data.driver_location = decl->Range.First;
-
- var->type = glsl_vec4_type();
- if (array_size > 1)
- var->type = glsl_array_type(var->type, array_size);
-
- switch (file) {
- case TGSI_FILE_INPUT:
- var->data.read_only = true;
- var->data.mode = nir_var_shader_in;
- var->name = ralloc_asprintf(var, "in_%d", decl->Range.First);
-
- /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
- * instead, but nothing in NIR core is looking at the value
- * currently, and this is less change to drivers.
- */
- var->data.location = decl->Semantic.Name;
- var->data.index = decl->Semantic.Index;
-
- /* We definitely need to translate the interpolation field, because
- * nir_print will decode it.
- */
- switch (decl->Interp.Interpolate) {
- case TGSI_INTERPOLATE_CONSTANT:
- var->data.interpolation = INTERP_QUALIFIER_FLAT;
- break;
- case TGSI_INTERPOLATE_LINEAR:
- var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
- break;
- case TGSI_INTERPOLATE_PERSPECTIVE:
- var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
- break;
- }
-
- exec_list_push_tail(&b->shader->inputs, &var->node);
- break;
- case TGSI_FILE_OUTPUT: {
- /* Since we can't load from outputs in the IR, we make temporaries
- * for the outputs and emit stores to the real outputs at the end of
- * the shader.
- */
- nir_register *reg = nir_local_reg_create(b->impl);
- reg->num_components = 4;
- if (array_size > 1)
- reg->num_array_elems = array_size;
-
- var->data.mode = nir_var_shader_out;
- var->name = ralloc_asprintf(var, "out_%d", decl->Range.First);
-
- var->data.location = decl->Semantic.Name;
- var->data.index = decl->Semantic.Index;
-
- for (i = 0; i < array_size; i++) {
- c->output_regs[decl->Range.First + i].offset = i;
- c->output_regs[decl->Range.First + i].reg = reg;
- }
-
- exec_list_push_tail(&b->shader->outputs, &var->node);
+ if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
+ is_array = (is_array && decl->Declaration.Array &&
+ (decl->Array.ArrayID != 0));
}
- break;
- case TGSI_FILE_CONSTANT:
- var->data.mode = nir_var_uniform;
- var->name = ralloc_asprintf(var, "uniform_%d", decl->Range.First);
- exec_list_push_tail(&b->shader->uniforms, &var->node);
- break;
- default:
- unreachable("bad declaration file");
- return;
+ for (i = 0; i < array_size; i++) {
+ unsigned idx = decl->Range.First + i;
+ nir_variable *var = rzalloc(b->shader, nir_variable);
+
+ var->data.driver_location = idx;
+
+ var->type = glsl_vec4_type();
+ if (is_array)
+ var->type = glsl_array_type(var->type, array_size);
+
+ switch (file) {
+ case TGSI_FILE_INPUT:
+ var->data.read_only = true;
+ var->data.mode = nir_var_shader_in;
+ var->name = ralloc_asprintf(var, "in_%d", idx);
+
+ /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
+ * instead, but nothing in NIR core is looking at the value
+ * currently, and this is less change to drivers.
+ */
+ var->data.location = decl->Semantic.Name;
+ var->data.index = decl->Semantic.Index;
+
+ /* We definitely need to translate the interpolation field, because
+ * nir_print will decode it.
+ */
+ switch (decl->Interp.Interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ break;
+ case TGSI_INTERPOLATE_LINEAR:
+ var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+ break;
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+ break;
+ }
+
+ exec_list_push_tail(&b->shader->inputs, &var->node);
+ break;
+ case TGSI_FILE_OUTPUT: {
+ /* Since we can't load from outputs in the IR, we make temporaries
+ * for the outputs and emit stores to the real outputs at the end of
+ * the shader.
+ */
+ nir_register *reg = nir_local_reg_create(b->impl);
+ reg->num_components = 4;
+ if (is_array)
+ reg->num_array_elems = array_size;
+
+ var->data.mode = nir_var_shader_out;
+ var->name = ralloc_asprintf(var, "out_%d", idx);
+
+ var->data.location = decl->Semantic.Name;
+ if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+ decl->Semantic.Index == 0 &&
+ c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+ var->data.index = -1;
+ else
+ var->data.index = decl->Semantic.Index;
+
+ if (is_array) {
+ unsigned j;
+ for (j = 0; j < array_size; j++) {
+ c->output_regs[idx + j].offset = i + j;
+ c->output_regs[idx + j].reg = reg;
+ }
+ } else {
+ c->output_regs[idx].offset = i;
+ c->output_regs[idx].reg = reg;
+ }
+
+ exec_list_push_tail(&b->shader->outputs, &var->node);
+ }
+ break;
+ case TGSI_FILE_CONSTANT:
+ var->data.mode = nir_var_uniform;
+ var->name = ralloc_asprintf(var, "uniform_%d", idx);
+
+ exec_list_push_tail(&b->shader->uniforms, &var->node);
+ break;
+ default:
+ unreachable("bad declaration file");
+ return;
+ }
+
+ if (is_array)
+ break;
}
}
@@ -975,6 +999,9 @@ static void
setup_texture_info(nir_tex_instr *instr, unsigned texture)
{
switch (texture) {
+ case TGSI_TEXTURE_BUFFER:
+ instr->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ break;
case TGSI_TEXTURE_1D:
instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
break;
@@ -1068,6 +1095,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
op = nir_texop_txb;
num_srcs = 2;
break;
+ case TGSI_OPCODE_TXB2:
+ op = nir_texop_txb;
+ num_srcs = 2;
+ samp = 2;
+ break;
case TGSI_OPCODE_TXL:
op = nir_texop_txl;
num_srcs = 2;
@@ -1078,7 +1110,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
samp = 2;
break;
case TGSI_OPCODE_TXF:
- op = nir_texop_txf;
+ if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+ tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ op = nir_texop_txf_ms;
+ } else {
+ op = nir_texop_txf;
+ }
num_srcs = 2;
break;
case TGSI_OPCODE_TXD:
@@ -1164,6 +1201,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
src_number++;
}
+ if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
+ instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
+ instr->src[src_number].src_type = nir_tex_src_bias;
+ src_number++;
+ }
+
if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
instr->src[src_number].src_type = nir_tex_src_lod;
@@ -1178,7 +1221,10 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
- instr->src[src_number].src_type = nir_tex_src_lod;
+ if (op == nir_texop_txf_ms)
+ instr->src[src_number].src_type = nir_tex_src_ms_index;
+ else
+ instr->src[src_number].src_type = nir_tex_src_lod;
src_number++;
}
@@ -1472,7 +1518,7 @@ ttn_emit_instruction(struct ttn_compile *c)
return;
nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
- for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) {
+ for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
src[i] = ttn_get_src(c, &tgsi_inst->Src[i]);
}
nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);
@@ -1708,9 +1754,11 @@ ttn_add_output_stores(struct ttn_compile *c)
for (i = 0; i < array_len; i++) {
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+ unsigned loc = var->data.driver_location + i;
store->num_components = 4;
- store->const_index[0] = var->data.driver_location + i;
- store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
+ store->const_index[0] = loc;
+ store->src[0].reg.reg = c->output_regs[loc].reg;
+ store->src[0].reg.base_offset = c->output_regs[loc].offset;
nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
}
}
diff --git a/src/gallium/auxiliary/os/os_memory_aligned.h b/src/gallium/auxiliary/os/os_memory_aligned.h
index bb15f24ade3..f7d0e3652ed 100644
--- a/src/gallium/auxiliary/os/os_memory_aligned.h
+++ b/src/gallium/auxiliary/os/os_memory_aligned.h
@@ -55,7 +55,7 @@ add_overflow_size_t(size_t a, size_t b, size_t *res)
/**
* Return memory on given byte alignment
*/
-static INLINE void *
+static inline void *
os_malloc_aligned(size_t size, size_t alignment)
{
char *ptr, *buf;
@@ -87,7 +87,7 @@ os_malloc_aligned(size_t size, size_t alignment)
/**
* Free memory returned by align_malloc().
*/
-static INLINE void
+static inline void
os_free_aligned(void *ptr)
{
if (ptr) {
diff --git a/src/gallium/auxiliary/os/os_memory_stdc.h b/src/gallium/auxiliary/os/os_memory_stdc.h
index 806e5363568..c9fde06d8ac 100644
--- a/src/gallium/auxiliary/os/os_memory_stdc.h
+++ b/src/gallium/auxiliary/os/os_memory_stdc.h
@@ -50,7 +50,7 @@
#if defined(HAVE_POSIX_MEMALIGN)
-static INLINE void *
+static inline void *
os_malloc_aligned(size_t size, size_t alignment)
{
void *ptr;
diff --git a/src/gallium/auxiliary/os/os_mman.h b/src/gallium/auxiliary/os/os_mman.h
index e892610bdbd..2ae0027c1c2 100644
--- a/src/gallium/auxiliary/os/os_mman.h
+++ b/src/gallium/auxiliary/os/os_mman.h
@@ -58,7 +58,7 @@ extern "C" {
extern void *__mmap2(void *, size_t, int, int, int, size_t);
-static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags,
+static inline void *os_mmap(void *addr, size_t length, int prot, int flags,
int fd, loff_t offset)
{
/* offset must be aligned to 4096 (not necessarily the page size) */
@@ -78,7 +78,7 @@ static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags,
# define os_mmap(addr, length, prot, flags, fd, offset) \
mmap(addr, length, prot, flags, fd, offset)
-static INLINE int os_munmap(void *addr, size_t length)
+static inline int os_munmap(void *addr, size_t length)
{
/* Copied from configure code generated by AC_SYS_LARGEFILE */
#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index e9da8954885..be8adcc6cf2 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -54,7 +54,7 @@ typedef thrd_t pipe_thread;
#define PIPE_THREAD_ROUTINE( name, param ) \
int name( void *param )
-static INLINE pipe_thread pipe_thread_create( PIPE_THREAD_ROUTINE((*routine), ), void *param )
+static inline pipe_thread pipe_thread_create( PIPE_THREAD_ROUTINE((*routine), ), void *param )
{
pipe_thread thread;
#ifdef HAVE_PTHREAD
@@ -75,17 +75,17 @@ static INLINE pipe_thread pipe_thread_create( PIPE_THREAD_ROUTINE((*routine), ),
return thread;
}
-static INLINE int pipe_thread_wait( pipe_thread thread )
+static inline int pipe_thread_wait( pipe_thread thread )
{
return thrd_join( thread, NULL );
}
-static INLINE int pipe_thread_destroy( pipe_thread thread )
+static inline int pipe_thread_destroy( pipe_thread thread )
{
return thrd_detach( thread );
}
-static INLINE void pipe_thread_setname( const char *name )
+static inline void pipe_thread_setname( const char *name )
{
#if defined(HAVE_PTHREAD)
# if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
@@ -145,17 +145,17 @@ typedef cnd_t pipe_condvar;
typedef pthread_barrier_t pipe_barrier;
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
+static inline void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
{
pthread_barrier_init(barrier, NULL, count);
}
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
+static inline void pipe_barrier_destroy(pipe_barrier *barrier)
{
pthread_barrier_destroy(barrier);
}
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
+static inline void pipe_barrier_wait(pipe_barrier *barrier)
{
pthread_barrier_wait(barrier);
}
@@ -171,7 +171,7 @@ typedef struct {
pipe_condvar condvar;
} pipe_barrier;
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
+static inline void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
{
barrier->count = count;
barrier->waiters = 0;
@@ -180,14 +180,14 @@ static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
pipe_condvar_init(barrier->condvar);
}
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
+static inline void pipe_barrier_destroy(pipe_barrier *barrier)
{
assert(barrier->waiters == 0);
pipe_mutex_destroy(barrier->mutex);
pipe_condvar_destroy(barrier->condvar);
}
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
+static inline void pipe_barrier_wait(pipe_barrier *barrier)
{
pipe_mutex_lock(barrier->mutex);
@@ -225,7 +225,7 @@ typedef struct
} pipe_semaphore;
-static INLINE void
+static inline void
pipe_semaphore_init(pipe_semaphore *sema, int init_val)
{
pipe_mutex_init(sema->mutex);
@@ -233,7 +233,7 @@ pipe_semaphore_init(pipe_semaphore *sema, int init_val)
sema->counter = init_val;
}
-static INLINE void
+static inline void
pipe_semaphore_destroy(pipe_semaphore *sema)
{
pipe_mutex_destroy(sema->mutex);
@@ -241,7 +241,7 @@ pipe_semaphore_destroy(pipe_semaphore *sema)
}
/** Signal/increment semaphore counter */
-static INLINE void
+static inline void
pipe_semaphore_signal(pipe_semaphore *sema)
{
pipe_mutex_lock(sema->mutex);
@@ -251,7 +251,7 @@ pipe_semaphore_signal(pipe_semaphore *sema)
}
/** Wait for semaphore counter to be greater than zero */
-static INLINE void
+static inline void
pipe_semaphore_wait(pipe_semaphore *sema)
{
pipe_mutex_lock(sema->mutex);
@@ -277,7 +277,7 @@ typedef struct {
#define PIPE_TSD_INIT_MAGIC 0xff8adc98
-static INLINE void
+static inline void
pipe_tsd_init(pipe_tsd *tsd)
{
if (tss_create(&tsd->key, NULL/*free*/) != 0) {
@@ -286,7 +286,7 @@ pipe_tsd_init(pipe_tsd *tsd)
tsd->initMagic = PIPE_TSD_INIT_MAGIC;
}
-static INLINE void *
+static inline void *
pipe_tsd_get(pipe_tsd *tsd)
{
if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
@@ -295,7 +295,7 @@ pipe_tsd_get(pipe_tsd *tsd)
return tss_get(tsd->key);
}
-static INLINE void
+static inline void
pipe_tsd_set(pipe_tsd *tsd, void *value)
{
if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
index f7e4ca49c7c..3d2e4167222 100644
--- a/src/gallium/auxiliary/os/os_time.c
+++ b/src/gallium/auxiliary/os/os_time.c
@@ -33,11 +33,13 @@
*/
-#include "pipe/p_config.h"
+#include "pipe/p_defines.h"
+#include "util/u_atomic.h"
#if defined(PIPE_OS_UNIX)
# include <time.h> /* timeval */
# include <sys/time.h> /* timeval */
+# include <sched.h> /* sched_yield */
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
# include <windows.h>
#else
@@ -92,3 +94,78 @@ os_time_sleep(int64_t usecs)
}
#endif
+
+
+/* Convert a relative timeout (ns) into an absolute os_time_get_nano() time. */
+int64_t
+os_time_get_absolute_timeout(uint64_t timeout)
+{
+   int64_t time;
+
+   /* Infinite, or too large to represent as a signed timestamp. */
+   if (timeout == PIPE_TIMEOUT_INFINITE || timeout > INT64_MAX)
+      return PIPE_TIMEOUT_INFINITE;
+
+   /* Test before adding: signed overflow is undefined behaviour, so a
+    * wrap-around check made after the addition may be optimized away. */
+   time = os_time_get_nano();
+   if (time > 0 && (int64_t)timeout > INT64_MAX - time)
+      return PIPE_TIMEOUT_INFINITE;
+
+   return time + (int64_t)timeout;
+}
+
+
+bool
+os_wait_until_zero(volatile int *var, uint64_t timeout)
+{
+ if (!p_atomic_read(var))
+ return true;
+
+ if (!timeout)
+ return false;
+
+ if (timeout == PIPE_TIMEOUT_INFINITE) {
+ while (p_atomic_read(var)) {
+#if defined(PIPE_OS_UNIX)
+ sched_yield();
+#endif
+ }
+ return true;
+ }
+ else {
+ int64_t start_time = os_time_get_nano();
+ int64_t end_time = start_time + timeout;
+
+ while (p_atomic_read(var)) {
+ if (os_time_timeout(start_time, end_time, os_time_get_nano()))
+ return false;
+
+#if defined(PIPE_OS_UNIX)
+ sched_yield();
+#endif
+ }
+ return true;
+ }
+}
+
+
+bool
+os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout)
+{
+ if (!p_atomic_read(var))
+ return true;
+
+ if (timeout == PIPE_TIMEOUT_INFINITE)
+ return os_wait_until_zero(var, PIPE_TIMEOUT_INFINITE);
+
+ while (p_atomic_read(var)) {
+ if (os_time_get_nano() >= timeout)
+ return false;
+
+#if defined(PIPE_OS_UNIX)
+ sched_yield();
+#endif
+ }
+ return true;
+}
diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
index 4fab03cc671..9312e028809 100644
--- a/src/gallium/auxiliary/os/os_time.h
+++ b/src/gallium/auxiliary/os/os_time.h
@@ -45,7 +45,7 @@
#include "pipe/p_compiler.h"
-#ifdef __cplusplus
+#ifdef __cplusplus
extern "C" {
#endif
@@ -60,9 +60,10 @@ os_time_get_nano(void);
/*
* Get the current time in microseconds from an unknown base.
*/
-static INLINE int64_t
-os_time_get(void) {
- return os_time_get_nano() / 1000;
+static inline int64_t
+os_time_get(void)
+{
+ return os_time_get_nano() / 1000;
}
@@ -82,19 +83,56 @@ os_time_sleep(int64_t usecs);
*
* Returns true if the current time has elapsed beyond the specified interval.
*/
-static INLINE boolean
+static inline boolean
os_time_timeout(int64_t start,
int64_t end,
int64_t curr)
{
- if(start <= end)
+ if (start <= end)
return !(start <= curr && curr < end);
else
return !((start <= curr) || (curr < end));
}
-#ifdef __cplusplus
+/**
+ * Convert a relative timeout in nanoseconds into an absolute timeout,
+ * in other words, it returns current time + timeout.
+ * os_time_get_nano() must be monotonic.
+ * PIPE_TIMEOUT_INFINITE is passed through unchanged. If the calculation
+ * overflows, PIPE_TIMEOUT_INFINITE is returned.
+ */
+int64_t
+os_time_get_absolute_timeout(uint64_t timeout);
+
+
+/**
+ * Wait until the variable at the given memory location is zero.
+ *
+ * \param var variable
+ * \param timeout timeout in ns, can be anything from 0 (no wait) to
+ * PIPE_TIMEOUT_INFINITE (wait forever)
+ * \return true if the variable is zero
+ */
+bool
+os_wait_until_zero(volatile int *var, uint64_t timeout);
+
+
+/**
+ * Wait until the variable at the given memory location is zero.
+ * The timeout is the absolute time when the waiting should stop. If it is
+ * less than or equal to the current time, it only returns the status and
+ * doesn't wait. PIPE_TIMEOUT_INFINITE waits forever. This requires that
+ * os_time_get_nano is monotonic.
+ *
+ * \param var variable
+ * \param timeout the time in ns when the waiting should stop
+ * \return true if the variable is zero
+ */
+bool
+os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout);
+
+#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index cb6035d85c9..8c837996539 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -1,37 +1,28 @@
include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
-AM_CPPFLAGS = $(DEFINES) \
- $(VISIBILITY_CFLAGS) \
- -I$(top_srcdir)/include \
- -I$(top_srcdir)/src \
+# XXX: check if we need the gallium/winsys include
+AM_CFLAGS = \
-I$(top_srcdir)/src/loader \
- -I$(top_srcdir)/src/gallium/include \
- -I$(top_srcdir)/src/gallium/auxiliary \
- -I$(top_srcdir)/src/gallium/winsys
+ -I$(top_srcdir)/src/gallium/winsys \
+ $(GALLIUM_PIPE_LOADER_DEFINES) \
+ $(GALLIUM_CFLAGS) \
+ $(VISIBILITY_CFLAGS)
noinst_LTLIBRARIES = libpipe_loader.la
-noinst_LTLIBRARIES += libpipe_loader_client.la
+
+libpipe_loader_la_SOURCES = \
+ $(COMMON_SOURCES)
if HAVE_DRM_LOADER_GALLIUM
-AM_CFLAGS = $(LIBDRM_CFLAGS)
+AM_CFLAGS += \
+ $(LIBDRM_CFLAGS)
-COMMON_SOURCES += $(DRM_SOURCES)
+libpipe_loader_la_SOURCES += \
+ $(DRM_SOURCES)
-COMMON_LIBADD = \
+libpipe_loader_la_LIBADD = \
$(top_builddir)/src/loader/libloader.la
endif
-libpipe_loader_la_CFLAGS = \
- $(GALLIUM_PIPE_LOADER_DEFINES) \
- $(AM_CFLAGS) $(AM_CPPFLAGS)
-libpipe_loader_la_SOURCES = $(COMMON_SOURCES)
-libpipe_loader_la_LIBADD = $(COMMON_LIBADD) \
- $(GALLIUM_PIPE_LOADER_LIBS)
-
-libpipe_loader_client_la_CFLAGS = \
- $(GALLIUM_PIPE_LOADER_CLIENT_DEFINES) \
- $(AM_CFLAGS) $(AM_CPPFLAGS)
-libpipe_loader_client_la_SOURCES = $(COMMON_SOURCES)
-libpipe_loader_client_la_LIBADD = $(COMMON_LIBADD) \
- $(GALLIUM_PIPE_LOADER_CLIENT_LIBS)
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
index 9f43f17a6e2..9b8712666bb 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -36,10 +36,6 @@
#include "pipe/p_compiler.h"
#include "state_tracker/drm_driver.h"
-#ifdef HAVE_PIPE_LOADER_XLIB
-#include <X11/Xlib.h>
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -116,21 +112,6 @@ pipe_loader_configuration(struct pipe_loader_device *dev,
void
pipe_loader_release(struct pipe_loader_device **devs, int ndev);
-#ifdef HAVE_PIPE_LOADER_XLIB
-
-/**
- * Initialize Xlib for an associated display.
- *
- * This function is platform-specific.
- *
- * \sa pipe_loader_probe
- */
-bool
-pipe_loader_sw_probe_xlib(struct pipe_loader_device **devs, Display *display);
-
-#endif
-
-
#ifdef HAVE_PIPE_LOADER_DRI
/**
@@ -195,13 +176,9 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev);
* This function is platform-specific.
*
* \sa pipe_loader_probe
- *
- * \param auth_x If true, the pipe-loader will attempt to
- * authenticate with the X server.
*/
bool
-pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
- boolean auth_x);
+pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd);
#endif
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index ffeb29906b5..1799df7e4c5 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -35,12 +35,6 @@
#include
#include
-#ifdef HAVE_PIPE_LOADER_XCB
-
-#include <xcb/dri2.h>
-
-#endif
-
#include "loader.h"
#include "state_tracker/drm_driver.h"
#include "pipe_loader_priv.h"
@@ -64,78 +58,8 @@ struct pipe_loader_drm_device {
static struct pipe_loader_ops pipe_loader_drm_ops;
-#ifdef HAVE_PIPE_LOADER_XCB
-
-static xcb_screen_t *
-get_xcb_screen(xcb_screen_iterator_t iter, int screen)
-{
- for (; iter.rem; --screen, xcb_screen_next(&iter))
- if (screen == 0)
- return iter.data;
-
- return NULL;
-}
-
-#endif
-
-static void
-pipe_loader_drm_x_auth(int fd)
-{
-#ifdef HAVE_PIPE_LOADER_XCB
- /* Try authenticate with the X server to give us access to devices that X
- * is running on. */
- xcb_connection_t *xcb_conn;
- const xcb_setup_t *xcb_setup;
- xcb_screen_iterator_t s;
- xcb_dri2_connect_cookie_t connect_cookie;
- xcb_dri2_connect_reply_t *connect;
- drm_magic_t magic;
- xcb_dri2_authenticate_cookie_t authenticate_cookie;
- xcb_dri2_authenticate_reply_t *authenticate;
- int screen;
-
- xcb_conn = xcb_connect(NULL, &screen);
-
- if(!xcb_conn)
- return;
-
- xcb_setup = xcb_get_setup(xcb_conn);
-
- if (!xcb_setup)
- goto disconnect;
-
- s = xcb_setup_roots_iterator(xcb_setup);
- connect_cookie = xcb_dri2_connect_unchecked(xcb_conn,
- get_xcb_screen(s, screen)->root,
- XCB_DRI2_DRIVER_TYPE_DRI);
- connect = xcb_dri2_connect_reply(xcb_conn, connect_cookie, NULL);
-
- if (!connect || connect->driver_name_length
- + connect->device_name_length == 0) {
-
- goto disconnect;
- }
-
- if (drmGetMagic(fd, &magic))
- goto disconnect;
-
- authenticate_cookie = xcb_dri2_authenticate_unchecked(xcb_conn,
- s.data->root,
- magic);
- authenticate = xcb_dri2_authenticate_reply(xcb_conn,
- authenticate_cookie,
- NULL);
- FREE(authenticate);
-
-disconnect:
- xcb_disconnect(xcb_conn);
-
-#endif
-}
-
bool
-pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
- boolean auth_x)
+pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
{
struct pipe_loader_drm_device *ddev = CALLOC_STRUCT(pipe_loader_drm_device);
int vendor_id, chip_id;
@@ -153,9 +77,6 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
ddev->base.ops = &pipe_loader_drm_ops;
ddev->fd = fd;
- if (auth_x)
- pipe_loader_drm_x_auth(fd);
-
ddev->base.driver_name = loader_get_driver_for_fd(fd, _LOADER_GALLIUM);
if (!ddev->base.driver_name)
goto fail;
@@ -168,35 +89,20 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
return false;
}
-static int
-open_drm_minor(int minor)
-{
- char path[PATH_MAX];
- snprintf(path, sizeof(path), DRM_DEV_NAME, DRM_DIR_NAME, minor);
- return open(path, O_RDWR, 0);
-}
-
static int
open_drm_render_node_minor(int minor)
{
char path[PATH_MAX];
snprintf(path, sizeof(path), DRM_RENDER_NODE_DEV_NAME_FORMAT, DRM_DIR_NAME,
minor);
- return open(path, O_RDWR, 0);
+ return loader_open_device(path);
}
int
pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
{
- int i, k, fd, num_render_node_devs;
- int j = 0;
+ int i, j, fd;
- struct {
- unsigned vendor_id;
- unsigned chip_id;
- } render_node_devs[DRM_RENDER_NODE_MAX_NODES];
-
- /* Look for render nodes first */
for (i = DRM_RENDER_NODE_MIN_MINOR, j = 0;
i <= DRM_RENDER_NODE_MAX_MINOR; i++) {
fd = open_drm_render_node_minor(i);
@@ -204,14 +110,11 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
if (fd < 0)
continue;
- if (!pipe_loader_drm_probe_fd(&dev, fd, false)) {
+ if (!pipe_loader_drm_probe_fd(&dev, fd)) {
close(fd);
continue;
}
- render_node_devs[j].vendor_id = dev->u.pci.vendor_id;
- render_node_devs[j].chip_id = dev->u.pci.chip_id;
-
if (j < ndev) {
devs[j] = dev;
} else {
@@ -221,46 +124,6 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
j++;
}
- num_render_node_devs = j;
-
- /* Next look for drm devices. */
- for (i = 0; i < DRM_MAX_MINOR; i++) {
- struct pipe_loader_device *dev;
- boolean duplicate = FALSE;
- fd = open_drm_minor(i);
- if (fd < 0)
- continue;
-
- if (!pipe_loader_drm_probe_fd(&dev, fd, true)) {
- close(fd);
- continue;
- }
-
- /* Check to make sure we aren't already accessing this device via
- * render nodes.
- */
- for (k = 0; k < num_render_node_devs; k++) {
- if (dev->u.pci.vendor_id == render_node_devs[k].vendor_id &&
- dev->u.pci.chip_id == render_node_devs[k].chip_id) {
- close(fd);
- dev->ops->release(&dev);
- duplicate = TRUE;
- break;
- }
- }
-
- if (duplicate)
- continue;
-
- if (j < ndev) {
- devs[j] = dev;
- } else {
- dev->ops->release(&dev);
- }
-
- j++;
- }
-
return j;
}
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 3d332645231..6794930193d 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -32,10 +32,6 @@
#include "sw/dri/dri_sw_winsys.h"
#include "sw/null/null_sw_winsys.h"
#include "sw/wrapper/wrapper_sw_winsys.h"
-#ifdef HAVE_PIPE_LOADER_XLIB
-/* Explicitly wrap the header to ease build without X11 headers */
-#include "sw/xlib/xlib_sw_winsys.h"
-#endif
#include "target-helpers/inline_sw_helper.h"
#include "state_tracker/drisw_api.h"
@@ -53,29 +49,6 @@ static struct sw_winsys *(*backends[])() = {
null_sw_create
};
-#ifdef HAVE_PIPE_LOADER_XLIB
-bool
-pipe_loader_sw_probe_xlib(struct pipe_loader_device **devs, Display *display)
-{
- struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
-
- if (!sdev)
- return false;
-
- sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
- sdev->base.driver_name = "swrast";
- sdev->base.ops = &pipe_loader_sw_ops;
- sdev->ws = xlib_create_sw_winsys(display);
- if (!sdev->ws) {
- FREE(sdev);
- return false;
- }
- *devs = &sdev->base;
-
- return true;
-}
-#endif
-
#ifdef HAVE_PIPE_LOADER_DRI
bool
pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 03bdce31513..ba48d461d5c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -158,7 +158,7 @@ struct pb_vtbl
/* Accessor functions for pb->vtbl:
*/
-static INLINE void *
+static inline void *
pb_map(struct pb_buffer *buf,
unsigned flags, void *flush_ctx)
{
@@ -170,7 +170,7 @@ pb_map(struct pb_buffer *buf,
}
-static INLINE void
+static inline void
pb_unmap(struct pb_buffer *buf)
{
assert(buf);
@@ -181,7 +181,7 @@ pb_unmap(struct pb_buffer *buf)
}
-static INLINE void
+static inline void
pb_get_base_buffer( struct pb_buffer *buf,
struct pb_buffer **base_buf,
pb_size *offset )
@@ -200,7 +200,7 @@ pb_get_base_buffer( struct pb_buffer *buf,
}
-static INLINE enum pipe_error
+static inline enum pipe_error
pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags)
{
assert(buf);
@@ -211,7 +211,7 @@ pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags)
}
-static INLINE void
+static inline void
pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence)
{
assert(buf);
@@ -222,7 +222,7 @@ pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence)
}
-static INLINE void
+static inline void
pb_destroy(struct pb_buffer *buf)
{
assert(buf);
@@ -232,7 +232,7 @@ pb_destroy(struct pb_buffer *buf)
buf->vtbl->destroy(buf);
}
-static INLINE void
+static inline void
pb_reference(struct pb_buffer **dst,
struct pb_buffer *src)
{
@@ -248,7 +248,7 @@ pb_reference(struct pb_buffer **dst,
* Utility function to check whether the provided alignment is consistent with
* the requested or not.
*/
-static INLINE boolean
+static inline boolean
pb_check_alignment(pb_size requested, pb_size provided)
{
if(!requested)
@@ -265,7 +265,7 @@ pb_check_alignment(pb_size requested, pb_size provided)
* Utility function to check whether the provided alignment is consistent with
* the requested or not.
*/
-static INLINE boolean
+static inline boolean
pb_check_usage(unsigned requested, unsigned provided)
{
return (requested & provided) == requested ? TRUE : FALSE;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index fc81e11b972..08935b4dec7 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -149,7 +149,7 @@ struct fenced_buffer
};
-static INLINE struct fenced_manager *
+static inline struct fenced_manager *
fenced_manager(struct pb_manager *mgr)
{
assert(mgr);
@@ -157,7 +157,7 @@ fenced_manager(struct pb_manager *mgr)
}
-static INLINE struct fenced_buffer *
+static inline struct fenced_buffer *
fenced_buffer(struct pb_buffer *buf)
{
assert(buf);
@@ -240,7 +240,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
}
-static INLINE void
+static inline void
fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
@@ -265,7 +265,7 @@ fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
*
* Reference count should be incremented before calling this function.
*/
-static INLINE void
+static inline void
fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
@@ -289,7 +289,7 @@ fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
*
* Returns TRUE if the buffer was detroyed.
*/
-static INLINE boolean
+static inline boolean
fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
@@ -326,7 +326,7 @@ fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
* This function will release and re-acquire the mutex, so any copy of mutable
* state must be discarded after calling it.
*/
-static INLINE enum pipe_error
+static inline enum pipe_error
fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
@@ -550,7 +550,7 @@ fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf)
* This function is a shorthand around pb_manager::create_buffer for
* fenced_buffer_create_gpu_storage_locked()'s benefit.
*/
-static INLINE boolean
+static inline boolean
fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index bf1a538bf79..b97771457d6 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -49,7 +49,7 @@ struct malloc_buffer
extern const struct pb_vtbl malloc_buffer_vtbl;
-static INLINE struct malloc_buffer *
+static inline struct malloc_buffer *
malloc_buffer(struct pb_buffer *buf)
{
assert(buf);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
index 62df2a6b9de..47cbaeb20ac 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
@@ -50,7 +50,7 @@ struct pb_alt_manager
};
-static INLINE struct pb_alt_manager *
+static inline struct pb_alt_manager *
pb_alt_manager(struct pb_manager *mgr)
{
assert(mgr);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 5023687ec04..3b35049f679 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -88,7 +88,7 @@ struct pb_cache_manager
};
-static INLINE struct pb_cache_buffer *
+static inline struct pb_cache_buffer *
pb_cache_buffer(struct pb_buffer *buf)
{
assert(buf);
@@ -96,7 +96,7 @@ pb_cache_buffer(struct pb_buffer *buf)
}
-static INLINE struct pb_cache_manager *
+static inline struct pb_cache_manager *
pb_cache_manager(struct pb_manager *mgr)
{
assert(mgr);
@@ -107,7 +107,7 @@ pb_cache_manager(struct pb_manager *mgr)
/**
* Actually destroy the buffer.
*/
-static INLINE void
+static inline void
_pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
{
struct pb_cache_manager *mgr = buf->mgr;
@@ -235,7 +235,7 @@ pb_cache_buffer_vtbl = {
};
-static INLINE int
+static inline int
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
pb_size size,
const struct pb_desc *desc)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 6236afb70d1..7ad70f293a6 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -99,7 +99,7 @@ struct pb_debug_manager
};
-static INLINE struct pb_debug_buffer *
+static inline struct pb_debug_buffer *
pb_debug_buffer(struct pb_buffer *buf)
{
assert(buf);
@@ -107,7 +107,7 @@ pb_debug_buffer(struct pb_buffer *buf)
}
-static INLINE struct pb_debug_manager *
+static inline struct pb_debug_manager *
pb_debug_manager(struct pb_manager *mgr)
{
assert(mgr);
@@ -123,7 +123,7 @@ static const uint8_t random_pattern[32] = {
};
-static INLINE void
+static inline void
fill_random_pattern(uint8_t *dst, pb_size size)
{
pb_size i = 0;
@@ -134,7 +134,7 @@ fill_random_pattern(uint8_t *dst, pb_size size)
}
-static INLINE boolean
+static inline boolean
check_random_pattern(const uint8_t *dst, pb_size size,
pb_size *min_ofs, pb_size *max_ofs)
{
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index 84eb6edda34..72099ba5850 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -65,7 +65,7 @@ struct mm_pb_manager
};
-static INLINE struct mm_pb_manager *
+static inline struct mm_pb_manager *
mm_pb_manager(struct pb_manager *mgr)
{
assert(mgr);
@@ -83,7 +83,7 @@ struct mm_buffer
};
-static INLINE struct mm_buffer *
+static inline struct mm_buffer *
mm_buffer(struct pb_buffer *buf)
{
assert(buf);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
index 77e642ada08..c20e2dca02d 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
@@ -70,7 +70,7 @@ struct pb_ondemand_manager
extern const struct pb_vtbl pb_ondemand_buffer_vtbl;
-static INLINE struct pb_ondemand_buffer *
+static inline struct pb_ondemand_buffer *
pb_ondemand_buffer(struct pb_buffer *buf)
{
assert(buf);
@@ -80,7 +80,7 @@ pb_ondemand_buffer(struct pb_buffer *buf)
return (struct pb_ondemand_buffer *)buf;
}
-static INLINE struct pb_ondemand_manager *
+static inline struct pb_ondemand_manager *
pb_ondemand_manager(struct pb_manager *mgr)
{
assert(mgr);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index 51525b0f97c..56a5e82ece0 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -73,7 +73,7 @@ struct pool_pb_manager
};
-static INLINE struct pool_pb_manager *
+static inline struct pool_pb_manager *
pool_pb_manager(struct pb_manager *mgr)
{
assert(mgr);
@@ -93,7 +93,7 @@ struct pool_buffer
};
-static INLINE struct pool_buffer *
+static inline struct pool_buffer *
pool_buffer(struct pb_buffer *buf)
{
assert(buf);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 6a62b4f5fdb..aadeaa087f4 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -163,7 +163,7 @@ struct pb_slab_range_manager
};
-static INLINE struct pb_slab_buffer *
+static inline struct pb_slab_buffer *
pb_slab_buffer(struct pb_buffer *buf)
{
assert(buf);
@@ -171,7 +171,7 @@ pb_slab_buffer(struct pb_buffer *buf)
}
-static INLINE struct pb_slab_manager *
+static inline struct pb_slab_manager *
pb_slab_manager(struct pb_manager *mgr)
{
assert(mgr);
@@ -179,7 +179,7 @@ pb_slab_manager(struct pb_manager *mgr)
}
-static INLINE struct pb_slab_range_manager *
+static inline struct pb_slab_range_manager *
pb_slab_range_manager(struct pb_manager *mgr)
{
assert(mgr);
diff --git a/src/gallium/auxiliary/postprocess/pp_colors.c b/src/gallium/auxiliary/postprocess/pp_colors.c
index 247e4df72a4..e6ea0102eac 100644
--- a/src/gallium/auxiliary/postprocess/pp_colors.c
+++ b/src/gallium/auxiliary/postprocess/pp_colors.c
@@ -37,6 +37,7 @@ pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,
{
struct pp_program *p = ppq->p;
+ const struct pipe_sampler_state *samplers[] = {&p->sampler_point};
pp_filter_setup_in(p, in);
pp_filter_setup_out(p, out);
@@ -44,8 +45,7 @@ pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,
pp_filter_set_fb(p);
pp_filter_misc_state(p);
- cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
- cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+ cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->view);
cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]);
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.c b/src/gallium/auxiliary/postprocess/pp_mlaa.c
index 147d14de95d..024a24895c8 100644
--- a/src/gallium/auxiliary/postprocess/pp_mlaa.c
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c
@@ -141,8 +141,10 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
p->pipe->clear(p->pipe, PIPE_CLEAR_STENCIL | PIPE_CLEAR_COLOR0,
&p->clear_color, 0, 0);
- cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
- cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+ {
+ const struct pipe_sampler_state *samplers[] = {&p->sampler_point};
+ cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+ }
cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->view);
cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]); /* offsetvs */
@@ -168,10 +170,11 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
pp_filter_set_clear_fb(p);
- cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
- cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->sampler_point);
- cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 2, &p->sampler);
- cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+ {
+ const struct pipe_sampler_state *samplers[] =
+ {&p->sampler_point, &p->sampler_point, &p->sampler};
+ cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 3, samplers);
+ }
arr[0] = p->view;
cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 3, arr);
@@ -199,9 +202,11 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
u_sampler_view_default_template(&v_tmp, in, in->format);
arr[0] = p->pipe->create_sampler_view(p->pipe, in, &v_tmp);
- cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
- cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->sampler_point);
- cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+ {
+ const struct pipe_sampler_state *samplers[] =
+ {&p->sampler_point, &p->sampler_point};
+ cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 2, samplers);
+ }
arr[1] = p->view;
cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 2, arr);
diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c
index e76ce854442..caa2062f4cf 100644
--- a/src/gallium/auxiliary/postprocess/pp_run.c
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -125,8 +125,8 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
cso_save_rasterizer(cso);
cso_save_sample_mask(cso);
cso_save_min_samples(cso);
- cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
- cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+ cso_save_fragment_samplers(cso);
+ cso_save_fragment_sampler_views(cso);
cso_save_stencil_ref(cso);
cso_save_stream_outputs(cso);
cso_save_vertex_elements(cso);
@@ -196,8 +196,8 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
cso_restore_rasterizer(cso);
cso_restore_sample_mask(cso);
cso_restore_min_samples(cso);
- cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
- cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+ cso_restore_fragment_samplers(cso);
+ cso_restore_fragment_sampler_views(cso);
cso_restore_stencil_ref(cso);
cso_restore_stream_outputs(cso);
cso_restore_vertex_elements(cso);
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index f9637889187..27ee8f1242a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -510,7 +510,7 @@ void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
/**
* Immediate group 1 instructions.
*/
-static INLINE void
+static inline void
x86_group1_imm( struct x86_function *p,
unsigned op, struct x86_reg dst, int imm )
{
@@ -2196,7 +2196,7 @@ void x86_release_func( struct x86_function *p )
}
-static INLINE x86_func
+static inline x86_func
voidptr_to_x86_func(void *v)
{
union {
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 498ca824cd1..b44d917cd43 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -136,7 +136,7 @@ enum x86_target
};
/* make this read a member of x86_function if target != host is desired */
-static INLINE enum x86_target x86_target( struct x86_function* p )
+static inline enum x86_target x86_target( struct x86_function* p )
{
#ifdef PIPE_ARCH_X86
return X86_32;
@@ -147,7 +147,7 @@ static INLINE enum x86_target x86_target( struct x86_function* p )
#endif
}
-static INLINE unsigned x86_target_caps( struct x86_function* p )
+static inline unsigned x86_target_caps( struct x86_function* p )
{
return p->caps;
}
diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
index 0648e596549..d353ab81e34 100644
--- a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
@@ -27,7 +27,7 @@
* TODO: Audit the following *screen_create() - all of
* them should return the original screen on failuire.
*/
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
debug_screen_wrap(struct pipe_screen *screen)
{
#if defined(GALLIUM_RBUG)
diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
index d3c331d224d..08271a760f5 100644
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
@@ -42,6 +42,7 @@
#if GALLIUM_RADEONSI
#include "radeon/radeon_winsys.h"
#include "radeon/drm/radeon_drm_public.h"
+#include "amdgpu/drm/amdgpu_public.h"
#include "radeonsi/si_public.h"
#endif
@@ -228,7 +229,12 @@ pipe_radeonsi_create_screen(int fd)
{
struct radeon_winsys *rw;
- rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+ /* First, try amdgpu. */
+ rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
+
+ if (!rw)
+ rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+
return rw ? debug_screen_wrap(rw->screen) : NULL;
}
#endif
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index d8cee2b2917..5f46552f6c3 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -20,7 +20,7 @@
#endif
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
{
struct pipe_screen *screen = NULL;
@@ -39,7 +39,7 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
}
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
sw_screen_create(struct sw_winsys *winsys)
{
const char *default_driver;
@@ -71,7 +71,7 @@ PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void)
return galliumsw_driver_extensions;
}
-INLINE struct pipe_screen *
+inline struct pipe_screen *
drisw_create_screen(struct drisw_loader_funcs *lf)
{
struct sw_winsys *winsys = NULL;
@@ -98,7 +98,7 @@ drisw_create_screen(struct drisw_loader_funcs *lf)
extern struct pipe_screen *ninesw_create_screen(struct pipe_screen *screen);
-INLINE struct pipe_screen *
+inline struct pipe_screen *
ninesw_create_screen(struct pipe_screen *pscreen)
{
struct sw_winsys *winsys = NULL;
diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
index 0a2e215352b..4f38ba9f919 100644
--- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
@@ -9,7 +9,7 @@
* Try to wrap a hw screen with a software screen.
* On failure will return given screen.
*/
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
sw_screen_wrap(struct pipe_screen *screen)
{
#if defined(GALLIUM_SOFTPIPE) || defined(GALLIUM_LLVMPIPE)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index c80d7a20481..8ceb5b47584 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -48,6 +48,7 @@ struct dump_ctx
int indent;
uint indentation;
+ FILE *file;
void (*dump_printf)(struct dump_ctx *ctx, const char *format, ...);
};
@@ -58,7 +59,10 @@ dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
va_list ap;
(void)ctx;
va_start(ap, format);
- _debug_vprintf(format, ap);
+ if (ctx->file)
+ vfprintf(ctx->file, format, ap);
+ else
+ _debug_vprintf(format, ap);
va_end(ap);
}
@@ -659,9 +663,7 @@ prolog(
}
void
-tgsi_dump(
- const struct tgsi_token *tokens,
- uint flags )
+tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
{
struct dump_ctx ctx;
@@ -677,10 +679,17 @@ tgsi_dump(
ctx.indent = 0;
ctx.dump_printf = dump_ctx_printf;
ctx.indentation = 0;
+ ctx.file = file;
tgsi_iterate_shader( tokens, &ctx.iter );
}
+void
+tgsi_dump(const struct tgsi_token *tokens, uint flags)
+{
+ tgsi_dump_to_file(tokens, flags, NULL);
+}
+
struct str_dump_ctx
{
struct dump_ctx base;
@@ -733,6 +742,7 @@ tgsi_dump_str(
ctx.base.indent = 0;
ctx.base.dump_printf = &str_dump_ctx_printf;
ctx.base.indentation = 0;
+ ctx.base.file = NULL;
ctx.str = str;
ctx.str[0] = 0;
@@ -756,6 +766,7 @@ tgsi_dump_instruction_str(
ctx.base.indent = 0;
ctx.base.dump_printf = &str_dump_ctx_printf;
ctx.base.indentation = 0;
+ ctx.base.file = NULL;
ctx.str = str;
ctx.str[0] = 0;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index bc873a54ae9..7c8f92ee7bc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -32,6 +32,8 @@
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
+#include <stdio.h>
+
#if defined __cplusplus
extern "C" {
#endif
@@ -43,6 +45,9 @@ tgsi_dump_str(
char *str,
size_t size);
+void
+tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file);
+
void
tgsi_dump(
const struct tgsi_token *tokens,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 44000ffdb6c..75cd0d53c5a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -735,7 +735,7 @@ static const union tgsi_exec_channel M128Vec = {
* not lead to crashes, etc. But when debugging, it's helpful to catch
* them.
*/
-static INLINE void
+static inline void
check_inf_or_nan(const union tgsi_exec_channel *chan)
{
assert(!util_is_inf_or_nan((chan)->f[0]));
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 208640cfd46..5d56aab2216 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -213,7 +213,7 @@ struct tgsi_sampler
* input register files, this is the stride between two 1D
* arrays.
*/
-#define TGSI_EXEC_MAX_INPUT_ATTRIBS PIPE_MAX_SHADER_INPUTS
+#define TGSI_EXEC_MAX_INPUT_ATTRIBS 32
/* The maximum number of bytes per constant buffer.
*/
@@ -386,7 +386,7 @@ boolean
tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst);
-static INLINE void
+static inline void
tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
{
mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
@@ -395,7 +395,7 @@ tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
/** Set execution mask values prior to executing the shader */
-static INLINE void
+static inline void
tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
boolean ch0, boolean ch1, boolean ch2, boolean ch3)
{
@@ -414,7 +414,7 @@ tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
const unsigned *buf_sizes);
-static INLINE int
+static inline int
tgsi_exec_get_shader_param(enum pipe_shader_cap param)
{
switch(param) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 929531109e5..fb29ea0d53d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -316,7 +316,7 @@ tgsi_get_processor_name( uint processor )
*
* MOV and UCMP is special so return VOID
*/
-static INLINE enum tgsi_opcode_type
+static inline enum tgsi_opcode_type
tgsi_opcode_infer_type( uint opcode )
{
switch (opcode) {
@@ -374,7 +374,34 @@ tgsi_opcode_infer_type( uint opcode )
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_IMSB:
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_DSNE:
return TGSI_TYPE_SIGNED;
+ case TGSI_OPCODE_DADD:
+ case TGSI_OPCODE_DABS:
+ case TGSI_OPCODE_DFMA:
+ case TGSI_OPCODE_DNEG:
+ case TGSI_OPCODE_DMUL:
+ case TGSI_OPCODE_DMAX:
+ case TGSI_OPCODE_DMIN:
+ case TGSI_OPCODE_DRCP:
+ case TGSI_OPCODE_DSQRT:
+ case TGSI_OPCODE_DMAD:
+ case TGSI_OPCODE_DLDEXP:
+ case TGSI_OPCODE_DFRACEXP:
+ case TGSI_OPCODE_DFRAC:
+ case TGSI_OPCODE_DRSQ:
+ case TGSI_OPCODE_DTRUNC:
+ case TGSI_OPCODE_DCEIL:
+ case TGSI_OPCODE_DFLR:
+ case TGSI_OPCODE_DROUND:
+ case TGSI_OPCODE_DSSG:
+ case TGSI_OPCODE_F2D:
+ case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_U2D:
+ return TGSI_TYPE_DOUBLE;
default:
return TGSI_TYPE_FLOAT;
}
@@ -391,6 +418,7 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_BREAKC:
case TGSI_OPCODE_U2F:
+ case TGSI_OPCODE_U2D:
case TGSI_OPCODE_UADD:
case TGSI_OPCODE_SWITCH:
case TGSI_OPCODE_CASE:
@@ -400,10 +428,12 @@ tgsi_opcode_infer_src_type( uint opcode )
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:
+ case TGSI_OPCODE_I2D:
return TGSI_TYPE_SIGNED;
case TGSI_OPCODE_ARL:
case TGSI_OPCODE_ARR:
case TGSI_OPCODE_TXQ_LZ:
+ case TGSI_OPCODE_F2D:
case TGSI_OPCODE_F2I:
case TGSI_OPCODE_F2U:
case TGSI_OPCODE_FSEQ:
@@ -412,6 +442,14 @@ tgsi_opcode_infer_src_type( uint opcode )
case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_UCMP:
return TGSI_TYPE_FLOAT;
+ case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_D2I:
+ case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_DSNE:
+ return TGSI_TYPE_DOUBLE;
default:
return tgsi_opcode_infer_type(opcode);
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 1162b265522..0729b5d2426 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -36,7 +36,7 @@ tgsi_parse_init(
const struct tgsi_token *tokens )
{
ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[0];
- if( ctx->FullHeader.Header.HeaderSize >= 2 ) {
+ if (ctx->FullHeader.Header.HeaderSize >= 2) {
ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[1];
}
else {
@@ -69,7 +69,7 @@ tgsi_parse_end_of_tokens(
* warnings. The warnings seem harmless on x86 but on PPC they cause
* real failures.
*/
-static INLINE void
+static inline void
copy_token(void *dst, const void *src)
{
memcpy(dst, src, 4);
@@ -113,11 +113,11 @@ tgsi_parse_token(
next_token(ctx, &decl->Dim);
}
- if( decl->Declaration.Interpolate ) {
+ if (decl->Declaration.Interpolate) {
next_token( ctx, &decl->Interp );
}
- if( decl->Declaration.Semantic ) {
+ if (decl->Declaration.Semantic) {
next_token( ctx, &decl->Semantic );
}
@@ -129,7 +129,7 @@ tgsi_parse_token(
next_token(ctx, &decl->SamplerView);
}
- if( decl->Declaration.Array ) {
+ if (decl->Declaration.Array) {
next_token(ctx, &decl->Array);
}
@@ -190,21 +190,21 @@ tgsi_parse_token(
if (inst->Instruction.Texture) {
next_token( ctx, &inst->Texture);
- for( i = 0; i < inst->Texture.NumOffsets; i++ ) {
+ for (i = 0; i < inst->Texture.NumOffsets; i++) {
next_token( ctx, &inst->TexOffsets[i] );
}
}
assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
- for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+ for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
next_token( ctx, &inst->Dst[i].Register );
- if( inst->Dst[i].Register.Indirect )
+ if (inst->Dst[i].Register.Indirect)
next_token( ctx, &inst->Dst[i].Indirect );
- if( inst->Dst[i].Register.Dimension ) {
+ if (inst->Dst[i].Register.Dimension) {
next_token( ctx, &inst->Dst[i].Dimension );
/*
@@ -212,21 +212,21 @@ tgsi_parse_token(
*/
assert( !inst->Dst[i].Dimension.Dimension );
- if( inst->Dst[i].Dimension.Indirect )
+ if (inst->Dst[i].Dimension.Indirect)
next_token( ctx, &inst->Dst[i].DimIndirect );
}
}
assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
- for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+ for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
next_token( ctx, &inst->Src[i].Register );
- if( inst->Src[i].Register.Indirect )
+ if (inst->Src[i].Register.Indirect)
next_token( ctx, &inst->Src[i].Indirect );
- if( inst->Src[i].Register.Dimension ) {
+ if (inst->Src[i].Register.Dimension) {
next_token( ctx, &inst->Src[i].Dimension );
/*
@@ -234,7 +234,7 @@ tgsi_parse_token(
*/
assert( !inst->Src[i].Dimension.Dimension );
- if( inst->Src[i].Dimension.Indirect )
+ if (inst->Src[i].Dimension.Indirect)
next_token( ctx, &inst->Src[i].DimIndirect );
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index cd4b2afdb8b..35e1c7cfd62 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -133,7 +133,7 @@ void
tgsi_parse_token(
struct tgsi_parse_context *ctx );
-static INLINE unsigned
+static inline unsigned
tgsi_num_tokens(const struct tgsi_token *tokens)
{
struct tgsi_header header;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index be4851f5dcb..d14372feb30 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -63,7 +63,7 @@ struct sanity_check_ctx
boolean print;
};
-static INLINE unsigned
+static inline unsigned
scan_register_key(const scan_register *reg)
{
unsigned key = reg->file;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 6b6a14f55f5..8271ea08177 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -203,7 +203,7 @@ const char *tgsi_immediate_type_names[4] =
};
-static INLINE void
+static inline void
tgsi_strings_check(void)
{
STATIC_ASSERT(Elements(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index 39d7688ab3b..ceb7c2e0f46 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -94,7 +94,7 @@ struct tgsi_transform_context
/**
* Helper for emitting temporary register declarations.
*/
-static INLINE void
+static inline void
tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
unsigned index)
{
@@ -108,7 +108,7 @@ tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
}
-static INLINE void
+static inline void
tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
unsigned index,
unsigned sem_name, unsigned sem_index,
@@ -130,7 +130,7 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
}
-static INLINE void
+static inline void
tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
unsigned index)
{
@@ -143,7 +143,7 @@ tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
ctx->emit_declaration(ctx, &decl);
}
-static INLINE void
+static inline void
tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
unsigned index,
unsigned target,
@@ -165,7 +165,7 @@ tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
ctx->emit_declaration(ctx, &decl);
}
-static INLINE void
+static inline void
tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
float x, float y, float z, float w)
{
@@ -186,7 +186,7 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
/**
* Helper for emitting 1-operand instructions.
*/
-static INLINE void
+static inline void
tgsi_transform_op1_inst(struct tgsi_transform_context *ctx,
unsigned opcode,
unsigned dst_file,
@@ -211,7 +211,7 @@ tgsi_transform_op1_inst(struct tgsi_transform_context *ctx,
}
-static INLINE void
+static inline void
tgsi_transform_op2_inst(struct tgsi_transform_context *ctx,
unsigned opcode,
unsigned dst_file,
@@ -240,7 +240,7 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx,
}
-static INLINE void
+static inline void
tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx,
unsigned opcode,
unsigned dst_file,
@@ -282,7 +282,7 @@ tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx,
}
-static INLINE void
+static inline void
tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx,
unsigned opcode,
unsigned dst_file,
@@ -333,7 +333,7 @@ tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx,
}
-static INLINE void
+static inline void
tgsi_transform_op3_swz_inst(struct tgsi_transform_context *ctx,
unsigned opcode,
unsigned dst_file,
@@ -395,7 +395,7 @@ tgsi_transform_op3_swz_inst(struct tgsi_transform_context *ctx,
}
-static INLINE void
+static inline void
tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
unsigned src_file,
unsigned src_index,
@@ -419,7 +419,7 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
}
-static INLINE void
+static inline void
tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
unsigned dst_file,
unsigned dst_index,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 201a849ef95..3d213195090 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -1830,7 +1830,7 @@ void ureg_free_tokens( const struct tgsi_token *tokens )
}
-static INLINE unsigned
+static inline unsigned
pipe_shader_from_tgsi_processor(unsigned processor)
{
switch (processor) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 1891b068774..0aae550d60a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -140,7 +140,7 @@ ureg_destroy( struct ureg_program * );
/***********************************************************************
* Convenience routine:
*/
-static INLINE void *
+static inline void *
ureg_create_shader_with_so_and_destroy( struct ureg_program *p,
struct pipe_context *pipe,
const struct pipe_stream_output_info *so )
@@ -150,7 +150,7 @@ ureg_create_shader_with_so_and_destroy( struct ureg_program *p,
return result;
}
-static INLINE void *
+static inline void *
ureg_create_shader_and_destroy( struct ureg_program *p,
struct pipe_context *pipe )
{
@@ -180,7 +180,7 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *,
unsigned array_id,
unsigned array_size);
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
@@ -195,7 +195,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
0, 0, 1);
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_DECL_fs_input(struct ureg_program *ureg,
unsigned semantic_name,
unsigned semantic_index,
@@ -328,7 +328,7 @@ ureg_DECL_sampler_view(struct ureg_program *,
unsigned return_type_w );
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm4f( struct ureg_program *ureg,
float a, float b,
float c, float d)
@@ -341,7 +341,7 @@ ureg_imm4f( struct ureg_program *ureg,
return ureg_DECL_immediate( ureg, v, 4 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm3f( struct ureg_program *ureg,
float a, float b,
float c)
@@ -353,7 +353,7 @@ ureg_imm3f( struct ureg_program *ureg,
return ureg_DECL_immediate( ureg, v, 3 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm2f( struct ureg_program *ureg,
float a, float b)
{
@@ -363,7 +363,7 @@ ureg_imm2f( struct ureg_program *ureg,
return ureg_DECL_immediate( ureg, v, 2 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm1f( struct ureg_program *ureg,
float a)
{
@@ -372,7 +372,7 @@ ureg_imm1f( struct ureg_program *ureg,
return ureg_DECL_immediate( ureg, v, 1 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm4u( struct ureg_program *ureg,
unsigned a, unsigned b,
unsigned c, unsigned d)
@@ -385,7 +385,7 @@ ureg_imm4u( struct ureg_program *ureg,
return ureg_DECL_immediate_uint( ureg, v, 4 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm3u( struct ureg_program *ureg,
unsigned a, unsigned b,
unsigned c)
@@ -397,7 +397,7 @@ ureg_imm3u( struct ureg_program *ureg,
return ureg_DECL_immediate_uint( ureg, v, 3 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm2u( struct ureg_program *ureg,
unsigned a, unsigned b)
{
@@ -407,14 +407,14 @@ ureg_imm2u( struct ureg_program *ureg,
return ureg_DECL_immediate_uint( ureg, v, 2 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm1u( struct ureg_program *ureg,
unsigned a)
{
return ureg_DECL_immediate_uint( ureg, &a, 1 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm4i( struct ureg_program *ureg,
int a, int b,
int c, int d)
@@ -427,7 +427,7 @@ ureg_imm4i( struct ureg_program *ureg,
return ureg_DECL_immediate_int( ureg, v, 4 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm3i( struct ureg_program *ureg,
int a, int b,
int c)
@@ -439,7 +439,7 @@ ureg_imm3i( struct ureg_program *ureg,
return ureg_DECL_immediate_int( ureg, v, 3 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm2i( struct ureg_program *ureg,
int a, int b)
{
@@ -449,7 +449,7 @@ ureg_imm2i( struct ureg_program *ureg,
return ureg_DECL_immediate_int( ureg, v, 2 );
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_imm1i( struct ureg_program *ureg,
int a)
{
@@ -459,7 +459,7 @@ ureg_imm1i( struct ureg_program *ureg,
/* Where the destination register has a valid file, but an empty
* writemask.
*/
-static INLINE boolean
+static inline boolean
ureg_dst_is_empty( struct ureg_dst dst )
{
return dst.File != TGSI_FILE_NULL &&
@@ -573,7 +573,7 @@ ureg_fixup_insn_size(struct ureg_program *ureg,
#define OP00( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg ) \
+static inline void ureg_##op( struct ureg_program *ureg ) \
{ \
unsigned opcode = TGSI_OPCODE_##op; \
struct ureg_emit_insn_result insn; \
@@ -592,7 +592,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg ) \
}
#define OP01( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_src src ) \
{ \
unsigned opcode = TGSI_OPCODE_##op; \
@@ -613,7 +613,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP00_LBL( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
unsigned *label_token ) \
{ \
unsigned opcode = TGSI_OPCODE_##op; \
@@ -634,7 +634,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP01_LBL( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_src src, \
unsigned *label_token ) \
{ \
@@ -657,7 +657,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP10( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst ) \
{ \
unsigned opcode = TGSI_OPCODE_##op; \
@@ -681,7 +681,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
#define OP11( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src ) \
{ \
@@ -706,7 +706,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP12( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1 ) \
@@ -733,7 +733,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP12_TEX( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
unsigned target, \
struct ureg_src src0, \
@@ -762,7 +762,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP12_SAMPLE( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1 ) \
@@ -791,7 +791,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP13( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1, \
@@ -820,7 +820,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP13_SAMPLE( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1, \
@@ -851,7 +851,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP14_TEX( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
unsigned target, \
struct ureg_src src0, \
@@ -884,7 +884,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP14_SAMPLE( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1, \
@@ -918,7 +918,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
#define OP14( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1, \
@@ -950,7 +950,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
#define OP15( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1, \
@@ -983,7 +983,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
}
#define OP15_SAMPLE( op ) \
-static INLINE void ureg_##op( struct ureg_program *ureg, \
+static inline void ureg_##op( struct ureg_program *ureg, \
struct ureg_dst dst, \
struct ureg_src src0, \
struct ureg_src src1, \
@@ -1026,7 +1026,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg, \
/***********************************************************************
* Inline helpers for manipulating register structs:
*/
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_negate( struct ureg_src reg )
{
assert(reg.File != TGSI_FILE_NULL);
@@ -1034,7 +1034,7 @@ ureg_negate( struct ureg_src reg )
return reg;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_abs( struct ureg_src reg )
{
assert(reg.File != TGSI_FILE_NULL);
@@ -1043,7 +1043,7 @@ ureg_abs( struct ureg_src reg )
return reg;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_swizzle( struct ureg_src reg,
int x, int y, int z, int w )
{
@@ -1065,13 +1065,13 @@ ureg_swizzle( struct ureg_src reg,
return reg;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_scalar( struct ureg_src reg, int x )
{
return ureg_swizzle(reg, x, x, x, x);
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_writemask( struct ureg_dst reg,
unsigned writemask )
{
@@ -1080,7 +1080,7 @@ ureg_writemask( struct ureg_dst reg,
return reg;
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_saturate( struct ureg_dst reg )
{
assert(reg.File != TGSI_FILE_NULL);
@@ -1088,7 +1088,7 @@ ureg_saturate( struct ureg_dst reg )
return reg;
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_predicate(struct ureg_dst reg,
boolean negate,
unsigned swizzle_x,
@@ -1106,7 +1106,7 @@ ureg_predicate(struct ureg_dst reg,
return reg;
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr )
{
assert(reg.File != TGSI_FILE_NULL);
@@ -1118,7 +1118,7 @@ ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr )
return reg;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
{
assert(reg.File != TGSI_FILE_NULL);
@@ -1130,7 +1130,7 @@ ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
return reg;
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_dst_dimension( struct ureg_dst reg, int index )
{
assert(reg.File != TGSI_FILE_NULL);
@@ -1140,7 +1140,7 @@ ureg_dst_dimension( struct ureg_dst reg, int index )
return reg;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_src_dimension( struct ureg_src reg, int index )
{
assert(reg.File != TGSI_FILE_NULL);
@@ -1150,7 +1150,7 @@ ureg_src_dimension( struct ureg_src reg, int index )
return reg;
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_dst_dimension_indirect( struct ureg_dst reg, struct ureg_src addr,
int index )
{
@@ -1164,7 +1164,7 @@ ureg_dst_dimension_indirect( struct ureg_dst reg, struct ureg_src addr,
return reg;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr,
int index )
{
@@ -1178,21 +1178,21 @@ ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr,
return reg;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_src_array_offset(struct ureg_src reg, int offset)
{
reg.Index += offset;
return reg;
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_dst_array_offset( struct ureg_dst reg, int offset )
{
reg.Index += offset;
return reg;
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_dst_array_register(unsigned file,
unsigned index,
unsigned array_id)
@@ -1224,14 +1224,14 @@ ureg_dst_array_register(unsigned file,
return dst;
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_dst_register(unsigned file,
unsigned index)
{
return ureg_dst_array_register(file, index, 0);
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_dst( struct ureg_src src )
{
struct ureg_dst dst;
@@ -1265,7 +1265,7 @@ ureg_dst( struct ureg_src src )
return dst;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_src_array_register(unsigned file,
unsigned index,
unsigned array_id)
@@ -1295,14 +1295,14 @@ ureg_src_array_register(unsigned file,
return src;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_src_register(unsigned file,
unsigned index)
{
return ureg_src_array_register(file, index, 0);
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_src( struct ureg_dst dst )
{
struct ureg_src src;
@@ -1332,7 +1332,7 @@ ureg_src( struct ureg_dst dst )
-static INLINE struct ureg_dst
+static inline struct ureg_dst
ureg_dst_undef( void )
{
struct ureg_dst dst;
@@ -1362,7 +1362,7 @@ ureg_dst_undef( void )
return dst;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
ureg_src_undef( void )
{
struct ureg_src src;
@@ -1390,13 +1390,13 @@ ureg_src_undef( void )
return src;
}
-static INLINE boolean
+static inline boolean
ureg_src_is_undef( struct ureg_src src )
{
return src.File == TGSI_FILE_NULL;
}
-static INLINE boolean
+static inline boolean
ureg_dst_is_undef( struct ureg_dst dst )
{
return dst.File == TGSI_FILE_NULL;
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index 7fe8ff8145f..d77561aa7ce 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -130,12 +130,12 @@ struct translate *translate_create( const struct translate_key *key );
boolean translate_is_output_format_supported(enum pipe_format format);
-static INLINE int translate_keysize( const struct translate_key *key )
+static inline int translate_keysize( const struct translate_key *key )
{
return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
}
-static INLINE int translate_key_compare( const struct translate_key *a,
+static inline int translate_key_compare( const struct translate_key *a,
const struct translate_key *b )
{
int keysize_a = translate_keysize(a);
@@ -148,7 +148,7 @@ static INLINE int translate_key_compare( const struct translate_key *a,
}
-static INLINE void translate_key_sanitize( struct translate_key *a )
+static inline void translate_key_sanitize( struct translate_key *a )
{
int keysize = translate_keysize(a);
char *ptr = (char *)a;
diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c
index bb8bdcb58c4..2bed02a454b 100644
--- a/src/gallium/auxiliary/translate/translate_cache.c
+++ b/src/gallium/auxiliary/translate/translate_cache.c
@@ -49,7 +49,7 @@ struct translate_cache * translate_cache_create( void )
}
-static INLINE void delete_translates(struct translate_cache *cache)
+static inline void delete_translates(struct translate_cache *cache)
{
struct cso_hash *hash = cache->hash;
struct cso_hash_iter iter = cso_hash_first_node(hash);
@@ -70,14 +70,14 @@ void translate_cache_destroy(struct translate_cache *cache)
}
-static INLINE unsigned translate_hash_key_size(struct translate_key *key)
+static inline unsigned translate_hash_key_size(struct translate_key *key)
{
unsigned size = sizeof(struct translate_key) -
sizeof(struct translate_element) * (TRANSLATE_MAX_ATTRIBS - key->nr_elements);
return size;
}
-static INLINE unsigned create_key(struct translate_key *key)
+static inline unsigned create_key(struct translate_key *key)
{
unsigned hash_key;
unsigned size = translate_hash_key_size(key);
diff --git a/src/gallium/auxiliary/util/u_bitmask.c b/src/gallium/auxiliary/util/u_bitmask.c
index 23c93a3ebcb..b19be29a5a4 100644
--- a/src/gallium/auxiliary/util/u_bitmask.c
+++ b/src/gallium/auxiliary/util/u_bitmask.c
@@ -85,7 +85,7 @@ util_bitmask_create(void)
/**
* Resize the bitmask if necessary
*/
-static INLINE boolean
+static inline boolean
util_bitmask_resize(struct util_bitmask *bm,
unsigned minimum_index)
{
@@ -131,7 +131,7 @@ util_bitmask_resize(struct util_bitmask *bm,
/**
* Lazily update the filled.
*/
-static INLINE void
+static inline void
util_bitmask_filled_set(struct util_bitmask *bm,
unsigned index)
{
@@ -144,7 +144,7 @@ util_bitmask_filled_set(struct util_bitmask *bm,
}
}
-static INLINE void
+static inline void
util_bitmask_filled_unset(struct util_bitmask *bm,
unsigned index)
{
diff --git a/src/gallium/auxiliary/util/u_blend.h b/src/gallium/auxiliary/util/u_blend.h
index 2485c34d418..4f969778972 100644
--- a/src/gallium/auxiliary/util/u_blend.h
+++ b/src/gallium/auxiliary/util/u_blend.h
@@ -9,7 +9,7 @@
* garbage that's there. Return a blend factor that will take that into
* account.
*/
-static INLINE int
+static inline int
util_blend_dst_alpha_to_one(int factor)
{
switch (factor) {
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index e3f30557a03..9737c940936 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -158,7 +158,7 @@ util_destroy_blit(struct blit_state *ctx)
/**
* Helper function to set the fragment shaders.
*/
-static INLINE void
+static inline void
set_fragment_shader(struct blit_state *ctx, uint writemask,
enum pipe_format format,
enum pipe_texture_target pipe_tex)
@@ -194,7 +194,7 @@ set_fragment_shader(struct blit_state *ctx, uint writemask,
/**
* Helper function to set the vertex shader.
*/
-static INLINE void
+static inline void
set_vertex_shader(struct blit_state *ctx)
{
/* vertex shader - still required to provide the linkage between
@@ -546,8 +546,8 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_save_rasterizer(ctx->cso);
cso_save_sample_mask(ctx->cso);
cso_save_min_samples(ctx->cso);
- cso_save_samplers(ctx->cso, PIPE_SHADER_FRAGMENT);
- cso_save_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT);
+ cso_save_fragment_samplers(ctx->cso);
+ cso_save_fragment_sampler_views(ctx->cso);
cso_save_stream_outputs(ctx->cso);
cso_save_viewport(ctx->cso);
cso_save_framebuffer(ctx->cso);
@@ -572,8 +572,10 @@ util_blit_pixels_tex(struct blit_state *ctx,
ctx->sampler.normalized_coords = normalized;
ctx->sampler.min_img_filter = filter;
ctx->sampler.mag_img_filter = filter;
- cso_single_sampler(ctx->cso, PIPE_SHADER_FRAGMENT, 0, &ctx->sampler);
- cso_single_sampler_done(ctx->cso, PIPE_SHADER_FRAGMENT);
+ {
+ const struct pipe_sampler_state *samplers[] = {&ctx->sampler};
+ cso_set_samplers(ctx->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+ }
/* viewport */
ctx->viewport.scale[0] = 0.5f * dst->width;
@@ -628,8 +630,8 @@ util_blit_pixels_tex(struct blit_state *ctx,
cso_restore_rasterizer(ctx->cso);
cso_restore_sample_mask(ctx->cso);
cso_restore_min_samples(ctx->cso);
- cso_restore_samplers(ctx->cso, PIPE_SHADER_FRAGMENT);
- cso_restore_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT);
+ cso_restore_fragment_samplers(ctx->cso);
+ cso_restore_fragment_sampler_views(ctx->cso);
cso_restore_viewport(ctx->cso);
cso_restore_framebuffer(ctx->cso);
cso_restore_fragment_shader(ctx->cso);
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index b5ef9a23966..85206eab1a7 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -938,7 +938,7 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
}
}
-static INLINE
+static inline
void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
enum pipe_texture_target target,
unsigned nr_samples)
@@ -976,7 +976,7 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
}
}
-static INLINE
+static inline
void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
enum pipe_texture_target target,
unsigned nr_samples)
@@ -1014,7 +1014,7 @@ void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
}
}
-static INLINE
+static inline
void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx,
enum pipe_texture_target target,
unsigned nr_samples)
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 93b0e513bd0..0cd173d6284 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -143,7 +143,7 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter);
/**
* Return the pipe context associated with a blitter context.
*/
-static INLINE
+static inline
struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
{
return blitter->pipe;
@@ -371,77 +371,77 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
*
* States not listed here are not affected by util_blitter. */
-static INLINE
+static inline
void util_blitter_save_blend(struct blitter_context *blitter,
void *state)
{
blitter->saved_blend_state = state;
}
-static INLINE
+static inline
void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
void *state)
{
blitter->saved_dsa_state = state;
}
-static INLINE
+static inline
void util_blitter_save_vertex_elements(struct blitter_context *blitter,
void *state)
{
blitter->saved_velem_state = state;
}
-static INLINE
+static inline
void util_blitter_save_stencil_ref(struct blitter_context *blitter,
const struct pipe_stencil_ref *state)
{
blitter->saved_stencil_ref = *state;
}
-static INLINE
+static inline
void util_blitter_save_rasterizer(struct blitter_context *blitter,
void *state)
{
blitter->saved_rs_state = state;
}
-static INLINE
+static inline
void util_blitter_save_fragment_shader(struct blitter_context *blitter,
void *fs)
{
blitter->saved_fs = fs;
}
-static INLINE
+static inline
void util_blitter_save_vertex_shader(struct blitter_context *blitter,
void *vs)
{
blitter->saved_vs = vs;
}
-static INLINE
+static inline
void util_blitter_save_geometry_shader(struct blitter_context *blitter,
void *gs)
{
blitter->saved_gs = gs;
}
-static INLINE void
+static inline void
util_blitter_save_tessctrl_shader(struct blitter_context *blitter,
void *sh)
{
blitter->saved_tcs = sh;
}
-static INLINE void
+static inline void
util_blitter_save_tesseval_shader(struct blitter_context *blitter,
void *sh)
{
blitter->saved_tes = sh;
}
-static INLINE
+static inline
void util_blitter_save_framebuffer(struct blitter_context *blitter,
const struct pipe_framebuffer_state *state)
{
@@ -449,21 +449,21 @@ void util_blitter_save_framebuffer(struct blitter_context *blitter,
util_copy_framebuffer_state(&blitter->saved_fb_state, state);
}
-static INLINE
+static inline
void util_blitter_save_viewport(struct blitter_context *blitter,
struct pipe_viewport_state *state)
{
blitter->saved_viewport = *state;
}
-static INLINE
+static inline
void util_blitter_save_scissor(struct blitter_context *blitter,
struct pipe_scissor_state *state)
{
blitter->saved_scissor = *state;
}
-static INLINE
+static inline
void util_blitter_save_fragment_sampler_states(
struct blitter_context *blitter,
unsigned num_sampler_states,
@@ -476,7 +476,7 @@ void util_blitter_save_fragment_sampler_states(
num_sampler_states * sizeof(void *));
}
-static INLINE void
+static inline void
util_blitter_save_fragment_sampler_views(struct blitter_context *blitter,
unsigned num_views,
struct pipe_sampler_view **views)
@@ -490,7 +490,7 @@ util_blitter_save_fragment_sampler_views(struct blitter_context *blitter,
views[i]);
}
-static INLINE void
+static inline void
util_blitter_save_vertex_buffer_slot(struct blitter_context *blitter,
struct pipe_vertex_buffer *vertex_buffers)
{
@@ -500,7 +500,7 @@ util_blitter_save_vertex_buffer_slot(struct blitter_context *blitter,
sizeof(struct pipe_vertex_buffer));
}
-static INLINE void
+static inline void
util_blitter_save_so_targets(struct blitter_context *blitter,
unsigned num_targets,
struct pipe_stream_output_target **targets)
@@ -514,7 +514,7 @@ util_blitter_save_so_targets(struct blitter_context *blitter,
targets[i]);
}
-static INLINE void
+static inline void
util_blitter_save_sample_mask(struct blitter_context *blitter,
unsigned sample_mask)
{
@@ -522,7 +522,7 @@ util_blitter_save_sample_mask(struct blitter_context *blitter,
blitter->saved_sample_mask = sample_mask;
}
-static INLINE void
+static inline void
util_blitter_save_render_condition(struct blitter_context *blitter,
struct pipe_query *query,
boolean condition,
diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h
index 520a3d596cb..66cf989a830 100644
--- a/src/gallium/auxiliary/util/u_box.h
+++ b/src/gallium/auxiliary/util/u_box.h
@@ -4,7 +4,7 @@
#include "pipe/p_state.h"
#include "util/u_math.h"
-static INLINE
+static inline
void u_box_1d( unsigned x,
unsigned w,
struct pipe_box *box )
@@ -17,7 +17,7 @@ void u_box_1d( unsigned x,
box->depth = 1;
}
-static INLINE
+static inline
void u_box_2d( unsigned x,
unsigned y,
unsigned w,
@@ -32,7 +32,7 @@ void u_box_2d( unsigned x,
box->depth = 1;
}
-static INLINE
+static inline
void u_box_origin_2d( unsigned w,
unsigned h,
struct pipe_box *box )
@@ -45,7 +45,7 @@ void u_box_origin_2d( unsigned w,
box->depth = 1;
}
-static INLINE
+static inline
void u_box_2d_zslice( unsigned x,
unsigned y,
unsigned z,
@@ -61,7 +61,7 @@ void u_box_2d_zslice( unsigned x,
box->depth = 1;
}
-static INLINE
+static inline
void u_box_3d( unsigned x,
unsigned y,
unsigned z,
@@ -86,7 +86,7 @@ void u_box_3d( unsigned x,
* 3 if both width and height have been reduced.
* Aliasing permitted.
*/
-static INLINE int
+static inline int
u_box_clip_2d(struct pipe_box *dst,
const struct pipe_box *box, int w, int h)
{
@@ -129,14 +129,14 @@ u_box_clip_2d(struct pipe_box *dst,
return res;
}
-static INLINE int64_t
+static inline int64_t
u_box_volume_3d(const struct pipe_box *box)
{
return (int64_t)box->width * box->height * box->depth;
}
/* Aliasing of @dst permitted. */
-static INLINE void
+static inline void
u_box_union_2d(struct pipe_box *dst,
const struct pipe_box *a, const struct pipe_box *b)
{
@@ -148,7 +148,7 @@ u_box_union_2d(struct pipe_box *dst,
}
/* Aliasing of @dst permitted. */
-static INLINE void
+static inline void
u_box_union_3d(struct pipe_box *dst,
const struct pipe_box *a, const struct pipe_box *b)
{
@@ -161,7 +161,7 @@ u_box_union_3d(struct pipe_box *dst,
dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - dst->z;
}
-static INLINE boolean
+static inline boolean
u_box_test_intersection_2d(const struct pipe_box *a,
const struct pipe_box *b)
{
@@ -185,7 +185,7 @@ u_box_test_intersection_2d(const struct pipe_box *a,
return TRUE;
}
-static INLINE void
+static inline void
u_box_minify_2d(struct pipe_box *dst,
const struct pipe_box *src, unsigned l)
{
diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c
index 9395c66f2f8..da0856981eb 100644
--- a/src/gallium/auxiliary/util/u_cache.c
+++ b/src/gallium/auxiliary/util/u_cache.c
@@ -155,7 +155,7 @@ util_cache_entry_get(struct util_cache *cache,
return NULL;
}
-static INLINE void
+static inline void
util_cache_entry_destroy(struct util_cache *cache,
struct util_cache_entry *entry)
{
diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h
index af557be00bd..864d1302b4f 100644
--- a/src/gallium/auxiliary/util/u_clear.h
+++ b/src/gallium/auxiliary/util/u_clear.h
@@ -37,7 +37,7 @@
* Clear the given buffers to the specified values.
* No masking, no scissor (clear entire buffer).
*/
-static INLINE void
+static inline void
util_clear(struct pipe_context *pipe,
struct pipe_framebuffer_state *framebuffer, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil)
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 23ab46c54bc..d1f9e978682 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -179,7 +179,7 @@ static int has_cpuid(void)
* @sa cpuid.h included in gcc-4.3 onwards.
* @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
*/
-static INLINE void
+static inline void
cpuid(uint32_t ax, uint32_t *p)
{
#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
@@ -216,7 +216,7 @@ cpuid(uint32_t ax, uint32_t *p)
* @sa cpuid.h included in gcc-4.4 onwards.
* @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx
*/
-static INLINE void
+static inline void
cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
{
#if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
@@ -250,7 +250,7 @@ cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
}
-static INLINE uint64_t xgetbv(void)
+static inline uint64_t xgetbv(void)
{
#if defined(PIPE_CC_GCC)
uint32_t eax, edx;
@@ -272,7 +272,7 @@ static INLINE uint64_t xgetbv(void)
#if defined(PIPE_ARCH_X86)
-PIPE_ALIGN_STACK static INLINE boolean sse2_has_daz(void)
+PIPE_ALIGN_STACK static inline boolean sse2_has_daz(void)
{
struct {
uint32_t pad1[7];
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 2d2d049b205..b4503deb8f6 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -256,12 +256,12 @@ static boolean str_has_option(const char *str, const char *name)
return FALSE;
}
-unsigned long
+uint64_t
debug_get_flags_option(const char *name,
const struct debug_named_value *flags,
- unsigned long dfault)
+ uint64_t dfault)
{
- unsigned long result;
+ uint64_t result;
const char *str;
const struct debug_named_value *orig = flags;
unsigned namealign = 0;
@@ -276,7 +276,7 @@ debug_get_flags_option(const char *name,
namealign = MAX2(namealign, strlen(flags->name));
for (flags = orig; flags->name; ++flags)
- _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
- (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value,
+ _debug_printf("| %*s [0x%0*llx]%s%s\n", namealign, flags->name,
+ (int)sizeof(uint64_t)*CHAR_BIT/4, (unsigned long long)flags->value,
flags->desc ? " " : "", flags->desc ? flags->desc : "");
}
else {
@@ -758,7 +758,8 @@ debug_print_bind_flags(const char *msg, unsigned usage)
DEBUG_NAMED_VALUE(PIPE_BIND_CURSOR),
DEBUG_NAMED_VALUE(PIPE_BIND_CUSTOM),
DEBUG_NAMED_VALUE(PIPE_BIND_GLOBAL),
- DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_RESOURCE),
+ DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_BUFFER),
+ DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_IMAGE),
DEBUG_NAMED_VALUE(PIPE_BIND_COMPUTE_RESOURCE),
DEBUG_NAMED_VALUE(PIPE_BIND_COMMAND_ARGS_BUFFER),
DEBUG_NAMED_VALUE(PIPE_BIND_SCANOUT),
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 3b2255244a7..926063a1918 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -58,7 +58,7 @@ extern "C" {
void _debug_vprintf(const char *format, va_list ap);
-static INLINE void
+static inline void
_debug_printf(const char *format, ...)
{
va_list ap;
@@ -78,10 +78,10 @@ _debug_printf(const char *format, ...)
* that is guaranteed to be printed in all platforms)
*/
#if !defined(PIPE_OS_HAIKU)
-static INLINE void
+static inline void
debug_printf(const char *format, ...) _util_printf_format(1,2);
-static INLINE void
+static inline void
debug_printf(const char *format, ...)
{
#ifdef DEBUG
@@ -269,7 +269,7 @@ void _debug_assert_fail(const char *expr,
struct debug_named_value
{
const char *name;
- unsigned long value;
+ uint64_t value;
const char *desc;
};
@@ -377,10 +377,10 @@ debug_get_bool_option(const char *name, boolean dfault);
long
debug_get_num_option(const char *name, long dfault);
-unsigned long
+uint64_t
debug_get_flags_option(const char *name,
const struct debug_named_value *flags,
- unsigned long dfault);
+ uint64_t dfault);
#define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \
static boolean \
diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
index df73ed83ef6..f428d22d205 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.c
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -80,6 +80,15 @@ debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr)
util_sprintf(buf, "pipe_sampler_view<%s,%s>", res, util_format_short_name(ptr->format));
}
+void
+debug_describe_image_view(char* buf, const struct pipe_image_view *ptr)
+{
+ char res[128];
+ debug_describe_resource(res, ptr->resource);
+ util_sprintf(buf, "pipe_image_view<%s,%s>", res,
+ util_format_short_name(ptr->format));
+}
+
void
debug_describe_so_target(char* buf,
const struct pipe_stream_output_target *ptr)
diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
index 4f7882b0b37..2172ecb4395 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.h
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -35,12 +35,14 @@ struct pipe_reference;
struct pipe_resource;
struct pipe_surface;
struct pipe_sampler_view;
+struct pipe_image_view;
/* a 256-byte buffer is necessary and sufficient */
void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
+void debug_describe_image_view(char* buf, const struct pipe_image_view *ptr);
void debug_describe_so_target(char* buf,
const struct pipe_stream_output_target *ptr);
diff --git a/src/gallium/auxiliary/util/u_debug_memory.c b/src/gallium/auxiliary/util/u_debug_memory.c
index 747837cd148..3e7ecfa79f3 100644
--- a/src/gallium/auxiliary/util/u_debug_memory.c
+++ b/src/gallium/auxiliary/util/u_debug_memory.c
@@ -92,7 +92,7 @@ pipe_static_mutex(list_mutex);
static unsigned long last_no = 0;
-static INLINE struct debug_memory_header *
+static inline struct debug_memory_header *
header_from_data(void *data)
{
if(data)
@@ -101,7 +101,7 @@ header_from_data(void *data)
return NULL;
}
-static INLINE void *
+static inline void *
data_from_header(struct debug_memory_header *hdr)
{
if(hdr)
@@ -110,7 +110,7 @@ data_from_header(struct debug_memory_header *hdr)
return NULL;
}
-static INLINE struct debug_memory_footer *
+static inline struct debug_memory_footer *
footer_from_header(struct debug_memory_header *hdr)
{
if(hdr)
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
index c02fba27ddf..1f9218fec9a 100644
--- a/src/gallium/auxiliary/util/u_debug_refcnt.h
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -42,7 +42,7 @@ extern int debug_refcnt_state;
void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change);
-static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+static inline void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
{
if (debug_refcnt_state >= 0)
debug_reference_slowpath(p, get_desc, change);
@@ -50,7 +50,7 @@ static INLINE void debug_reference(const struct pipe_reference* p, debug_referen
#else
-static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+static inline void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
{
}
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index 542493252ce..10efdd593e5 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -146,7 +146,7 @@ DBGHELP_DISPATCH(SymGetLineFromAddr64,
#undef DBGHELP_DISPATCH
-static INLINE boolean
+static inline boolean
debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size)
{
DWORD64 dwAddr = (DWORD64)(uintptr_t)addr;
@@ -227,7 +227,7 @@ debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size)
*
* To fix this, post-process the output with tools/addr2line.sh
*/
-static INLINE boolean
+static inline boolean
debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
{
char** syms = backtrace_symbols((void**)&addr, 1);
diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h
index d31f8b9170a..ccde8a8c115 100644
--- a/src/gallium/auxiliary/util/u_dirty_surfaces.h
+++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h
@@ -47,13 +47,13 @@ struct util_dirty_surface
struct list_head dirty_list;
};
-static INLINE void
+static inline void
util_dirty_surfaces_init(struct util_dirty_surfaces *ds)
{
LIST_INITHEAD(&ds->dirty_list);
}
-static INLINE void
+static inline void
util_dirty_surfaces_use_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, util_dirty_surface_flush_t flush)
{
struct list_head *p, *next;
@@ -66,7 +66,7 @@ util_dirty_surfaces_use_for_sampling(struct pipe_context *pipe, struct util_dirt
}
}
-static INLINE void
+static inline void
util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, unsigned first, unsigned last, util_dirty_surface_flush_t flush)
{
struct list_head *p, *next;
@@ -82,7 +82,7 @@ util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct ut
}
}
-static INLINE void
+static inline void
util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util_dirty_surfaces *dss, struct pipe_sampler_view *psv, struct pipe_sampler_state *pss, util_dirty_surface_flush_t flush)
{
if(!LIST_IS_EMPTY(&dss->dirty_list))
@@ -90,26 +90,26 @@ util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util
MIN2((unsigned)ceilf(pss->max_lod) + psv->u.tex.first_level, psv->u.tex.last_level), flush);
}
-static INLINE void
+static inline void
util_dirty_surface_init(struct util_dirty_surface *ds)
{
LIST_INITHEAD(&ds->dirty_list);
}
-static INLINE boolean
+static inline boolean
util_dirty_surface_is_dirty(struct util_dirty_surface *ds)
{
return !LIST_IS_EMPTY(&ds->dirty_list);
}
-static INLINE void
+static inline void
util_dirty_surface_set_dirty(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds)
{
if(LIST_IS_EMPTY(&ds->dirty_list))
LIST_ADDTAIL(&ds->dirty_list, &dss->dirty_list);
}
-static INLINE void
+static inline void
util_dirty_surface_set_clean(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds)
{
if(!LIST_IS_EMPTY(&ds->dirty_list))
diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h
index 9fc3e9924e1..5c0880f6ce4 100644
--- a/src/gallium/auxiliary/util/u_draw.h
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -39,7 +39,7 @@ extern "C" {
#endif
-static INLINE void
+static inline void
util_draw_init_info(struct pipe_draw_info *info)
{
memset(info, 0, sizeof(*info));
@@ -48,7 +48,7 @@ util_draw_init_info(struct pipe_draw_info *info)
}
-static INLINE void
+static inline void
util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
{
struct pipe_draw_info info;
@@ -63,7 +63,7 @@ util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
pipe->draw_vbo(pipe, &info);
}
-static INLINE void
+static inline void
util_draw_elements(struct pipe_context *pipe, int index_bias,
uint mode, uint start, uint count)
{
@@ -79,7 +79,7 @@ util_draw_elements(struct pipe_context *pipe, int index_bias,
pipe->draw_vbo(pipe, &info);
}
-static INLINE void
+static inline void
util_draw_arrays_instanced(struct pipe_context *pipe,
uint mode, uint start, uint count,
uint start_instance,
@@ -99,7 +99,7 @@ util_draw_arrays_instanced(struct pipe_context *pipe,
pipe->draw_vbo(pipe, &info);
}
-static INLINE void
+static inline void
util_draw_elements_instanced(struct pipe_context *pipe,
int index_bias,
uint mode, uint start, uint count,
@@ -120,7 +120,7 @@ util_draw_elements_instanced(struct pipe_context *pipe,
pipe->draw_vbo(pipe, &info);
}
-static INLINE void
+static inline void
util_draw_range_elements(struct pipe_context *pipe,
int index_bias,
uint min_index,
diff --git a/src/gallium/auxiliary/util/u_dual_blend.h b/src/gallium/auxiliary/util/u_dual_blend.h
index e31d43c18bd..9450800f715 100644
--- a/src/gallium/auxiliary/util/u_dual_blend.h
+++ b/src/gallium/auxiliary/util/u_dual_blend.h
@@ -3,7 +3,7 @@
#include "pipe/p_state.h"
-static INLINE boolean util_blend_factor_is_dual_src(int factor)
+static inline boolean util_blend_factor_is_dual_src(int factor)
{
return (factor == PIPE_BLENDFACTOR_SRC1_COLOR) ||
(factor == PIPE_BLENDFACTOR_SRC1_ALPHA) ||
@@ -11,7 +11,7 @@ static INLINE boolean util_blend_factor_is_dual_src(int factor)
(factor == PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
}
-static INLINE boolean util_blend_state_is_dual(const struct pipe_blend_state *blend,
+static inline boolean util_blend_state_is_dual(const struct pipe_blend_state *blend,
int index)
{
if (util_blend_factor_is_dual_src(blend->rt[index].rgb_src_factor) ||
diff --git a/src/gallium/auxiliary/util/u_dump.h b/src/gallium/auxiliary/util/u_dump.h
index 58e7dfd8244..2598851152b 100644
--- a/src/gallium/auxiliary/util/u_dump.h
+++ b/src/gallium/auxiliary/util/u_dump.h
@@ -88,14 +88,16 @@ util_dump_tex_filter(unsigned value, boolean shortened);
const char *
util_dump_query_type(unsigned value, boolean shortened);
+const char *
+util_dump_prim_mode(unsigned value, boolean shortened);
+
/*
* p_state.h, through a FILE
*/
void
-util_dump_template(FILE *stream,
- const struct pipe_resource *templat);
+util_dump_resource(FILE *stream, const struct pipe_resource *state);
void
util_dump_rasterizer_state(FILE *stream,
@@ -153,10 +155,23 @@ void
util_dump_surface(FILE *stream,
const struct pipe_surface *state);
+void
+util_dump_image_view(FILE *stream, const struct pipe_image_view *state);
+
+void
+util_dump_sampler_view(FILE *stream, const struct pipe_sampler_view *state);
+
void
util_dump_transfer(FILE *stream,
const struct pipe_transfer *state);
+void
+util_dump_constant_buffer(FILE *stream,
+ const struct pipe_constant_buffer *state);
+
+void
+util_dump_index_buffer(FILE *stream, const struct pipe_index_buffer *state);
+
void
util_dump_vertex_buffer(FILE *stream,
const struct pipe_vertex_buffer *state);
@@ -165,6 +180,10 @@ void
util_dump_vertex_element(FILE *stream,
const struct pipe_vertex_element *state);
+void
+util_dump_stream_output_target(FILE *stream,
+ const struct pipe_stream_output_target *state);
+
void
util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state);
diff --git a/src/gallium/auxiliary/util/u_dump_defines.c b/src/gallium/auxiliary/util/u_dump_defines.c
index 03fd15d0c44..3ddc9554b50 100644
--- a/src/gallium/auxiliary/util/u_dump_defines.c
+++ b/src/gallium/auxiliary/util/u_dump_defines.c
@@ -392,3 +392,44 @@ util_dump_query_type_short_names[] = {
};
DEFINE_UTIL_DUMP_CONTINUOUS(query_type)
+
+
+static const char *
+util_dump_prim_mode_names[] = {
+ "PIPE_PRIM_POINTS",
+ "PIPE_PRIM_LINES",
+ "PIPE_PRIM_LINE_LOOP",
+ "PIPE_PRIM_LINE_STRIP",
+ "PIPE_PRIM_TRIANGLES",
+ "PIPE_PRIM_TRIANGLE_STRIP",
+ "PIPE_PRIM_TRIANGLE_FAN",
+ "PIPE_PRIM_QUADS",
+ "PIPE_PRIM_QUAD_STRIP",
+ "PIPE_PRIM_POLYGON",
+ "PIPE_PRIM_LINES_ADJACENCY",
+ "PIPE_PRIM_LINE_STRIP_ADJACENCY",
+ "PIPE_PRIM_TRIANGLES_ADJACENCY",
+ "PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY",
+ "PIPE_PRIM_PATCHES",
+};
+
+static const char *
+util_dump_prim_mode_short_names[] = {
+ "points",
+ "lines",
+ "line_loop",
+ "line_strip",
+ "triangles",
+ "triangle_strip",
+ "triangle_fan",
+ "quads",
+ "quad_strip",
+ "polygon",
+ "lines_adjacency",
+ "line_strip_adjacency",
+ "triangles_adjacency",
+ "triangle_strip_adjacency",
+ "patches",
+};
+
+DEFINE_UTIL_DUMP_CONTINUOUS(prim_mode)
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index 7f620b50cf0..441d16236b5 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -39,7 +39,7 @@
* Dump primitives
*/
-static INLINE void
+static inline void
util_stream_writef(FILE *stream, const char *format, ...)
{
static char buf[1024];
@@ -247,6 +247,42 @@ util_dump_enum_func(FILE *stream, unsigned value)
util_dump_enum(stream, util_dump_func(value, TRUE));
}
+static void
+util_dump_enum_prim_mode(FILE *stream, unsigned value)
+{
+ util_dump_enum(stream, util_dump_prim_mode(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_target(FILE *stream, unsigned value)
+{
+ util_dump_enum(stream, util_dump_tex_target(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_filter(FILE *stream, unsigned value)
+{
+ util_dump_enum(stream, util_dump_tex_filter(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_mipfilter(FILE *stream, unsigned value)
+{
+ util_dump_enum(stream, util_dump_tex_mipfilter(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_wrap(FILE *stream, unsigned value)
+{
+ util_dump_enum(stream, util_dump_tex_wrap(value, TRUE));
+}
+
+static void
+util_dump_enum_stencil_op(FILE *stream, unsigned value)
+{
+ util_dump_enum(stream, util_dump_stencil_op(value, TRUE));
+}
+
/*
* Public functions
@@ -254,38 +290,28 @@ util_dump_enum_func(FILE *stream, unsigned value)
void
-util_dump_template(FILE *stream, const struct pipe_resource *templat)
+util_dump_resource(FILE *stream, const struct pipe_resource *state)
{
- if(!templat) {
+ if (!state) {
util_dump_null(stream);
return;
}
util_dump_struct_begin(stream, "pipe_resource");
- util_dump_member(stream, int, templat, target);
- util_dump_member(stream, format, templat, format);
+ util_dump_member(stream, enum_tex_target, state, target);
+ util_dump_member(stream, format, state, format);
- util_dump_member_begin(stream, "width");
- util_dump_uint(stream, templat->width0);
- util_dump_member_end(stream);
+ util_dump_member(stream, uint, state, width0);
+ util_dump_member(stream, uint, state, height0);
+ util_dump_member(stream, uint, state, depth0);
+ util_dump_member(stream, uint, state, array_size);
- util_dump_member_begin(stream, "height");
- util_dump_uint(stream, templat->height0);
- util_dump_member_end(stream);
-
- util_dump_member_begin(stream, "depth");
- util_dump_uint(stream, templat->depth0);
- util_dump_member_end(stream);
-
- util_dump_member_begin(stream, "array_size");
- util_dump_uint(stream, templat->array_size);
- util_dump_member_end(stream);
-
- util_dump_member(stream, uint, templat, last_level);
- util_dump_member(stream, uint, templat, usage);
- util_dump_member(stream, uint, templat, bind);
- util_dump_member(stream, uint, templat, flags);
+ util_dump_member(stream, uint, state, last_level);
+ util_dump_member(stream, uint, state, nr_samples);
+ util_dump_member(stream, uint, state, usage);
+ util_dump_member(stream, uint, state, bind);
+ util_dump_member(stream, uint, state, flags);
util_dump_struct_end(stream);
}
@@ -319,6 +345,7 @@ util_dump_rasterizer_state(FILE *stream, const struct pipe_rasterizer_state *sta
util_dump_member(stream, uint, state, sprite_coord_enable);
util_dump_member(stream, bool, state, sprite_coord_mode);
util_dump_member(stream, bool, state, point_quad_rasterization);
+ util_dump_member(stream, bool, state, point_tri_clip);
util_dump_member(stream, bool, state, point_size_per_vertex);
util_dump_member(stream, bool, state, multisample);
util_dump_member(stream, bool, state, line_smooth);
@@ -331,6 +358,7 @@ util_dump_rasterizer_state(FILE *stream, const struct pipe_rasterizer_state *sta
util_dump_member(stream, bool, state, bottom_edge_rule);
util_dump_member(stream, bool, state, rasterizer_discard);
util_dump_member(stream, bool, state, depth_clip);
+ util_dump_member(stream, bool, state, clip_halfz);
util_dump_member(stream, uint, state, clip_plane_enable);
util_dump_member(stream, float, state, line_width);
@@ -426,7 +454,6 @@ util_dump_clip_state(FILE *stream, const struct pipe_clip_state *state)
void
util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state)
{
- char str[8192];
unsigned i;
if(!state) {
@@ -434,33 +461,35 @@ util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state)
return;
}
- tgsi_dump_str(state->tokens, 0, str, sizeof(str));
-
util_dump_struct_begin(stream, "pipe_shader_state");
util_dump_member_begin(stream, "tokens");
- util_dump_string(stream, str);
+ fprintf(stream, "\"\n");
+ tgsi_dump_to_file(state->tokens, 0, stream);
+ fprintf(stream, "\"");
util_dump_member_end(stream);
- util_dump_member_begin(stream, "stream_output");
- util_dump_struct_begin(stream, "pipe_stream_output_info");
- util_dump_member(stream, uint, &state->stream_output, num_outputs);
- util_dump_array(stream, uint, state->stream_output.stride,
- Elements(state->stream_output.stride));
- util_dump_array_begin(stream);
- for(i = 0; i < state->stream_output.num_outputs; ++i) {
- util_dump_elem_begin(stream);
- util_dump_struct_begin(stream, ""); /* anonymous */
- util_dump_member(stream, uint, &state->stream_output.output[i], register_index);
- util_dump_member(stream, uint, &state->stream_output.output[i], start_component);
- util_dump_member(stream, uint, &state->stream_output.output[i], num_components);
- util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer);
+ if (state->stream_output.num_outputs) {
+ util_dump_member_begin(stream, "stream_output");
+ util_dump_struct_begin(stream, "pipe_stream_output_info");
+ util_dump_member(stream, uint, &state->stream_output, num_outputs);
+ util_dump_array(stream, uint, state->stream_output.stride,
+ Elements(state->stream_output.stride));
+ util_dump_array_begin(stream);
+ for(i = 0; i < state->stream_output.num_outputs; ++i) {
+ util_dump_elem_begin(stream);
+ util_dump_struct_begin(stream, ""); /* anonymous */
+ util_dump_member(stream, uint, &state->stream_output.output[i], register_index);
+ util_dump_member(stream, uint, &state->stream_output.output[i], start_component);
+ util_dump_member(stream, uint, &state->stream_output.output[i], num_components);
+ util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer);
+ util_dump_struct_end(stream);
+ util_dump_elem_end(stream);
+ }
+ util_dump_array_end(stream);
util_dump_struct_end(stream);
- util_dump_elem_end(stream);
+ util_dump_member_end(stream);
}
- util_dump_array_end(stream);
- util_dump_struct_end(stream);
- util_dump_member_end(stream);
util_dump_struct_end(stream);
}
@@ -496,9 +525,12 @@ util_dump_depth_stencil_alpha_state(FILE *stream, const struct pipe_depth_stenci
util_dump_member(stream, bool, &state->stencil[i], enabled);
if (state->stencil[i].enabled) {
util_dump_member(stream, enum_func, &state->stencil[i], func);
- util_dump_member(stream, uint, &state->stencil[i], fail_op);
- util_dump_member(stream, uint, &state->stencil[i], zpass_op);
- util_dump_member(stream, uint, &state->stencil[i], zfail_op);
+ util_dump_member(stream, enum_stencil_op,
+ &state->stencil[i], fail_op);
+ util_dump_member(stream, enum_stencil_op,
+ &state->stencil[i], zpass_op);
+ util_dump_member(stream, enum_stencil_op,
+ &state->stencil[i], zfail_op);
util_dump_member(stream, uint, &state->stencil[i], valuemask);
util_dump_member(stream, uint, &state->stencil[i], writemask);
}
@@ -555,6 +587,8 @@ util_dump_blend_state(FILE *stream, const struct pipe_blend_state *state)
util_dump_struct_begin(stream, "pipe_blend_state");
util_dump_member(stream, bool, state, dither);
+ util_dump_member(stream, bool, state, alpha_to_coverage);
+ util_dump_member(stream, bool, state, alpha_to_one);
util_dump_member(stream, bool, state, logicop_enable);
if (state->logicop_enable) {
@@ -629,16 +663,17 @@ util_dump_sampler_state(FILE *stream, const struct pipe_sampler_state *state)
util_dump_struct_begin(stream, "pipe_sampler_state");
- util_dump_member(stream, uint, state, wrap_s);
- util_dump_member(stream, uint, state, wrap_t);
- util_dump_member(stream, uint, state, wrap_r);
- util_dump_member(stream, uint, state, min_img_filter);
- util_dump_member(stream, uint, state, min_mip_filter);
- util_dump_member(stream, uint, state, mag_img_filter);
+ util_dump_member(stream, enum_tex_wrap, state, wrap_s);
+ util_dump_member(stream, enum_tex_wrap, state, wrap_t);
+ util_dump_member(stream, enum_tex_wrap, state, wrap_r);
+ util_dump_member(stream, enum_tex_filter, state, min_img_filter);
+ util_dump_member(stream, enum_tex_mipfilter, state, min_mip_filter);
+ util_dump_member(stream, enum_tex_filter, state, mag_img_filter);
util_dump_member(stream, uint, state, compare_mode);
util_dump_member(stream, enum_func, state, compare_func);
util_dump_member(stream, bool, state, normalized_coords);
util_dump_member(stream, uint, state, max_anisotropy);
+ util_dump_member(stream, bool, state, seamless_cube_map);
util_dump_member(stream, float, state, lod_bias);
util_dump_member(stream, float, state, min_lod);
util_dump_member(stream, float, state, max_lod);
@@ -671,6 +706,67 @@ util_dump_surface(FILE *stream, const struct pipe_surface *state)
}
+void
+util_dump_image_view(FILE *stream, const struct pipe_image_view *state)
+{
+ if (!state) {
+ util_dump_null(stream);
+ return;
+ }
+
+ util_dump_struct_begin(stream, "pipe_image_view");
+
+ util_dump_member(stream, ptr, state, resource);
+ util_dump_member(stream, format, state, format);
+
+ if (state->resource->target == PIPE_BUFFER) {
+ util_dump_member(stream, uint, state, u.buf.first_element);
+ util_dump_member(stream, uint, state, u.buf.last_element);
+ }
+ else {
+ util_dump_member(stream, uint, state, u.tex.first_layer);
+ util_dump_member(stream, uint, state, u.tex.last_layer);
+ util_dump_member(stream, uint, state, u.tex.level);
+ }
+
+ util_dump_struct_end(stream);
+}
+
+
+void
+util_dump_sampler_view(FILE *stream, const struct pipe_sampler_view *state)
+{
+ if (!state) {
+ util_dump_null(stream);
+ return;
+ }
+
+ util_dump_struct_begin(stream, "pipe_sampler_view");
+
+ util_dump_member(stream, enum_tex_target, state, target);
+ util_dump_member(stream, format, state, format);
+ util_dump_member(stream, ptr, state, texture);
+
+ if (state->target == PIPE_BUFFER) {
+ util_dump_member(stream, uint, state, u.buf.first_element);
+ util_dump_member(stream, uint, state, u.buf.last_element);
+ }
+ else {
+ util_dump_member(stream, uint, state, u.tex.first_layer);
+ util_dump_member(stream, uint, state, u.tex.last_layer);
+ util_dump_member(stream, uint, state, u.tex.first_level);
+ util_dump_member(stream, uint, state, u.tex.last_level);
+ }
+
+ util_dump_member(stream, uint, state, swizzle_r);
+ util_dump_member(stream, uint, state, swizzle_g);
+ util_dump_member(stream, uint, state, swizzle_b);
+ util_dump_member(stream, uint, state, swizzle_a);
+
+ util_dump_struct_end(stream);
+}
+
+
void
util_dump_transfer(FILE *stream, const struct pipe_transfer *state)
{
@@ -694,6 +790,45 @@ util_dump_transfer(FILE *stream, const struct pipe_transfer *state)
}
+void
+util_dump_constant_buffer(FILE *stream,
+ const struct pipe_constant_buffer *state)
+{
+ if (!state) {
+ util_dump_null(stream);
+ return;
+ }
+
+ util_dump_struct_begin(stream, "pipe_constant_buffer");
+
+ util_dump_member(stream, ptr, state, buffer);
+ util_dump_member(stream, uint, state, buffer_offset);
+ util_dump_member(stream, uint, state, buffer_size);
+ util_dump_member(stream, ptr, state, user_buffer);
+
+ util_dump_struct_end(stream);
+}
+
+
+void
+util_dump_index_buffer(FILE *stream, const struct pipe_index_buffer *state)
+{
+ if (!state) {
+ util_dump_null(stream);
+ return;
+ }
+
+ util_dump_struct_begin(stream, "pipe_index_buffer");
+
+ util_dump_member(stream, uint, state, index_size);
+ util_dump_member(stream, uint, state, offset);
+ util_dump_member(stream, ptr, state, buffer);
+ util_dump_member(stream, ptr, state, user_buffer);
+
+ util_dump_struct_end(stream);
+}
+
+
void
util_dump_vertex_buffer(FILE *stream, const struct pipe_vertex_buffer *state)
{
@@ -707,6 +842,7 @@ util_dump_vertex_buffer(FILE *stream, const struct pipe_vertex_buffer *state)
util_dump_member(stream, uint, state, stride);
util_dump_member(stream, uint, state, buffer_offset);
util_dump_member(stream, ptr, state, buffer);
+ util_dump_member(stream, ptr, state, user_buffer);
util_dump_struct_end(stream);
}
@@ -731,6 +867,25 @@ util_dump_vertex_element(FILE *stream, const struct pipe_vertex_element *state)
}
+void
+util_dump_stream_output_target(FILE *stream,
+ const struct pipe_stream_output_target *state)
+{
+ if (!state) {
+ util_dump_null(stream);
+ return;
+ }
+
+ util_dump_struct_begin(stream, "pipe_stream_output_target");
+
+ util_dump_member(stream, ptr, state, buffer);
+ util_dump_member(stream, uint, state, buffer_offset);
+ util_dump_member(stream, uint, state, buffer_size);
+
+ util_dump_struct_end(stream);
+}
+
+
void
util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state)
{
@@ -743,7 +898,7 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state)
util_dump_member(stream, bool, state, indexed);
- util_dump_member(stream, uint, state, mode);
+ util_dump_member(stream, enum_prim_mode, state, mode);
util_dump_member(stream, uint, state, start);
util_dump_member(stream, uint, state, count);
@@ -830,12 +985,14 @@ void util_dump_blit_info(FILE *stream, const struct pipe_blit_info *info)
util_dump_member_begin(stream, "mask");
util_dump_string(stream, mask);
util_dump_member_end(stream);
- util_dump_member(stream, uint, info, filter);
+ util_dump_member(stream, enum_tex_filter, info, filter);
util_dump_member(stream, bool, info, scissor_enable);
util_dump_member_begin(stream, "scissor");
util_dump_scissor_state(stream, &info->scissor);
util_dump_member_end(stream);
+ util_dump_member(stream, bool, info, render_condition_enable);
+
util_dump_struct_end(stream);
}
diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h
index 980cadf22d1..7b7a093d824 100644
--- a/src/gallium/auxiliary/util/u_dynarray.h
+++ b/src/gallium/auxiliary/util/u_dynarray.h
@@ -43,13 +43,13 @@ struct util_dynarray
unsigned capacity;
};
-static INLINE void
+static inline void
util_dynarray_init(struct util_dynarray *buf)
{
memset(buf, 0, sizeof(*buf));
}
-static INLINE void
+static inline void
util_dynarray_fini(struct util_dynarray *buf)
{
if(buf->data)
@@ -60,7 +60,7 @@ util_dynarray_fini(struct util_dynarray *buf)
}
/* use util_dynarray_trim to reduce the allocated storage */
-static INLINE void *
+static inline void *
util_dynarray_resize(struct util_dynarray *buf, unsigned newsize)
{
char *p;
@@ -78,13 +78,13 @@ util_dynarray_resize(struct util_dynarray *buf, unsigned newsize)
return p;
}
-static INLINE void *
+static inline void *
util_dynarray_grow(struct util_dynarray *buf, int diff)
{
return util_dynarray_resize(buf, buf->size + diff);
}
-static INLINE void
+static inline void
util_dynarray_trim(struct util_dynarray *buf)
{
if (buf->size != buf->capacity) {
diff --git a/src/gallium/auxiliary/util/u_fifo.h b/src/gallium/auxiliary/util/u_fifo.h
index 9e007de1ada..a7aad6179d9 100644
--- a/src/gallium/auxiliary/util/u_fifo.h
+++ b/src/gallium/auxiliary/util/u_fifo.h
@@ -36,7 +36,7 @@ struct util_fifo
size_t size;
};
-static INLINE struct util_fifo *
+static inline struct util_fifo *
u_fifo_create(size_t size)
{
struct util_fifo *fifo;
@@ -50,7 +50,7 @@ u_fifo_create(size_t size)
return fifo;
}
-static INLINE boolean
+static inline boolean
u_fifo_add(struct util_fifo *fifo, void *ptr)
{
void **array = (void**)&fifo[1];
@@ -67,7 +67,7 @@ u_fifo_add(struct util_fifo *fifo, void *ptr)
return TRUE;
}
-static INLINE boolean
+static inline boolean
u_fifo_pop(struct util_fifo *fifo, void **ptr)
{
void **array = (void**)&fifo[1];
@@ -85,7 +85,7 @@ u_fifo_pop(struct util_fifo *fifo, void **ptr)
return TRUE;
}
-static INLINE void
+static inline void
u_fifo_destroy(struct util_fifo *fifo)
{
FREE(fifo);
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 621574c9673..42b39ff04fd 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -425,7 +425,7 @@ util_format_description(enum pipe_format format);
* Format query functions.
*/
-static INLINE const char *
+static inline const char *
util_format_name(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -438,7 +438,7 @@ util_format_name(enum pipe_format format)
return desc->name;
}
-static INLINE const char *
+static inline const char *
util_format_short_name(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -454,7 +454,7 @@ util_format_short_name(enum pipe_format format)
/**
* Whether this format is plain, see UTIL_FORMAT_LAYOUT_PLAIN for more info.
*/
-static INLINE boolean
+static inline boolean
util_format_is_plain(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -466,7 +466,7 @@ util_format_is_plain(enum pipe_format format)
return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ? TRUE : FALSE;
}
-static INLINE boolean
+static inline boolean
util_format_is_compressed(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -488,7 +488,7 @@ util_format_is_compressed(enum pipe_format format)
}
}
-static INLINE boolean
+static inline boolean
util_format_is_s3tc(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -501,28 +501,28 @@ util_format_is_s3tc(enum pipe_format format)
return desc->layout == UTIL_FORMAT_LAYOUT_S3TC ? TRUE : FALSE;
}
-static INLINE boolean
+static inline boolean
util_format_is_srgb(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
return desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
}
-static INLINE boolean
+static inline boolean
util_format_has_depth(const struct util_format_description *desc)
{
return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
desc->swizzle[0] != UTIL_FORMAT_SWIZZLE_NONE;
}
-static INLINE boolean
+static inline boolean
util_format_has_stencil(const struct util_format_description *desc)
{
return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE;
}
-static INLINE boolean
+static inline boolean
util_format_is_depth_or_stencil(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -536,7 +536,7 @@ util_format_is_depth_or_stencil(enum pipe_format format)
util_format_has_stencil(desc);
}
-static INLINE boolean
+static inline boolean
util_format_is_depth_and_stencil(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -554,7 +554,7 @@ util_format_is_depth_and_stencil(enum pipe_format format)
/**
* Calculates the depth format type based upon the incoming format description.
*/
-static INLINE unsigned
+static inline unsigned
util_get_depth_format_type(const struct util_format_description *desc)
{
unsigned depth_channel = desc->swizzle[0];
@@ -581,7 +581,7 @@ util_get_depth_format_mrd(const struct util_format_description *desc);
* Return whether this is an RGBA, Z, S, or combined ZS format.
* Useful for initializing pipe_blit_info::mask.
*/
-static INLINE unsigned
+static inline unsigned
util_format_get_mask(enum pipe_format format)
{
const struct util_format_description *desc =
@@ -611,7 +611,7 @@ util_format_get_mask(enum pipe_format format)
*
* That is, the channels whose values are preserved.
*/
-static INLINE unsigned
+static inline unsigned
util_format_colormask(const struct util_format_description *desc)
{
unsigned colormask;
@@ -643,7 +643,7 @@ util_format_colormask(const struct util_format_description *desc)
* @param desc a format description to check colormask with
* @param colormask a bit mask for channels, matches format of PIPE_MASK_RGBA
*/
-static INLINE boolean
+static inline boolean
util_format_colormask_full(const struct util_format_description *desc, unsigned colormask)
{
return (~colormask & util_format_colormask(desc)) == 0;
@@ -709,7 +709,7 @@ util_format_is_supported(enum pipe_format format, unsigned bind);
*
* PIPE_FORMAT_?8?8?8?8_UNORM
*/
-static INLINE boolean
+static inline boolean
util_format_is_rgba8_variant(const struct util_format_description *desc)
{
unsigned chan;
@@ -737,7 +737,7 @@ util_format_is_rgba8_variant(const struct util_format_description *desc)
/**
* Return total bits needed for the pixel format per block.
*/
-static INLINE uint
+static inline uint
util_format_get_blocksizebits(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -753,7 +753,7 @@ util_format_get_blocksizebits(enum pipe_format format)
/**
* Return bytes per block (not pixel) for the given format.
*/
-static INLINE uint
+static inline uint
util_format_get_blocksize(enum pipe_format format)
{
uint bits = util_format_get_blocksizebits(format);
@@ -768,7 +768,7 @@ util_format_get_blocksize(enum pipe_format format)
return bytes;
}
-static INLINE uint
+static inline uint
util_format_get_blockwidth(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -781,7 +781,7 @@ util_format_get_blockwidth(enum pipe_format format)
return desc->block.width;
}
-static INLINE uint
+static inline uint
util_format_get_blockheight(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -794,7 +794,7 @@ util_format_get_blockheight(enum pipe_format format)
return desc->block.height;
}
-static INLINE unsigned
+static inline unsigned
util_format_get_nblocksx(enum pipe_format format,
unsigned x)
{
@@ -802,7 +802,7 @@ util_format_get_nblocksx(enum pipe_format format,
return (x + blockwidth - 1) / blockwidth;
}
-static INLINE unsigned
+static inline unsigned
util_format_get_nblocksy(enum pipe_format format,
unsigned y)
{
@@ -810,7 +810,7 @@ util_format_get_nblocksy(enum pipe_format format,
return (y + blockheight - 1) / blockheight;
}
-static INLINE unsigned
+static inline unsigned
util_format_get_nblocks(enum pipe_format format,
unsigned width,
unsigned height)
@@ -818,14 +818,14 @@ util_format_get_nblocks(enum pipe_format format,
return util_format_get_nblocksx(format, width) * util_format_get_nblocksy(format, height);
}
-static INLINE size_t
+static inline size_t
util_format_get_stride(enum pipe_format format,
unsigned width)
{
return util_format_get_nblocksx(format, width) * util_format_get_blocksize(format);
}
-static INLINE size_t
+static inline size_t
util_format_get_2d_size(enum pipe_format format,
size_t stride,
unsigned height)
@@ -833,7 +833,7 @@ util_format_get_2d_size(enum pipe_format format,
return util_format_get_nblocksy(format, height) * stride;
}
-static INLINE uint
+static inline uint
util_format_get_component_bits(enum pipe_format format,
enum util_format_colorspace colorspace,
uint component)
@@ -880,7 +880,7 @@ util_format_get_component_bits(enum pipe_format format,
* Given a linear RGB colorspace format, return the corresponding SRGB
* format, or PIPE_FORMAT_NONE if none.
*/
-static INLINE enum pipe_format
+static inline enum pipe_format
util_format_srgb(enum pipe_format format)
{
if (util_format_is_srgb(format))
@@ -930,7 +930,7 @@ util_format_srgb(enum pipe_format format)
* Given an sRGB format, return the corresponding linear colorspace format.
* For non sRGB formats, return the format unchanged.
*/
-static INLINE enum pipe_format
+static inline enum pipe_format
util_format_linear(enum pipe_format format)
{
switch (format) {
@@ -977,7 +977,7 @@ util_format_linear(enum pipe_format format)
* Given a depth-stencil format, return the corresponding stencil-only format.
* For stencil-only formats, return the format unchanged.
*/
-static INLINE enum pipe_format
+static inline enum pipe_format
util_format_stencil_only(enum pipe_format format)
{
switch (format) {
@@ -1006,7 +1006,7 @@ util_format_stencil_only(enum pipe_format format)
* Converts PIPE_FORMAT_*I* to PIPE_FORMAT_*R*.
* This is identity for non-intensity formats.
*/
-static INLINE enum pipe_format
+static inline enum pipe_format
util_format_intensity_to_red(enum pipe_format format)
{
switch (format) {
@@ -1044,7 +1044,7 @@ util_format_intensity_to_red(enum pipe_format format)
* Converts PIPE_FORMAT_*L* to PIPE_FORMAT_*R*.
* This is identity for non-luminance formats.
*/
-static INLINE enum pipe_format
+static inline enum pipe_format
util_format_luminance_to_red(enum pipe_format format)
{
switch (format) {
@@ -1122,7 +1122,7 @@ util_format_luminance_to_red(enum pipe_format format)
* Return the number of components stored.
* Formats with block size != 1x1 will always have 1 component (the block).
*/
-static INLINE unsigned
+static inline unsigned
util_format_get_nr_components(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
@@ -1133,7 +1133,7 @@ util_format_get_nr_components(enum pipe_format format)
* Return the index of the first non-void channel
* -1 if no non-void channels
*/
-static INLINE int
+static inline int
util_format_get_first_non_void_channel(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index d5138cc0577..fb42de723c4 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -616,7 +616,7 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
name = format.short_name()
- print 'static INLINE void'
+ print 'static inline void'
print 'util_format_%s_unpack_%s(%s *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, dst_suffix, dst_native_type)
print '{'
@@ -645,7 +645,7 @@ def generate_format_pack(format, src_channel, src_native_type, src_suffix):
name = format.short_name()
- print 'static INLINE void'
+ print 'static inline void'
print 'util_format_%s_pack_%s(uint8_t *dst_row, unsigned dst_stride, const %s *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, src_suffix, src_native_type)
print '{'
@@ -674,7 +674,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix):
name = format.short_name()
- print 'static INLINE void'
+ print 'static inline void'
print 'util_format_%s_fetch_%s(%s *dst, const uint8_t *src, unsigned i, unsigned j)' % (name, dst_suffix, dst_native_type)
print '{'
diff --git a/src/gallium/auxiliary/util/u_format_r11g11b10f.h b/src/gallium/auxiliary/util/u_format_r11g11b10f.h
index 57516c39c6e..218822b16e6 100644
--- a/src/gallium/auxiliary/util/u_format_r11g11b10f.h
+++ b/src/gallium/auxiliary/util/u_format_r11g11b10f.h
@@ -45,7 +45,7 @@
#define F32_INFINITY 0x7f800000
-static INLINE unsigned f32_to_uf11(float val)
+static inline unsigned f32_to_uf11(float val)
{
union {
float f;
@@ -94,7 +94,7 @@ static INLINE unsigned f32_to_uf11(float val)
return uf11;
}
-static INLINE float uf11_to_f32(uint16_t val)
+static inline float uf11_to_f32(uint16_t val)
{
union {
float f;
@@ -131,7 +131,7 @@ static INLINE float uf11_to_f32(uint16_t val)
return f32.f;
}
-static INLINE unsigned f32_to_uf10(float val)
+static inline unsigned f32_to_uf10(float val)
{
union {
float f;
@@ -180,7 +180,7 @@ static INLINE unsigned f32_to_uf10(float val)
return uf10;
}
-static INLINE float uf10_to_f32(uint16_t val)
+static inline float uf10_to_f32(uint16_t val)
{
union {
float f;
@@ -217,14 +217,14 @@ static INLINE float uf10_to_f32(uint16_t val)
return f32.f;
}
-static INLINE unsigned float3_to_r11g11b10f(const float rgb[3])
+static inline unsigned float3_to_r11g11b10f(const float rgb[3])
{
return ( f32_to_uf11(rgb[0]) & 0x7ff) |
((f32_to_uf11(rgb[1]) & 0x7ff) << 11) |
((f32_to_uf10(rgb[2]) & 0x3ff) << 22);
}
-static INLINE void r11g11b10f_to_float3(unsigned rgb, float retval[3])
+static inline void r11g11b10f_to_float3(unsigned rgb, float retval[3])
{
retval[0] = uf11_to_f32( rgb & 0x7ff);
retval[1] = uf11_to_f32((rgb >> 11) & 0x7ff);
diff --git a/src/gallium/auxiliary/util/u_format_rgb9e5.h b/src/gallium/auxiliary/util/u_format_rgb9e5.h
index c2a3f6f3e9d..59fc291e917 100644
--- a/src/gallium/auxiliary/util/u_format_rgb9e5.h
+++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h
@@ -26,9 +26,10 @@
#ifndef RGB9E5_H
#define RGB9E5_H
-#include
#include
+#include "c99_math.h"
+
#define RGB9E5_EXPONENT_BITS 5
#define RGB9E5_MANTISSA_BITS 9
#define RGB9E5_EXP_BIAS 15
@@ -73,9 +74,9 @@ typedef union {
} field;
} rgb9e5;
-static INLINE float rgb9e5_ClampRange(float x)
+static inline float rgb9e5_ClampRange(float x)
{
- if (x > 0.0) {
+ if (x > 0.0f) {
if (x >= MAX_RGB9E5) {
return MAX_RGB9E5;
} else {
@@ -90,7 +91,7 @@ static INLINE float rgb9e5_ClampRange(float x)
/* Ok, FloorLog2 is not correct for the denorm and zero values, but we
are going to do a max of this value with the minimum rgb9e5 exponent
that will hide these problem cases. */
-static INLINE int rgb9e5_FloorLog2(float x)
+static inline int rgb9e5_FloorLog2(float x)
{
float754 f;
@@ -98,7 +99,7 @@ static INLINE int rgb9e5_FloorLog2(float x)
return (f.field.biasedexponent - 127);
}
-static INLINE unsigned float3_to_rgb9e5(const float rgb[3])
+static inline unsigned float3_to_rgb9e5(const float rgb[3])
{
rgb9e5 retval;
float maxrgb;
@@ -115,8 +116,8 @@ static INLINE unsigned float3_to_rgb9e5(const float rgb[3])
exp_shared = MAX2(-RGB9E5_EXP_BIAS-1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
assert(exp_shared >= 0);
- /* This pow function could be replaced by a table. */
- denom = pow(2, exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);
+ /* This exp2 function could be replaced by a table. */
+ denom = exp2(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);
maxm = (int) floor(maxrgb / denom + 0.5);
if (maxm == MAX_RGB9E5_MANTISSA+1) {
@@ -146,7 +147,7 @@ static INLINE unsigned float3_to_rgb9e5(const float rgb[3])
return retval.raw;
}
-static INLINE void rgb9e5_to_float3(unsigned rgb, float retval[3])
+static inline void rgb9e5_to_float3(unsigned rgb, float retval[3])
{
rgb9e5 v;
int exponent;
@@ -154,7 +155,7 @@ static INLINE void rgb9e5_to_float3(unsigned rgb, float retval[3])
v.raw = rgb;
exponent = v.field.biasedexponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS;
- scale = (float) pow(2, exponent);
+ scale = exp2f(exponent);
retval[0] = v.field.r * scale;
retval[1] = v.field.g * scale;
diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c
index 7e05989e6a1..cd3e165d3f0 100644
--- a/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -235,7 +235,7 @@ util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned
* Block decompression.
*/
-static INLINE void
+static inline void
util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
const uint8_t *src_row, unsigned src_stride,
unsigned width, unsigned height,
@@ -312,7 +312,7 @@ util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
16, FALSE);
}
-static INLINE void
+static inline void
util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
const uint8_t *src_row, unsigned src_stride,
unsigned width, unsigned height,
@@ -400,7 +400,7 @@ util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
* Block compression.
*/
-static INLINE void
+static inline void
util_format_dxtn_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
const uint8_t *src, unsigned src_stride,
unsigned width, unsigned height,
@@ -478,7 +478,7 @@ util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
16, FALSE);
}
-static INLINE void
+static inline void
util_format_dxtn_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
const float *src, unsigned src_stride,
unsigned width, unsigned height,
diff --git a/src/gallium/auxiliary/util/u_format_yuv.h b/src/gallium/auxiliary/util/u_format_yuv.h
index 4ec39812e47..41524d63f3a 100644
--- a/src/gallium/auxiliary/util/u_format_yuv.h
+++ b/src/gallium/auxiliary/util/u_format_yuv.h
@@ -54,7 +54,7 @@
* precision in the coefficients.
*/
-static INLINE void
+static inline void
util_format_rgb_float_to_yuv(float r, float g, float b,
uint8_t *y, uint8_t *u, uint8_t *v)
{
@@ -74,7 +74,7 @@ util_format_rgb_float_to_yuv(float r, float g, float b,
}
-static INLINE void
+static inline void
util_format_yuv_to_rgb_float(uint8_t y, uint8_t u, uint8_t v,
float *r, float *g, float *b)
{
@@ -92,7 +92,7 @@ util_format_yuv_to_rgb_float(uint8_t y, uint8_t u, uint8_t v,
}
-static INLINE void
+static inline void
util_format_rgb_8unorm_to_yuv(uint8_t r, uint8_t g, uint8_t b,
uint8_t *y, uint8_t *u, uint8_t *v)
{
@@ -102,7 +102,7 @@ util_format_rgb_8unorm_to_yuv(uint8_t r, uint8_t g, uint8_t b,
}
-static INLINE void
+static inline void
util_format_yuv_to_rgb_8unorm(uint8_t y, uint8_t u, uint8_t v,
uint8_t *r, uint8_t *g, uint8_t *b)
{
diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c
index f1ed32f1d5c..69f2f2971f7 100644
--- a/src/gallium/auxiliary/util/u_format_zs.c
+++ b/src/gallium/auxiliary/util/u_format_zs.c
@@ -35,28 +35,28 @@
* z32_unorm conversion functions
*/
-static INLINE uint16_t
+static inline uint16_t
z32_unorm_to_z16_unorm(uint32_t z)
{
/* z * 0xffff / 0xffffffff */
return z >> 16;
}
-static INLINE uint32_t
+static inline uint32_t
z16_unorm_to_z32_unorm(uint16_t z)
{
/* z * 0xffffffff / 0xffff */
return (z << 16) | z;
}
-static INLINE uint32_t
+static inline uint32_t
z32_unorm_to_z24_unorm(uint32_t z)
{
/* z * 0xffffff / 0xffffffff */
return z >> 8;
}
-static INLINE uint32_t
+static inline uint32_t
z24_unorm_to_z32_unorm(uint32_t z)
{
/* z * 0xffffffff / 0xffffff */
@@ -68,42 +68,42 @@ z24_unorm_to_z32_unorm(uint32_t z)
* z32_float conversion functions
*/
-static INLINE uint16_t
+static inline uint16_t
z32_float_to_z16_unorm(float z)
{
const float scale = 0xffff;
return (uint16_t)(z * scale + 0.5f);
}
-static INLINE float
+static inline float
z16_unorm_to_z32_float(uint16_t z)
{
const float scale = 1.0 / 0xffff;
return (float)(z * scale);
}
-static INLINE uint32_t
+static inline uint32_t
z32_float_to_z24_unorm(float z)
{
const double scale = 0xffffff;
return (uint32_t)(z * scale) & 0xffffff;
}
-static INLINE float
+static inline float
z24_unorm_to_z32_float(uint32_t z)
{
const double scale = 1.0 / 0xffffff;
return (float)(z * scale);
}
-static INLINE uint32_t
+static inline uint32_t
z32_float_to_z32_unorm(float z)
{
const double scale = 0xffffffff;
return (uint32_t)(z * scale);
}
-static INLINE float
+static inline float
z32_unorm_to_z32_float(uint32_t z)
{
const double scale = 1.0 / 0xffffffff;
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
index d340b9a7aef..d28fae3c77d 100644
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -43,7 +43,7 @@ extern "C" {
* https://gist.github.com/2144712
*/
-static INLINE uint16_t
+static inline uint16_t
util_float_to_half(float f)
{
uint32_t sign_mask = 0x80000000;
@@ -96,7 +96,7 @@ util_float_to_half(float f)
return f16;
}
-static INLINE float
+static inline float
util_half_to_float(uint16_t f16)
{
union fi infnan;
diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c
index 85302f1e194..42c4e44b644 100644
--- a/src/gallium/auxiliary/util/u_handle_table.c
+++ b/src/gallium/auxiliary/util/u_handle_table.c
@@ -96,7 +96,7 @@ handle_table_set_destroy(struct handle_table *ht,
/**
* Resize the table if necessary
*/
-static INLINE int
+static inline int
handle_table_resize(struct handle_table *ht,
unsigned minimum_size)
{
@@ -126,7 +126,7 @@ handle_table_resize(struct handle_table *ht,
}
-static INLINE void
+static inline void
handle_table_clear(struct handle_table *ht,
unsigned index)
{
diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c
index 06c8b5c91a5..a505fbc4d83 100644
--- a/src/gallium/auxiliary/util/u_hash_table.c
+++ b/src/gallium/auxiliary/util/u_hash_table.c
@@ -68,7 +68,7 @@ struct util_hash_table_item
};
-static INLINE struct util_hash_table_item *
+static inline struct util_hash_table_item *
util_hash_table_item(struct cso_hash_iter iter)
{
return (struct util_hash_table_item *)cso_hash_iter_data(iter);
@@ -98,7 +98,7 @@ util_hash_table_create(unsigned (*hash)(void *key),
}
-static INLINE struct cso_hash_iter
+static inline struct cso_hash_iter
util_hash_table_find_iter(struct util_hash_table *ht,
void *key,
unsigned key_hash)
@@ -118,7 +118,7 @@ util_hash_table_find_iter(struct util_hash_table *ht,
}
-static INLINE struct util_hash_table_item *
+static inline struct util_hash_table_item *
util_hash_table_find_item(struct util_hash_table *ht,
void *key,
unsigned key_hash)
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 95401621ec3..bb99a02ce49 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -51,13 +51,13 @@ extern "C" {
*/
-static INLINE void
+static inline void
pipe_reference_init(struct pipe_reference *reference, unsigned count)
{
p_atomic_set(&reference->count, count);
}
-static INLINE boolean
+static inline boolean
pipe_is_referenced(struct pipe_reference *reference)
{
return p_atomic_read(&reference->count) != 0;
@@ -69,7 +69,7 @@ pipe_is_referenced(struct pipe_reference *reference)
* Both 'ptr' and 'reference' may be NULL.
* \return TRUE if the object's refcount hits zero and should be destroyed.
*/
-static INLINE boolean
+static inline boolean
pipe_reference_described(struct pipe_reference *ptr,
struct pipe_reference *reference,
debug_reference_descriptor get_desc)
@@ -96,14 +96,14 @@ pipe_reference_described(struct pipe_reference *ptr,
return destroy;
}
-static INLINE boolean
+static inline boolean
pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
{
return pipe_reference_described(ptr, reference,
(debug_reference_descriptor)debug_describe_reference);
}
-static INLINE void
+static inline void
pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
{
struct pipe_surface *old_surf = *ptr;
@@ -120,7 +120,7 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
* of using a deleted context's surface_destroy() method when freeing a surface
* that's shared by multiple contexts.
*/
-static INLINE void
+static inline void
pipe_surface_release(struct pipe_context *pipe, struct pipe_surface **ptr)
{
if (pipe_reference_described(&(*ptr)->reference, NULL,
@@ -130,7 +130,7 @@ pipe_surface_release(struct pipe_context *pipe, struct pipe_surface **ptr)
}
-static INLINE void
+static inline void
pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
{
struct pipe_resource *old_tex = *ptr;
@@ -141,7 +141,7 @@ pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
*ptr = tex;
}
-static INLINE void
+static inline void
pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
{
struct pipe_sampler_view *old_view = *ptr;
@@ -158,7 +158,7 @@ pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_
* work-around for fixing a dangling context pointer problem when textures
* are shared by multiple contexts. XXX fix this someday.
*/
-static INLINE void
+static inline void
pipe_sampler_view_release(struct pipe_context *ctx,
struct pipe_sampler_view **ptr)
{
@@ -173,8 +173,18 @@ pipe_sampler_view_release(struct pipe_context *ctx,
*ptr = NULL;
}
+static inline void
+pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view *view)
+{
+ struct pipe_image_view *old_view = *ptr;
-static INLINE void
+ if (pipe_reference_described(&(*ptr)->reference, &view->reference,
+ (debug_reference_descriptor)debug_describe_image_view))
+ old_view->context->image_view_destroy(old_view->context, old_view);
+ *ptr = view;
+}
+
+static inline void
pipe_so_target_reference(struct pipe_stream_output_target **ptr,
struct pipe_stream_output_target *target)
{
@@ -186,7 +196,7 @@ pipe_so_target_reference(struct pipe_stream_output_target **ptr,
*ptr = target;
}
-static INLINE void
+static inline void
pipe_surface_reset(struct pipe_context *ctx, struct pipe_surface* ps,
struct pipe_resource *pt, unsigned level, unsigned layer)
{
@@ -199,7 +209,7 @@ pipe_surface_reset(struct pipe_context *ctx, struct pipe_surface* ps,
ps->context = ctx;
}
-static INLINE void
+static inline void
pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps,
struct pipe_resource *pt, unsigned level, unsigned layer)
{
@@ -209,7 +219,7 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps,
}
/* Return true if the surfaces are equal. */
-static INLINE boolean
+static inline boolean
pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2)
{
return s1->texture == s2->texture &&
@@ -233,7 +243,7 @@ pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2)
* \param bind bitmask of PIPE_BIND_x flags
* \param usage bitmask of PIPE_USAGE_x flags
*/
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
pipe_buffer_create( struct pipe_screen *screen,
unsigned bind,
unsigned usage,
@@ -261,7 +271,7 @@ pipe_buffer_create( struct pipe_screen *screen,
* \param access bitmask of PIPE_TRANSFER_x flags
* \param transfer returns a transfer object
*/
-static INLINE void *
+static inline void *
pipe_buffer_map_range(struct pipe_context *pipe,
struct pipe_resource *buffer,
unsigned offset,
@@ -292,7 +302,7 @@ pipe_buffer_map_range(struct pipe_context *pipe,
* \param access bitmask of PIPE_TRANSFER_x flags
* \param transfer returns a transfer object
*/
-static INLINE void *
+static inline void *
pipe_buffer_map(struct pipe_context *pipe,
struct pipe_resource *buffer,
unsigned access,
@@ -302,14 +312,14 @@ pipe_buffer_map(struct pipe_context *pipe,
}
-static INLINE void
+static inline void
pipe_buffer_unmap(struct pipe_context *pipe,
struct pipe_transfer *transfer)
{
pipe->transfer_unmap(pipe, transfer);
}
-static INLINE void
+static inline void
pipe_buffer_flush_mapped_range(struct pipe_context *pipe,
struct pipe_transfer *transfer,
unsigned offset,
@@ -333,7 +343,7 @@ pipe_buffer_flush_mapped_range(struct pipe_context *pipe,
pipe->transfer_flush_region(pipe, transfer, &box);
}
-static INLINE void
+static inline void
pipe_buffer_write(struct pipe_context *pipe,
struct pipe_resource *buf,
unsigned offset,
@@ -367,7 +377,7 @@ pipe_buffer_write(struct pipe_context *pipe,
* We can avoid GPU/CPU synchronization when writing range that has never
* been written before.
*/
-static INLINE void
+static inline void
pipe_buffer_write_nooverlap(struct pipe_context *pipe,
struct pipe_resource *buf,
unsigned offset, unsigned size,
@@ -393,7 +403,7 @@ pipe_buffer_write_nooverlap(struct pipe_context *pipe,
* \param bind bitmask of PIPE_BIND_x flags
* \param usage bitmask of PIPE_USAGE_x flags
*/
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
pipe_buffer_create_with_data(struct pipe_context *pipe,
unsigned bind,
unsigned usage,
@@ -406,7 +416,7 @@ pipe_buffer_create_with_data(struct pipe_context *pipe,
return res;
}
-static INLINE void
+static inline void
pipe_buffer_read(struct pipe_context *pipe,
struct pipe_resource *buf,
unsigned offset,
@@ -433,7 +443,7 @@ pipe_buffer_read(struct pipe_context *pipe,
* Map a resource for reading/writing.
* \param access bitmask of PIPE_TRANSFER_x flags
*/
-static INLINE void *
+static inline void *
pipe_transfer_map(struct pipe_context *context,
struct pipe_resource *resource,
unsigned level, unsigned layer,
@@ -456,7 +466,7 @@ pipe_transfer_map(struct pipe_context *context,
* Map a 3D (texture) resource for reading/writing.
* \param access bitmask of PIPE_TRANSFER_x flags
*/
-static INLINE void *
+static inline void *
pipe_transfer_map_3d(struct pipe_context *context,
struct pipe_resource *resource,
unsigned level,
@@ -474,14 +484,14 @@ pipe_transfer_map_3d(struct pipe_context *context,
&box, transfer);
}
-static INLINE void
+static inline void
pipe_transfer_unmap( struct pipe_context *context,
struct pipe_transfer *transfer )
{
context->transfer_unmap( context, transfer );
}
-static INLINE void
+static inline void
pipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
struct pipe_resource *buf)
{
@@ -502,7 +512,7 @@ pipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
* Get the polygon offset enable/disable flag for the given polygon fill mode.
* \param fill_mode one of PIPE_POLYGON_MODE_POINT/LINE/FILL
*/
-static INLINE boolean
+static inline boolean
util_get_offset(const struct pipe_rasterizer_state *templ,
unsigned fill_mode)
{
@@ -519,7 +529,7 @@ util_get_offset(const struct pipe_rasterizer_state *templ,
}
}
-static INLINE float
+static inline float
util_get_min_point_size(const struct pipe_rasterizer_state *state)
{
/* The point size should be clamped to this value at the rasterizer stage.
@@ -529,7 +539,7 @@ util_get_min_point_size(const struct pipe_rasterizer_state *state)
!state->multisample ? 1.0f : 0.0f;
}
-static INLINE void
+static inline void
util_query_clear_result(union pipe_query_result *result, unsigned type)
{
switch (type) {
@@ -560,7 +570,7 @@ util_query_clear_result(union pipe_query_result *result, unsigned type)
}
/** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */
-static INLINE unsigned
+static inline unsigned
util_pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target,
unsigned nr_samples)
{
@@ -605,7 +615,7 @@ util_pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target,
}
-static INLINE void
+static inline void
util_copy_constant_buffer(struct pipe_constant_buffer *dst,
const struct pipe_constant_buffer *src)
{
@@ -623,7 +633,7 @@ util_copy_constant_buffer(struct pipe_constant_buffer *dst,
}
}
-static INLINE unsigned
+static inline unsigned
util_max_layer(const struct pipe_resource *r, unsigned level)
{
switch (r->target) {
diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c
index ae14eda3cec..daa2991ced6 100644
--- a/src/gallium/auxiliary/util/u_keymap.c
+++ b/src/gallium/auxiliary/util/u_keymap.c
@@ -71,7 +71,7 @@ default_delete_func(const struct keymap *map,
}
-static INLINE struct keymap_item *
+static inline struct keymap_item *
hash_table_item(struct cso_hash_iter iter)
{
return (struct keymap_item *) cso_hash_iter_data(iter);
@@ -143,7 +143,7 @@ util_delete_keymap(struct keymap *map, void *user)
}
-static INLINE struct cso_hash_iter
+static inline struct cso_hash_iter
hash_table_find_iter(const struct keymap *map, const void *key,
unsigned key_hash)
{
@@ -162,7 +162,7 @@ hash_table_find_iter(const struct keymap *map, const void *key,
}
-static INLINE struct keymap_item *
+static inline struct keymap_item *
hash_table_find_item(const struct keymap *map, const void *key,
unsigned key_hash)
{
diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h
index 81ffc9fb27d..87e52a344d4 100644
--- a/src/gallium/auxiliary/util/u_linear.h
+++ b/src/gallium/auxiliary/util/u_linear.h
@@ -89,7 +89,7 @@ void pipe_linear_fill_info(struct pipe_tile_info *t,
unsigned tile_width, unsigned tile_height,
unsigned tiles_x, unsigned tiles_y);
-static INLINE boolean pipe_linear_check_tile(const struct pipe_tile_info *t)
+static inline boolean pipe_linear_check_tile(const struct pipe_tile_info *t)
{
if (t->tile.size != t->block.size * t->cols * t->rows)
return FALSE;
diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c
index ae9e9513b04..c58af911be7 100644
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -48,7 +48,7 @@ init_pow2_table(void)
{
int i;
for (i = 0; i < POW2_TABLE_SIZE; i++)
- pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE);
+ pow2_table[i] = exp2f((i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE);
}
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 3b4040f0ee2..56bd185f527 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -92,7 +92,7 @@ union di {
/**
* Extract the IEEE float32 exponent.
*/
-static INLINE signed
+static inline signed
util_get_float32_exponent(float x)
{
union fi f;
@@ -112,7 +112,7 @@ util_get_float32_exponent(float x)
* Compute exp2(ipart) with i << ipart
* Compute exp2(fpart) with lookup table.
*/
-static INLINE float
+static inline float
util_fast_exp2(float x)
{
int32_t ipart;
@@ -143,7 +143,7 @@ util_fast_exp2(float x)
/**
* Fast approximation to exp(x).
*/
-static INLINE float
+static inline float
util_fast_exp(float x)
{
const float k = 1.44269f; /* = log2(e) */
@@ -160,7 +160,7 @@ extern float log2_table[LOG2_TABLE_SIZE];
/**
* Fast approximation to log2(x).
*/
-static INLINE float
+static inline float
util_fast_log2(float x)
{
union fi num;
@@ -176,7 +176,7 @@ util_fast_log2(float x)
/**
* Fast approximation to x^y.
*/
-static INLINE float
+static inline float
util_fast_pow(float x, float y)
{
return util_fast_exp2(util_fast_log2(x) * y);
@@ -184,7 +184,7 @@ util_fast_pow(float x, float y)
/* Note that this counts zero as a power of two.
*/
-static INLINE boolean
+static inline boolean
util_is_power_of_two( unsigned v )
{
return (v & (v-1)) == 0;
@@ -194,7 +194,7 @@ util_is_power_of_two( unsigned v )
/**
* Floor(x), returned as int.
*/
-static INLINE int
+static inline int
util_ifloor(float f)
{
int ai, bi;
@@ -211,7 +211,7 @@ util_ifloor(float f)
/**
* Round float to nearest int.
*/
-static INLINE int
+static inline int
util_iround(float f)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
@@ -237,10 +237,10 @@ util_iround(float f)
/**
* Approximate floating point comparison
*/
-static INLINE boolean
+static inline boolean
util_is_approx(float a, float b, float tol)
{
- return fabs(b - a) <= tol;
+ return fabsf(b - a) <= tol;
}
@@ -256,7 +256,7 @@ util_is_approx(float a, float b, float tol)
/**
* Single-float
*/
-static INLINE boolean
+static inline boolean
util_is_inf_or_nan(float x)
{
union fi tmp;
@@ -265,7 +265,7 @@ util_is_inf_or_nan(float x)
}
-static INLINE boolean
+static inline boolean
util_is_nan(float x)
{
union fi tmp;
@@ -274,7 +274,7 @@ util_is_nan(float x)
}
-static INLINE int
+static inline int
util_inf_sign(float x)
{
union fi tmp;
@@ -290,7 +290,7 @@ util_inf_sign(float x)
/**
* Double-float
*/
-static INLINE boolean
+static inline boolean
util_is_double_inf_or_nan(double x)
{
union di tmp;
@@ -299,7 +299,7 @@ util_is_double_inf_or_nan(double x)
}
-static INLINE boolean
+static inline boolean
util_is_double_nan(double x)
{
union di tmp;
@@ -308,7 +308,7 @@ util_is_double_nan(double x)
}
-static INLINE int
+static inline int
util_double_inf_sign(double x)
{
union di tmp;
@@ -324,21 +324,21 @@ util_double_inf_sign(double x)
/**
* Half-float
*/
-static INLINE boolean
+static inline boolean
util_is_half_inf_or_nan(int16_t x)
{
return (x & 0x7c00) == 0x7c00;
}
-static INLINE boolean
+static inline boolean
util_is_half_nan(int16_t x)
{
return (x & 0x7fff) > 0x7c00;
}
-static INLINE int
+static inline int
util_half_inf_sign(int16_t x)
{
if ((x & 0x7fff) != 0x7c00) {
@@ -359,7 +359,7 @@ util_half_inf_sign(int16_t x)
#if defined(_MSC_VER) && (_M_IX86 || _M_AMD64 || _M_IA64)
unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask);
#pragma intrinsic(_BitScanForward)
-static INLINE
+static inline
unsigned long ffs( unsigned long u )
{
unsigned long i;
@@ -369,7 +369,7 @@ unsigned long ffs( unsigned long u )
return 0;
}
#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
-static INLINE
+static inline
unsigned ffs( unsigned u )
{
unsigned i;
@@ -409,7 +409,7 @@ unsigned ffs( unsigned u )
* Find last bit set in a word. The least significant bit is 1.
* Return 0 if no bits are set.
*/
-static INLINE unsigned
+static inline unsigned
util_last_bit(unsigned u)
{
#if defined(HAVE___BUILTIN_CLZ)
@@ -428,7 +428,7 @@ util_last_bit(unsigned u)
* Find last bit set in a word. The least significant bit is 1.
* Return 0 if no bits are set.
*/
-static INLINE unsigned
+static inline unsigned
util_last_bit64(uint64_t u)
{
#if defined(HAVE___BUILTIN_CLZLL)
@@ -448,7 +448,7 @@ util_last_bit64(uint64_t u)
* significant bit is 1.
* Return 0 if no bits are set.
*/
-static INLINE unsigned
+static inline unsigned
util_last_bit_signed(int i)
{
if (i >= 0)
@@ -465,7 +465,7 @@ util_last_bit_signed(int i)
* }
*
*/
-static INLINE int
+static inline int
u_bit_scan(unsigned *mask)
{
int i = ffs(*mask) - 1;
@@ -474,7 +474,7 @@ u_bit_scan(unsigned *mask)
}
#ifndef _MSC_VER
-static INLINE int
+static inline int
u_bit_scan64(uint64_t *mask)
{
int i = ffsll(*mask) - 1;
@@ -486,7 +486,7 @@ u_bit_scan64(uint64_t *mask)
/**
* Return float bits.
*/
-static INLINE unsigned
+static inline unsigned
fui( float f )
{
union fi fi;
@@ -494,7 +494,7 @@ fui( float f )
return fi.ui;
}
-static INLINE float
+static inline float
uif(uint32_t ui)
{
union fi fi;
@@ -507,7 +507,7 @@ uif(uint32_t ui)
* Convert ubyte to float in [0, 1].
* XXX a 256-entry lookup table would be slightly faster.
*/
-static INLINE float
+static inline float
ubyte_to_float(ubyte ub)
{
return (float) ub * (1.0f / 255.0f);
@@ -517,7 +517,7 @@ ubyte_to_float(ubyte ub)
/**
* Convert float in [0,1] to ubyte in [0,255] with clamping.
*/
-static INLINE ubyte
+static inline ubyte
float_to_ubyte(float f)
{
union fi tmp;
@@ -535,13 +535,13 @@ float_to_ubyte(float f)
}
}
-static INLINE float
+static inline float
byte_to_float_tex(int8_t b)
{
return (b == -128) ? -1.0F : b * 1.0F / 127.0F;
}
-static INLINE int8_t
+static inline int8_t
float_to_byte_tex(float f)
{
return (int8_t) (127.0F * f);
@@ -550,7 +550,7 @@ float_to_byte_tex(float f)
/**
* Calc log base 2
*/
-static INLINE unsigned
+static inline unsigned
util_logbase2(unsigned n)
{
#if defined(HAVE___BUILTIN_CLZ)
@@ -570,7 +570,7 @@ util_logbase2(unsigned n)
/**
* Returns the smallest power of two >= x
*/
-static INLINE unsigned
+static inline unsigned
util_next_power_of_two(unsigned x)
{
#if defined(HAVE___BUILTIN_CLZ)
@@ -602,7 +602,7 @@ util_next_power_of_two(unsigned x)
/**
* Return number of bits set in n.
*/
-static INLINE unsigned
+static inline unsigned
util_bitcount(unsigned n)
{
#if defined(HAVE___BUILTIN_POPCOUNT)
@@ -623,7 +623,7 @@ util_bitcount(unsigned n)
}
-static INLINE unsigned
+static inline unsigned
util_bitcount64(uint64_t n)
{
#ifdef HAVE___BUILTIN_POPCOUNTLL
@@ -639,7 +639,7 @@ util_bitcount64(uint64_t n)
* Algorithm taken from:
* http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
*/
-static INLINE unsigned
+static inline unsigned
util_bitreverse(unsigned n)
{
n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1);
@@ -671,7 +671,7 @@ util_bitreverse(unsigned n)
/**
* Reverse byte order of a 32 bit word.
*/
-static INLINE uint32_t
+static inline uint32_t
util_bswap32(uint32_t n)
{
#if defined(HAVE___BUILTIN_BSWAP32)
@@ -687,7 +687,7 @@ util_bswap32(uint32_t n)
/**
* Reverse byte order of a 64bit word.
*/
-static INLINE uint64_t
+static inline uint64_t
util_bswap64(uint64_t n)
{
#if defined(HAVE___BUILTIN_BSWAP64)
@@ -702,14 +702,14 @@ util_bswap64(uint64_t n)
/**
* Reverse byte order of a 16 bit word.
*/
-static INLINE uint16_t
+static inline uint16_t
util_bswap16(uint16_t n)
{
return (n >> 8) |
(n << 8);
}
-static INLINE void*
+static inline void*
util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
{
#ifdef PIPE_ARCH_BIG_ENDIAN
@@ -746,7 +746,7 @@ util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t
/**
* Align a value, only works pot alignemnts.
*/
-static INLINE int
+static inline int
align(int value, int alignment)
{
return (value + alignment - 1) & ~(alignment - 1);
@@ -755,7 +755,7 @@ align(int value, int alignment)
/**
* Works like align but on npot alignments.
*/
-static INLINE size_t
+static inline size_t
util_align_npot(size_t value, size_t alignment)
{
if (value % alignment)
@@ -763,7 +763,7 @@ util_align_npot(size_t value, size_t alignment)
return value;
}
-static INLINE unsigned
+static inline unsigned
u_minify(unsigned value, unsigned levels)
{
return MAX2(1, value >> levels);
@@ -796,13 +796,13 @@ do { \
#endif
-static INLINE uint32_t
+static inline uint32_t
util_unsigned_fixed(float value, unsigned frac_bits)
{
return value < 0 ? 0 : (uint32_t)(value * (1<min) / info->incr);
}
-static INLINE boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr )
+static inline boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr )
{
const struct u_prim_vertex_count *count = u_prim_vertex_count(pipe_prim);
@@ -159,7 +159,7 @@ static INLINE boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr )
}
-static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
+static inline boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
{
const struct u_prim_vertex_count *count = u_prim_vertex_count(pipe_prim);
@@ -174,7 +174,7 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
}
}
-static INLINE unsigned
+static inline unsigned
u_vertices_per_prim(int primitive)
{
switch(primitive) {
@@ -216,7 +216,7 @@ u_vertices_per_prim(int primitive)
* statistics depend on knowing the exact number of decomposed
* primitives for a set of vertices.
*/
-static INLINE unsigned
+static inline unsigned
u_decomposed_prims_for_vertices(int primitive, int vertices)
{
switch (primitive) {
@@ -263,7 +263,7 @@ u_decomposed_prims_for_vertices(int primitive, int vertices)
* count. Each quad is treated as two triangles. Polygons are treated as
* triangle fans.
*/
-static INLINE unsigned
+static inline unsigned
u_reduced_prims_for_vertices(int primitive, int vertices)
{
switch (primitive) {
diff --git a/src/gallium/auxiliary/util/u_range.h b/src/gallium/auxiliary/util/u_range.h
index efe25ef5e42..a1da5e5a6f0 100644
--- a/src/gallium/auxiliary/util/u_range.h
+++ b/src/gallium/auxiliary/util/u_range.h
@@ -47,7 +47,7 @@ struct util_range {
};
-static INLINE void
+static inline void
util_range_set_empty(struct util_range *range)
{
range->start = ~0;
@@ -55,7 +55,7 @@ util_range_set_empty(struct util_range *range)
}
/* This is like a union of two sets. */
-static INLINE void
+static inline void
util_range_add(struct util_range *range, unsigned start, unsigned end)
{
if (start < range->start || end > range->end) {
@@ -66,7 +66,7 @@ util_range_add(struct util_range *range, unsigned start, unsigned end)
}
}
-static INLINE boolean
+static inline boolean
util_ranges_intersect(struct util_range *range, unsigned start, unsigned end)
{
return MAX2(start, range->start) < MIN2(end, range->end);
@@ -75,14 +75,14 @@ util_ranges_intersect(struct util_range *range, unsigned start, unsigned end)
/* Init/deinit */
-static INLINE void
+static inline void
util_range_init(struct util_range *range)
{
pipe_mutex_init(range->write_mutex);
util_range_set_empty(range);
}
-static INLINE void
+static inline void
util_range_destroy(struct util_range *range)
{
pipe_mutex_destroy(range->write_mutex);
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index cf29dff0d02..b26f671f313 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -43,7 +43,7 @@ struct u_rect {
/* Do two rectangles intersect?
*/
-static INLINE boolean
+static inline boolean
u_rect_test_intersection(const struct u_rect *a,
const struct u_rect *b)
{
@@ -55,7 +55,7 @@ u_rect_test_intersection(const struct u_rect *a,
/* Find the intersection of two rectangles known to intersect.
*/
-static INLINE void
+static inline void
u_rect_find_intersection(const struct u_rect *a,
struct u_rect *b)
{
@@ -68,13 +68,13 @@ u_rect_find_intersection(const struct u_rect *a,
}
-static INLINE int
+static inline int
u_rect_area(const struct u_rect *r)
{
return (r->x1 - r->x0) * (r->y1 - r->y0);
}
-static INLINE void
+static inline void
u_rect_possible_intersection(const struct u_rect *a,
struct u_rect *b)
{
@@ -88,7 +88,7 @@ u_rect_possible_intersection(const struct u_rect *a,
/* Set @d to a rectangle that covers both @a and @b.
*/
-static INLINE void
+static inline void
u_rect_union(struct u_rect *d, const struct u_rect *a, const struct u_rect *b)
{
d->x0 = MIN2(a->x0, b->x0);
diff --git a/src/gallium/auxiliary/util/u_resource.h b/src/gallium/auxiliary/util/u_resource.h
index a5e091fd66e..6736476f4da 100644
--- a/src/gallium/auxiliary/util/u_resource.h
+++ b/src/gallium/auxiliary/util/u_resource.h
@@ -36,7 +36,7 @@ util_resource_size(const struct pipe_resource *res);
*
* Note that this function returns true for single-layered array textures.
*/
-static INLINE boolean
+static inline boolean
util_resource_is_array_texture(const struct pipe_resource *res)
{
switch (res->target) {
diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c
index 648b105b137..5816b781660 100644
--- a/src/gallium/auxiliary/util/u_ringbuffer.c
+++ b/src/gallium/auxiliary/util/u_ringbuffer.c
@@ -56,7 +56,7 @@ void util_ringbuffer_destroy( struct util_ringbuffer *ring )
/**
* Return number of free entries in the ring
*/
-static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring )
+static inline unsigned util_ringbuffer_space( const struct util_ringbuffer *ring )
{
return (ring->tail - (ring->head + 1)) & ring->mask;
}
@@ -64,7 +64,7 @@ static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring
/**
* Is the ring buffer empty?
*/
-static INLINE boolean util_ringbuffer_empty( const struct util_ringbuffer *ring )
+static inline boolean util_ringbuffer_empty( const struct util_ringbuffer *ring )
{
return util_ringbuffer_space(ring) == ring->mask;
}
diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c
deleted file mode 100644
index 39e9b70d0f8..00000000000
--- a/src/gallium/auxiliary/util/u_snprintf.c
+++ /dev/null
@@ -1,1480 +0,0 @@
-/*
- * Copyright (c) 1995 Patrick Powell.
- *
- * This code is based on code written by Patrick Powell .
- * It may be used for any purpose as long as this notice remains intact on all
- * source code distributions.
- */
-
-/*
- * Copyright (c) 2008 Holger Weiss.
- *
- * This version of the code is maintained by Holger Weiss .
- * My changes to the code may freely be used, modified and/or redistributed for
- * any purpose. It would be nice if additions and fixes to this file (including
- * trivial code cleanups) would be sent back in order to let me include them in
- * the version available at .
- * However, this is not a requirement for using or redistributing (possibly
- * modified) versions of this file, nor is leaving this notice intact mandatory.
- */
-
-/*
- * History
- *
- * 2008-01-20 Holger Weiss for C99-snprintf 1.1:
- *
- * Fixed the detection of infinite floating point values on IRIX (and
- * possibly other systems) and applied another few minor cleanups.
- *
- * 2008-01-06 Holger Weiss for C99-snprintf 1.0:
- *
- * Added a lot of new features, fixed many bugs, and incorporated various
- * improvements done by Andrew Tridgell , Russ Allbery
- * , Hrvoje Niksic , Damien Miller
- * , and others for the Samba, INN, Wget, and OpenSSH
- * projects. The additions include: support the "e", "E", "g", "G", and
- * "F" conversion specifiers (and use conversion style "f" or "F" for the
- * still unsupported "a" and "A" specifiers); support the "hh", "ll", "j",
- * "t", and "z" length modifiers; support the "#" flag and the (non-C99)
- * "'" flag; use localeconv(3) (if available) to get both the current
- * locale's decimal point character and the separator between groups of
- * digits; fix the handling of various corner cases of field width and
- * precision specifications; fix various floating point conversion bugs;
- * handle infinite and NaN floating point values; don't attempt to write to
- * the output buffer (which may be NULL) if a size of zero was specified;
- * check for integer overflow of the field width, precision, and return
- * values and during the floating point conversion; use the OUTCHAR() macro
- * instead of a function for better performance; provide asprintf(3) and
- * vasprintf(3) functions; add new test cases. The replacement functions
- * have been renamed to use an "rpl_" prefix, the function calls in the
- * main project (and in this file) must be redefined accordingly for each
- * replacement function which is needed (by using Autoconf or other means).
- * Various other minor improvements have been applied and the coding style
- * was cleaned up for consistency.
- *
- * 2007-07-23 Holger Weiss for Mutt 1.5.13:
- *
- * C99 compliant snprintf(3) and vsnprintf(3) functions return the number
- * of characters that would have been written to a sufficiently sized
- * buffer (excluding the '\0'). The original code simply returned the
- * length of the resulting output string, so that's been fixed.
- *
- * 1998-03-05 Michael Elkins for Mutt 0.90.8:
- *
- * The original code assumed that both snprintf(3) and vsnprintf(3) were
- * missing. Some systems only have snprintf(3) but not vsnprintf(3), so
- * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
- *
- * 1998-01-27 Thomas Roessler for Mutt 0.89i:
- *
- * The PGP code was using unsigned hexadecimal formats. Unfortunately,
- * unsigned formats simply didn't work.
- *
- * 1997-10-22 Brandon Long for Mutt 0.87.1:
- *
- * Ok, added some minimal floating point support, which means this probably
- * requires libm on most operating systems. Don't yet support the exponent
- * (e,E) and sigfig (g,G). Also, fmtint() was pretty badly broken, it just
- * wasn't being exercised in ways which showed it, so that's been fixed.
- * Also, formatted the code to Mutt conventions, and removed dead code left
- * over from the original. Also, there is now a builtin-test, run with:
- * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf
- *
- * 2996-09-15 Brandon Long for Mutt 0.43:
- *
- * This was ugly. It is still ugly. I opted out of floating point
- * numbers, but the formatter understands just about everything from the
- * normal C string format, at least as far as I can tell from the Solaris
- * 2.5 printf(3S) man page.
- */
-
-/*
- * ToDo
- *
- * - Add wide character support.
- * - Add support for "%a" and "%A" conversions.
- * - Create test routines which predefine the expected results. Our test cases
- * usually expose bugs in system implementations rather than in ours :-)
- */
-
-/*
- * Usage
- *
- * 1) The following preprocessor macros should be defined to 1 if the feature or
- * file in question is available on the target system (by using Autoconf or
- * other means), though basic functionality should be available as long as
- * HAVE_STDARG_H and HAVE_STDLIB_H are defined correctly:
- *
- * HAVE_VSNPRINTF
- * HAVE_SNPRINTF
- * HAVE_VASPRINTF
- * HAVE_ASPRINTF
- * HAVE_STDARG_H
- * HAVE_STDDEF_H
- * HAVE_STDINT_H
- * HAVE_STDLIB_H
- * HAVE_INTTYPES_H
- * HAVE_LOCALE_H
- * HAVE_LOCALECONV
- * HAVE_LCONV_DECIMAL_POINT
- * HAVE_LCONV_THOUSANDS_SEP
- * HAVE_LONG_DOUBLE
- * HAVE_LONG_LONG_INT
- * HAVE_UNSIGNED_LONG_LONG_INT
- * HAVE_INTMAX_T
- * HAVE_UINTMAX_T
- * HAVE_UINTPTR_T
- * HAVE_PTRDIFF_T
- * HAVE_VA_COPY
- * HAVE___VA_COPY
- *
- * 2) The calls to the functions which should be replaced must be redefined
- * throughout the project files (by using Autoconf or other means):
- *
- * #define vsnprintf rpl_vsnprintf
- * #define snprintf rpl_snprintf
- * #define vasprintf rpl_vasprintf
- * #define asprintf rpl_asprintf
- *
- * 3) The required replacement functions should be declared in some header file
- * included throughout the project files:
- *
- * #if HAVE_CONFIG_H
- * #include
- * #endif
- * #if HAVE_STDARG_H
- * #include
- * #if !HAVE_VSNPRINTF
- * int rpl_vsnprintf(char *, size_t, const char *, va_list);
- * #endif
- * #if !HAVE_SNPRINTF
- * int rpl_snprintf(char *, size_t, const char *, ...);
- * #endif
- * #if !HAVE_VASPRINTF
- * int rpl_vasprintf(char **, const char *, va_list);
- * #endif
- * #if !HAVE_ASPRINTF
- * int rpl_asprintf(char **, const char *, ...);
- * #endif
- * #endif
- *
- * Autoconf macros for handling step 1 and step 2 are available at
- * .
- */
-
-#include "pipe/p_config.h"
-
-#if HAVE_CONFIG_H
-#include
-#else
-#ifdef _MSC_VER
-#define vsnprintf util_vsnprintf
-#define snprintf util_snprintf
-#define HAVE_VSNPRINTF 0
-#define HAVE_SNPRINTF 0
-#define HAVE_VASPRINTF 1 /* not needed */
-#define HAVE_ASPRINTF 1 /* not needed */
-#define HAVE_STDARG_H 1
-#define HAVE_STDDEF_H 1
-#define HAVE_STDINT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_INTTYPES_H 0
-#define HAVE_LOCALE_H 0
-#define HAVE_LOCALECONV 0
-#define HAVE_LCONV_DECIMAL_POINT 0
-#define HAVE_LCONV_THOUSANDS_SEP 0
-#define HAVE_LONG_DOUBLE 0
-#define HAVE_LONG_LONG_INT 1
-#define HAVE_UNSIGNED_LONG_LONG_INT 1
-#define HAVE_INTMAX_T 0
-#define HAVE_UINTMAX_T 0
-#define HAVE_UINTPTR_T 1
-#define HAVE_PTRDIFF_T 1
-#define HAVE_VA_COPY 0
-#define HAVE___VA_COPY 0
-#else
-#define HAVE_VSNPRINTF 1
-#define HAVE_SNPRINTF 1
-#define HAVE_VASPRINTF 1
-#define HAVE_ASPRINTF 1
-#endif
-#endif /* HAVE_CONFIG_H */
-
-#if !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || !HAVE_VASPRINTF
-#include /* For NULL, size_t, vsnprintf(3), and vasprintf(3). */
-#ifdef VA_START
-#undef VA_START
-#endif /* defined(VA_START) */
-#ifdef VA_SHIFT
-#undef VA_SHIFT
-#endif /* defined(VA_SHIFT) */
-#if HAVE_STDARG_H
-#include
-#define VA_START(ap, last) va_start(ap, last)
-#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */
-#else /* Assume is available. */
-#include
-#define VA_START(ap, last) va_start(ap) /* "last" is ignored. */
-#define VA_SHIFT(ap, value, type) value = va_arg(ap, type)
-#endif /* HAVE_STDARG_H */
-
-#if !HAVE_VASPRINTF
-#if HAVE_STDLIB_H
-#include /* For malloc(3). */
-#endif /* HAVE_STDLIB_H */
-#ifdef VA_COPY
-#undef VA_COPY
-#endif /* defined(VA_COPY) */
-#ifdef VA_END_COPY
-#undef VA_END_COPY
-#endif /* defined(VA_END_COPY) */
-#if HAVE_VA_COPY
-#define VA_COPY(dest, src) va_copy(dest, src)
-#define VA_END_COPY(ap) va_end(ap)
-#elif HAVE___VA_COPY
-#define VA_COPY(dest, src) __va_copy(dest, src)
-#define VA_END_COPY(ap) va_end(ap)
-#else
-#define VA_COPY(dest, src) (void)mymemcpy(&dest, &src, sizeof(va_list))
-#define VA_END_COPY(ap) /* No-op. */
-#define NEED_MYMEMCPY 1
-static void *mymemcpy(void *, void *, size_t);
-#endif /* HAVE_VA_COPY */
-#endif /* !HAVE_VASPRINTF */
-
-#if !HAVE_VSNPRINTF
-#include /* For *_MAX. */
-#if HAVE_INTTYPES_H
-#include /* For intmax_t (if not defined in ). */
-#endif /* HAVE_INTTYPES_H */
-#if HAVE_LOCALE_H
-#include /* For localeconv(3). */
-#endif /* HAVE_LOCALE_H */
-#if HAVE_STDDEF_H
-#include /* For ptrdiff_t. */
-#endif /* HAVE_STDDEF_H */
-#if HAVE_STDINT_H
-#include /* For intmax_t. */
-#endif /* HAVE_STDINT_H */
-
-/* Support for unsigned long long int. We may also need ULLONG_MAX. */
-#ifndef ULONG_MAX /* We may need ULONG_MAX as a fallback. */
-#ifdef UINT_MAX
-#define ULONG_MAX UINT_MAX
-#else
-#define ULONG_MAX INT_MAX
-#endif /* defined(UINT_MAX) */
-#endif /* !defined(ULONG_MAX) */
-#ifdef ULLONG
-#undef ULLONG
-#endif /* defined(ULLONG) */
-#if HAVE_UNSIGNED_LONG_LONG_INT
-#define ULLONG unsigned long long int
-#ifndef ULLONG_MAX
-#define ULLONG_MAX ULONG_MAX
-#endif /* !defined(ULLONG_MAX) */
-#else
-#define ULLONG unsigned long int
-#ifdef ULLONG_MAX
-#undef ULLONG_MAX
-#endif /* defined(ULLONG_MAX) */
-#define ULLONG_MAX ULONG_MAX
-#endif /* HAVE_LONG_LONG_INT */
-
-/* Support for uintmax_t. We also need UINTMAX_MAX. */
-#ifdef UINTMAX_T
-#undef UINTMAX_T
-#endif /* defined(UINTMAX_T) */
-#if HAVE_UINTMAX_T || defined(uintmax_t)
-#define UINTMAX_T uintmax_t
-#ifndef UINTMAX_MAX
-#define UINTMAX_MAX ULLONG_MAX
-#endif /* !defined(UINTMAX_MAX) */
-#else
-#define UINTMAX_T ULLONG
-#ifdef UINTMAX_MAX
-#undef UINTMAX_MAX
-#endif /* defined(UINTMAX_MAX) */
-#define UINTMAX_MAX ULLONG_MAX
-#endif /* HAVE_UINTMAX_T || defined(uintmax_t) */
-
-/* Support for long double. */
-#ifndef LDOUBLE
-#if HAVE_LONG_DOUBLE
-#define LDOUBLE long double
-#else
-#define LDOUBLE double
-#endif /* HAVE_LONG_DOUBLE */
-#endif /* !defined(LDOUBLE) */
-
-/* Support for long long int. */
-#ifndef LLONG
-#if HAVE_LONG_LONG_INT
-#define LLONG long long int
-#else
-#define LLONG long int
-#endif /* HAVE_LONG_LONG_INT */
-#endif /* !defined(LLONG) */
-
-/* Support for intmax_t. */
-#ifndef INTMAX_T
-#if HAVE_INTMAX_T || defined(intmax_t)
-#define INTMAX_T intmax_t
-#else
-#define INTMAX_T LLONG
-#endif /* HAVE_INTMAX_T || defined(intmax_t) */
-#endif /* !defined(INTMAX_T) */
-
-/* Support for uintptr_t. */
-#ifndef UINTPTR_T
-#if HAVE_UINTPTR_T || defined(uintptr_t)
-#define UINTPTR_T uintptr_t
-#else
-#define UINTPTR_T unsigned long int
-#endif /* HAVE_UINTPTR_T || defined(uintptr_t) */
-#endif /* !defined(UINTPTR_T) */
-
-/* Support for ptrdiff_t. */
-#ifndef PTRDIFF_T
-#if HAVE_PTRDIFF_T || defined(ptrdiff_t)
-#define PTRDIFF_T ptrdiff_t
-#else
-#define PTRDIFF_T long int
-#endif /* HAVE_PTRDIFF_T || defined(ptrdiff_t) */
-#endif /* !defined(PTRDIFF_T) */
-
-/*
- * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99:
- * 7.19.6.1, 7). However, we'll simply use PTRDIFF_T and convert it to an
- * unsigned type if necessary. This should work just fine in practice.
- */
-#ifndef UPTRDIFF_T
-#define UPTRDIFF_T PTRDIFF_T
-#endif /* !defined(UPTRDIFF_T) */
-
-/*
- * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7).
- * However, we'll simply use size_t and convert it to a signed type if
- * necessary. This should work just fine in practice.
- */
-#ifndef SSIZE_T
-#define SSIZE_T size_t
-#endif /* !defined(SSIZE_T) */
-
-/* Either ERANGE or E2BIG should be available everywhere. */
-#ifndef ERANGE
-#define ERANGE E2BIG
-#endif /* !defined(ERANGE) */
-#ifndef EOVERFLOW
-#define EOVERFLOW ERANGE
-#endif /* !defined(EOVERFLOW) */
-
-/*
- * Buffer size to hold the octal string representation of UINT128_MAX without
- * nul-termination ("3777777777777777777777777777777777777777777").
- */
-#ifdef MAX_CONVERT_LENGTH
-#undef MAX_CONVERT_LENGTH
-#endif /* defined(MAX_CONVERT_LENGTH) */
-#define MAX_CONVERT_LENGTH 43
-
-/* Format read states. */
-#define PRINT_S_DEFAULT 0
-#define PRINT_S_FLAGS 1
-#define PRINT_S_WIDTH 2
-#define PRINT_S_DOT 3
-#define PRINT_S_PRECISION 4
-#define PRINT_S_MOD 5
-#define PRINT_S_CONV 6
-
-/* Format flags. */
-#define PRINT_F_MINUS (1 << 0)
-#define PRINT_F_PLUS (1 << 1)
-#define PRINT_F_SPACE (1 << 2)
-#define PRINT_F_NUM (1 << 3)
-#define PRINT_F_ZERO (1 << 4)
-#define PRINT_F_QUOTE (1 << 5)
-#define PRINT_F_UP (1 << 6)
-#define PRINT_F_UNSIGNED (1 << 7)
-#define PRINT_F_TYPE_G (1 << 8)
-#define PRINT_F_TYPE_E (1 << 9)
-
-/* Conversion flags. */
-#define PRINT_C_CHAR 1
-#define PRINT_C_SHORT 2
-#define PRINT_C_LONG 3
-#define PRINT_C_LLONG 4
-#define PRINT_C_LDOUBLE 5
-#define PRINT_C_SIZE 6
-#define PRINT_C_PTRDIFF 7
-#define PRINT_C_INTMAX 8
-
-#ifndef MAX
-#define MAX(x, y) ((x >= y) ? x : y)
-#endif /* !defined(MAX) */
-#ifndef CHARTOINT
-#define CHARTOINT(ch) (ch - '0')
-#endif /* !defined(CHARTOINT) */
-#ifndef ISDIGIT
-#define ISDIGIT(ch) ('0' <= (unsigned char)ch && (unsigned char)ch <= '9')
-#endif /* !defined(ISDIGIT) */
-#ifndef ISNAN
-#define ISNAN(x) (x != x)
-#endif /* !defined(ISNAN) */
-#ifndef ISINF
-#define ISINF(x) (x != 0.0 && x + x == x)
-#endif /* !defined(ISINF) */
-
-#ifdef OUTCHAR
-#undef OUTCHAR
-#endif /* defined(OUTCHAR) */
-#define OUTCHAR(str, len, size, ch) \
-do { \
- if (len + 1 < size) \
- str[len] = ch; \
- (len)++; \
-} while (/* CONSTCOND */ 0)
-
-static void fmtstr(char *, size_t *, size_t, const char *, int, int, int);
-static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int);
-static void fmtflt(char *, size_t *, size_t, LDOUBLE, int, int, int, int *);
-static void printsep(char *, size_t *, size_t);
-static int getnumsep(int);
-static int getexponent(LDOUBLE);
-static int convert(UINTMAX_T, char *, size_t, int, int);
-static UINTMAX_T cast(LDOUBLE);
-static UINTMAX_T myround(LDOUBLE);
-static LDOUBLE mypow10(int);
-
-int
-util_vsnprintf(char *str, size_t size, const char *format, va_list args)
-{
- LDOUBLE fvalue;
- INTMAX_T value;
- unsigned char cvalue;
- const char *strvalue;
- INTMAX_T *intmaxptr;
- PTRDIFF_T *ptrdiffptr;
- SSIZE_T *sizeptr;
- LLONG *llongptr;
- long int *longptr;
- int *intptr;
- short int *shortptr;
- signed char *charptr;
- size_t len = 0;
- int overflow = 0;
- int base = 0;
- int cflags = 0;
- int flags = 0;
- int width = 0;
- int precision = -1;
- int state = PRINT_S_DEFAULT;
- char ch = *format++;
-
- /*
- * C99 says: "If `n' is zero, nothing is written, and `s' may be a null
- * pointer." (7.19.6.5, 2) We're forgiving and allow a NULL pointer
- * even if a size larger than zero was specified. At least NetBSD's
- * snprintf(3) does the same, as well as other versions of this file.
- * (Though some of these versions will write to a non-NULL buffer even
- * if a size of zero was specified, which violates the standard.)
- */
- if (str == NULL && size != 0)
- size = 0;
-
- while (ch != '\0')
- switch (state) {
- case PRINT_S_DEFAULT:
- if (ch == '%')
- state = PRINT_S_FLAGS;
- else
- OUTCHAR(str, len, size, ch);
- ch = *format++;
- break;
- case PRINT_S_FLAGS:
- switch (ch) {
- case '-':
- flags |= PRINT_F_MINUS;
- ch = *format++;
- break;
- case '+':
- flags |= PRINT_F_PLUS;
- ch = *format++;
- break;
- case ' ':
- flags |= PRINT_F_SPACE;
- ch = *format++;
- break;
- case '#':
- flags |= PRINT_F_NUM;
- ch = *format++;
- break;
- case '0':
- flags |= PRINT_F_ZERO;
- ch = *format++;
- break;
- case '\'': /* SUSv2 flag (not in C99). */
- flags |= PRINT_F_QUOTE;
- ch = *format++;
- break;
- default:
- state = PRINT_S_WIDTH;
- break;
- }
- break;
- case PRINT_S_WIDTH:
- if (ISDIGIT(ch)) {
- ch = CHARTOINT(ch);
- if (width > (INT_MAX - ch) / 10) {
- overflow = 1;
- goto out;
- }
- width = 10 * width + ch;
- ch = *format++;
- } else if (ch == '*') {
- /*
- * C99 says: "A negative field width argument is
- * taken as a `-' flag followed by a positive
- * field width." (7.19.6.1, 5)
- */
- if ((width = va_arg(args, int)) < 0) {
- flags |= PRINT_F_MINUS;
- width = -width;
- }
- ch = *format++;
- state = PRINT_S_DOT;
- } else
- state = PRINT_S_DOT;
- break;
- case PRINT_S_DOT:
- if (ch == '.') {
- state = PRINT_S_PRECISION;
- ch = *format++;
- } else
- state = PRINT_S_MOD;
- break;
- case PRINT_S_PRECISION:
- if (precision == -1)
- precision = 0;
- if (ISDIGIT(ch)) {
- ch = CHARTOINT(ch);
- if (precision > (INT_MAX - ch) / 10) {
- overflow = 1;
- goto out;
- }
- precision = 10 * precision + ch;
- ch = *format++;
- } else if (ch == '*') {
- /*
- * C99 says: "A negative precision argument is
- * taken as if the precision were omitted."
- * (7.19.6.1, 5)
- */
- if ((precision = va_arg(args, int)) < 0)
- precision = -1;
- ch = *format++;
- state = PRINT_S_MOD;
- } else
- state = PRINT_S_MOD;
- break;
- case PRINT_S_MOD:
- switch (ch) {
- case 'h':
- ch = *format++;
- if (ch == 'h') { /* It's a char. */
- ch = *format++;
- cflags = PRINT_C_CHAR;
- } else
- cflags = PRINT_C_SHORT;
- break;
- case 'l':
- ch = *format++;
- if (ch == 'l') { /* It's a long long. */
- ch = *format++;
- cflags = PRINT_C_LLONG;
- } else
- cflags = PRINT_C_LONG;
- break;
- case 'L':
- cflags = PRINT_C_LDOUBLE;
- ch = *format++;
- break;
- case 'j':
- cflags = PRINT_C_INTMAX;
- ch = *format++;
- break;
- case 't':
- cflags = PRINT_C_PTRDIFF;
- ch = *format++;
- break;
- case 'z':
- cflags = PRINT_C_SIZE;
- ch = *format++;
- break;
- }
- state = PRINT_S_CONV;
- break;
- case PRINT_S_CONV:
- switch (ch) {
- case 'd':
- /* FALLTHROUGH */
- case 'i':
- switch (cflags) {
- case PRINT_C_CHAR:
- value = (signed char)va_arg(args, int);
- break;
- case PRINT_C_SHORT:
- value = (short int)va_arg(args, int);
- break;
- case PRINT_C_LONG:
- value = va_arg(args, long int);
- break;
- case PRINT_C_LLONG:
- value = va_arg(args, LLONG);
- break;
- case PRINT_C_SIZE:
- value = va_arg(args, SSIZE_T);
- break;
- case PRINT_C_INTMAX:
- value = va_arg(args, INTMAX_T);
- break;
- case PRINT_C_PTRDIFF:
- value = va_arg(args, PTRDIFF_T);
- break;
- default:
- value = va_arg(args, int);
- break;
- }
- fmtint(str, &len, size, value, 10, width,
- precision, flags);
- break;
- case 'X':
- flags |= PRINT_F_UP;
- /* FALLTHROUGH */
- case 'x':
- base = 16;
- /* FALLTHROUGH */
- case 'o':
- if (base == 0)
- base = 8;
- /* FALLTHROUGH */
- case 'u':
- if (base == 0)
- base = 10;
- flags |= PRINT_F_UNSIGNED;
- switch (cflags) {
- case PRINT_C_CHAR:
- value = (unsigned char)va_arg(args,
- unsigned int);
- break;
- case PRINT_C_SHORT:
- value = (unsigned short int)va_arg(args,
- unsigned int);
- break;
- case PRINT_C_LONG:
- value = va_arg(args, unsigned long int);
- break;
- case PRINT_C_LLONG:
- value = va_arg(args, ULLONG);
- break;
- case PRINT_C_SIZE:
- value = va_arg(args, size_t);
- break;
- case PRINT_C_INTMAX:
- value = va_arg(args, UINTMAX_T);
- break;
- case PRINT_C_PTRDIFF:
- value = va_arg(args, UPTRDIFF_T);
- break;
- default:
- value = va_arg(args, unsigned int);
- break;
- }
- fmtint(str, &len, size, value, base, width,
- precision, flags);
- break;
- case 'A':
- /* Not yet supported, we'll use "%F". */
- /* FALLTHROUGH */
- case 'F':
- flags |= PRINT_F_UP;
- case 'a':
- /* Not yet supported, we'll use "%f". */
- /* FALLTHROUGH */
- case 'f':
- if (cflags == PRINT_C_LDOUBLE)
- fvalue = va_arg(args, LDOUBLE);
- else
- fvalue = va_arg(args, double);
- fmtflt(str, &len, size, fvalue, width,
- precision, flags, &overflow);
- if (overflow)
- goto out;
- break;
- case 'E':
- flags |= PRINT_F_UP;
- /* FALLTHROUGH */
- case 'e':
- flags |= PRINT_F_TYPE_E;
- if (cflags == PRINT_C_LDOUBLE)
- fvalue = va_arg(args, LDOUBLE);
- else
- fvalue = va_arg(args, double);
- fmtflt(str, &len, size, fvalue, width,
- precision, flags, &overflow);
- if (overflow)
- goto out;
- break;
- case 'G':
- flags |= PRINT_F_UP;
- /* FALLTHROUGH */
- case 'g':
- flags |= PRINT_F_TYPE_G;
- if (cflags == PRINT_C_LDOUBLE)
- fvalue = va_arg(args, LDOUBLE);
- else
- fvalue = va_arg(args, double);
- /*
- * If the precision is zero, it is treated as
- * one (cf. C99: 7.19.6.1, 8).
- */
- if (precision == 0)
- precision = 1;
- fmtflt(str, &len, size, fvalue, width,
- precision, flags, &overflow);
- if (overflow)
- goto out;
- break;
- case 'c':
- cvalue = (unsigned char)va_arg(args, int);
- OUTCHAR(str, len, size, cvalue);
- break;
- case 's':
- strvalue = va_arg(args, char *);
- fmtstr(str, &len, size, strvalue, width,
- precision, flags);
- break;
- case 'p':
- /*
- * C99 says: "The value of the pointer is
- * converted to a sequence of printing
- * characters, in an implementation-defined
- * manner." (C99: 7.19.6.1, 8)
- */
- if ((strvalue = va_arg(args, void *)) == NULL)
- /*
- * We use the glibc format. BSD prints
- * "0x0", SysV "0".
- */
- fmtstr(str, &len, size, "(nil)", width,
- -1, flags);
- else {
- /*
- * We use the BSD/glibc format. SysV
- * omits the "0x" prefix (which we emit
- * using the PRINT_F_NUM flag).
- */
- flags |= PRINT_F_NUM;
- flags |= PRINT_F_UNSIGNED;
- fmtint(str, &len, size,
- (UINTPTR_T)strvalue, 16, width,
- precision, flags);
- }
- break;
- case 'n':
- switch (cflags) {
- case PRINT_C_CHAR:
- charptr = va_arg(args, signed char *);
- *charptr = (signed char)len;
- break;
- case PRINT_C_SHORT:
- shortptr = va_arg(args, short int *);
- *shortptr = (short int)len;
- break;
- case PRINT_C_LONG:
- longptr = va_arg(args, long int *);
- *longptr = (long int)len;
- break;
- case PRINT_C_LLONG:
- llongptr = va_arg(args, LLONG *);
- *llongptr = (LLONG)len;
- break;
- case PRINT_C_SIZE:
- /*
- * C99 says that with the "z" length
- * modifier, "a following `n' conversion
- * specifier applies to a pointer to a
- * signed integer type corresponding to
- * size_t argument." (7.19.6.1, 7)
- */
- sizeptr = va_arg(args, SSIZE_T *);
- *sizeptr = len;
- break;
- case PRINT_C_INTMAX:
- intmaxptr = va_arg(args, INTMAX_T *);
- *intmaxptr = len;
- break;
- case PRINT_C_PTRDIFF:
- ptrdiffptr = va_arg(args, PTRDIFF_T *);
- *ptrdiffptr = len;
- break;
- default:
- intptr = va_arg(args, int *);
- *intptr = (int)len;
- break;
- }
- break;
- case '%': /* Print a "%" character verbatim. */
- OUTCHAR(str, len, size, ch);
- break;
- default: /* Skip other characters. */
- break;
- }
- ch = *format++;
- state = PRINT_S_DEFAULT;
- base = cflags = flags = width = 0;
- precision = -1;
- break;
- }
-out:
- if (len < size)
- str[len] = '\0';
- else if (size > 0)
- str[size - 1] = '\0';
-
- if (overflow || len >= INT_MAX) {
- return -1;
- }
- return (int)len;
-}
-
-static void
-fmtstr(char *str, size_t *len, size_t size, const char *value, int width,
- int precision, int flags)
-{
- int padlen, strln; /* Amount to pad. */
- int noprecision = (precision == -1);
-
- if (value == NULL) /* We're forgiving. */
- value = "(null)";
-
- /* If a precision was specified, don't read the string past it. */
- for (strln = 0; value[strln] != '\0' &&
- (noprecision || strln < precision); strln++)
- continue;
-
- if ((padlen = width - strln) < 0)
- padlen = 0;
- if (flags & PRINT_F_MINUS) /* Left justify. */
- padlen = -padlen;
-
- while (padlen > 0) { /* Leading spaces. */
- OUTCHAR(str, *len, size, ' ');
- padlen--;
- }
- while (*value != '\0' && (noprecision || precision-- > 0)) {
- OUTCHAR(str, *len, size, *value);
- value++;
- }
- while (padlen < 0) { /* Trailing spaces. */
- OUTCHAR(str, *len, size, ' ');
- padlen++;
- }
-}
-
-static void
-fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width,
- int precision, int flags)
-{
- UINTMAX_T uvalue;
- char iconvert[MAX_CONVERT_LENGTH];
- char sign = 0;
- char hexprefix = 0;
- int spadlen = 0; /* Amount to space pad. */
- int zpadlen = 0; /* Amount to zero pad. */
- int pos;
- int separators = (flags & PRINT_F_QUOTE);
- int noprecision = (precision == -1);
-
- if (flags & PRINT_F_UNSIGNED)
- uvalue = value;
- else {
- uvalue = (value >= 0) ? value : -value;
- if (value < 0)
- sign = '-';
- else if (flags & PRINT_F_PLUS) /* Do a sign. */
- sign = '+';
- else if (flags & PRINT_F_SPACE)
- sign = ' ';
- }
-
- pos = convert(uvalue, iconvert, sizeof(iconvert), base,
- flags & PRINT_F_UP);
-
- if (flags & PRINT_F_NUM && uvalue != 0) {
- /*
- * C99 says: "The result is converted to an `alternative form'.
- * For `o' conversion, it increases the precision, if and only
- * if necessary, to force the first digit of the result to be a
- * zero (if the value and precision are both 0, a single 0 is
- * printed). For `x' (or `X') conversion, a nonzero result has
- * `0x' (or `0X') prefixed to it." (7.19.6.1, 6)
- */
- switch (base) {
- case 8:
- if (precision <= pos)
- precision = pos + 1;
- break;
- case 16:
- hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x';
- break;
- }
- }
-
- if (separators) /* Get the number of group separators we'll print. */
- separators = getnumsep(pos);
-
- zpadlen = precision - pos - separators;
- spadlen = width /* Minimum field width. */
- - separators /* Number of separators. */
- - MAX(precision, pos) /* Number of integer digits. */
- - ((sign != 0) ? 1 : 0) /* Will we print a sign? */
- - ((hexprefix != 0) ? 2 : 0); /* Will we print a prefix? */
-
- if (zpadlen < 0)
- zpadlen = 0;
- if (spadlen < 0)
- spadlen = 0;
-
- /*
- * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
- * ignored. For `d', `i', `o', `u', `x', and `X' conversions, if a
- * precision is specified, the `0' flag is ignored." (7.19.6.1, 6)
- */
- if (flags & PRINT_F_MINUS) /* Left justify. */
- spadlen = -spadlen;
- else if (flags & PRINT_F_ZERO && noprecision) {
- zpadlen += spadlen;
- spadlen = 0;
- }
- while (spadlen > 0) { /* Leading spaces. */
- OUTCHAR(str, *len, size, ' ');
- spadlen--;
- }
- if (sign != 0) /* Sign. */
- OUTCHAR(str, *len, size, sign);
- if (hexprefix != 0) { /* A "0x" or "0X" prefix. */
- OUTCHAR(str, *len, size, '0');
- OUTCHAR(str, *len, size, hexprefix);
- }
- while (zpadlen > 0) { /* Leading zeros. */
- OUTCHAR(str, *len, size, '0');
- zpadlen--;
- }
- while (pos > 0) { /* The actual digits. */
- pos--;
- OUTCHAR(str, *len, size, iconvert[pos]);
- if (separators > 0 && pos > 0 && pos % 3 == 0)
- printsep(str, len, size);
- }
- while (spadlen < 0) { /* Trailing spaces. */
- OUTCHAR(str, *len, size, ' ');
- spadlen++;
- }
-}
-
-static void
-fmtflt(char *str, size_t *len, size_t size, LDOUBLE fvalue, int width,
- int precision, int flags, int *overflow)
-{
- LDOUBLE ufvalue;
- UINTMAX_T intpart;
- UINTMAX_T fracpart;
- UINTMAX_T mask;
- const char *infnan = NULL;
- char iconvert[MAX_CONVERT_LENGTH];
- char fconvert[MAX_CONVERT_LENGTH];
- char econvert[4]; /* "e-12" (without nul-termination). */
- char esign = 0;
- char sign = 0;
- int leadfraczeros = 0;
- int exponent = 0;
- int emitpoint = 0;
- int omitzeros = 0;
- int omitcount = 0;
- int padlen = 0;
- int epos = 0;
- int fpos = 0;
- int ipos = 0;
- int separators = (flags & PRINT_F_QUOTE);
- int estyle = (flags & PRINT_F_TYPE_E);
-#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT
- struct lconv *lc = localeconv();
-#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */
-
- /*
- * AIX' man page says the default is 0, but C99 and at least Solaris'
- * and NetBSD's man pages say the default is 6, and sprintf(3) on AIX
- * defaults to 6.
- */
- if (precision == -1)
- precision = 6;
-
- if (fvalue < 0.0)
- sign = '-';
- else if (flags & PRINT_F_PLUS) /* Do a sign. */
- sign = '+';
- else if (flags & PRINT_F_SPACE)
- sign = ' ';
-
- if (ISNAN(fvalue))
- infnan = (flags & PRINT_F_UP) ? "NAN" : "nan";
- else if (ISINF(fvalue))
- infnan = (flags & PRINT_F_UP) ? "INF" : "inf";
-
- if (infnan != NULL) {
- if (sign != 0)
- iconvert[ipos++] = sign;
- while (*infnan != '\0')
- iconvert[ipos++] = *infnan++;
- fmtstr(str, len, size, iconvert, width, ipos, flags);
- return;
- }
-
- /* "%e" (or "%E") or "%g" (or "%G") conversion. */
- if (flags & PRINT_F_TYPE_E || flags & PRINT_F_TYPE_G) {
- if (flags & PRINT_F_TYPE_G) {
- /*
- * For "%g" (and "%G") conversions, the precision
- * specifies the number of significant digits, which
- * includes the digits in the integer part. The
- * conversion will or will not be using "e-style" (like
- * "%e" or "%E" conversions) depending on the precision
- * and on the exponent. However, the exponent can be
- * affected by rounding the converted value, so we'll
- * leave this decision for later. Until then, we'll
- * assume that we're going to do an "e-style" conversion
- * (in order to get the exponent calculated). For
- * "e-style", the precision must be decremented by one.
- */
- precision--;
- /*
- * For "%g" (and "%G") conversions, trailing zeros are
- * removed from the fractional portion of the result
- * unless the "#" flag was specified.
- */
- if (!(flags & PRINT_F_NUM))
- omitzeros = 1;
- }
- exponent = getexponent(fvalue);
- estyle = 1;
- }
-
-again:
- /*
- * Sorry, we only support 9, 19, or 38 digits (that is, the number of
- * digits of the 32-bit, the 64-bit, or the 128-bit UINTMAX_MAX value
- * minus one) past the decimal point due to our conversion method.
- */
- switch (sizeof(UINTMAX_T)) {
- case 16:
- if (precision > 38)
- precision = 38;
- break;
- case 8:
- if (precision > 19)
- precision = 19;
- break;
- default:
- if (precision > 9)
- precision = 9;
- break;
- }
-
- ufvalue = (fvalue >= 0.0) ? fvalue : -fvalue;
- if (estyle) /* We want exactly one integer digit. */
- ufvalue /= mypow10(exponent);
-
- if ((intpart = cast(ufvalue)) == UINTMAX_MAX) {
- *overflow = 1;
- return;
- }
-
- /*
- * Factor of ten with the number of digits needed for the fractional
- * part. For example, if the precision is 3, the mask will be 1000.
- */
- mask = (UINTMAX_T)mypow10(precision);
- /*
- * We "cheat" by converting the fractional part to integer by
- * multiplying by a factor of ten.
- */
- if ((fracpart = myround(mask * (ufvalue - intpart))) >= mask) {
- /*
- * For example, ufvalue = 2.99962, intpart = 2, and mask = 1000
- * (because precision = 3). Now, myround(1000 * 0.99962) will
- * return 1000. So, the integer part must be incremented by one
- * and the fractional part must be set to zero.
- */
- intpart++;
- fracpart = 0;
- if (estyle && intpart == 10) {
- /*
- * The value was rounded up to ten, but we only want one
- * integer digit if using "e-style". So, the integer
- * part must be set to one and the exponent must be
- * incremented by one.
- */
- intpart = 1;
- exponent++;
- }
- }
-
- /*
- * Now that we know the real exponent, we can check whether or not to
- * use "e-style" for "%g" (and "%G") conversions. If we don't need
- * "e-style", the precision must be adjusted and the integer and
- * fractional parts must be recalculated from the original value.
- *
- * C99 says: "Let P equal the precision if nonzero, 6 if the precision
- * is omitted, or 1 if the precision is zero. Then, if a conversion
- * with style `E' would have an exponent of X:
- *
- * - if P > X >= -4, the conversion is with style `f' (or `F') and
- * precision P - (X + 1).
- *
- * - otherwise, the conversion is with style `e' (or `E') and precision
- * P - 1." (7.19.6.1, 8)
- *
- * Note that we had decremented the precision by one.
- */
- if (flags & PRINT_F_TYPE_G && estyle &&
- precision + 1 > exponent && exponent >= -4) {
- precision -= exponent;
- estyle = 0;
- goto again;
- }
-
- if (estyle) {
- if (exponent < 0) {
- exponent = -exponent;
- esign = '-';
- } else
- esign = '+';
-
- /*
- * Convert the exponent. The sizeof(econvert) is 4. So, the
- * econvert buffer can hold e.g. "e+99" and "e-99". We don't
- * support an exponent which contains more than two digits.
- * Therefore, the following stores are safe.
- */
- epos = convert(exponent, econvert, 2, 10, 0);
- /*
- * C99 says: "The exponent always contains at least two digits,
- * and only as many more digits as necessary to represent the
- * exponent." (7.19.6.1, 8)
- */
- if (epos == 1)
- econvert[epos++] = '0';
- econvert[epos++] = esign;
- econvert[epos++] = (flags & PRINT_F_UP) ? 'E' : 'e';
- }
-
- /* Convert the integer part and the fractional part. */
- ipos = convert(intpart, iconvert, sizeof(iconvert), 10, 0);
- if (fracpart != 0) /* convert() would return 1 if fracpart == 0. */
- fpos = convert(fracpart, fconvert, sizeof(fconvert), 10, 0);
-
- leadfraczeros = precision - fpos;
-
- if (omitzeros) {
- if (fpos > 0) /* Omit trailing fractional part zeros. */
- while (omitcount < fpos && fconvert[omitcount] == '0')
- omitcount++;
- else { /* The fractional part is zero, omit it completely. */
- omitcount = precision;
- leadfraczeros = 0;
- }
- precision -= omitcount;
- }
-
- /*
- * Print a decimal point if either the fractional part is non-zero
- * and/or the "#" flag was specified.
- */
- if (precision > 0 || flags & PRINT_F_NUM)
- emitpoint = 1;
- if (separators) /* Get the number of group separators we'll print. */
- separators = getnumsep(ipos);
-
- padlen = width /* Minimum field width. */
- - ipos /* Number of integer digits. */
- - epos /* Number of exponent characters. */
- - precision /* Number of fractional digits. */
- - separators /* Number of group separators. */
- - (emitpoint ? 1 : 0) /* Will we print a decimal point? */
- - ((sign != 0) ? 1 : 0); /* Will we print a sign character? */
-
- if (padlen < 0)
- padlen = 0;
-
- /*
- * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
- * ignored." (7.19.6.1, 6)
- */
- if (flags & PRINT_F_MINUS) /* Left justifty. */
- padlen = -padlen;
- else if (flags & PRINT_F_ZERO && padlen > 0) {
- if (sign != 0) { /* Sign. */
- OUTCHAR(str, *len, size, sign);
- sign = 0;
- }
- while (padlen > 0) { /* Leading zeros. */
- OUTCHAR(str, *len, size, '0');
- padlen--;
- }
- }
- while (padlen > 0) { /* Leading spaces. */
- OUTCHAR(str, *len, size, ' ');
- padlen--;
- }
- if (sign != 0) /* Sign. */
- OUTCHAR(str, *len, size, sign);
- while (ipos > 0) { /* Integer part. */
- ipos--;
- OUTCHAR(str, *len, size, iconvert[ipos]);
- if (separators > 0 && ipos > 0 && ipos % 3 == 0)
- printsep(str, len, size);
- }
- if (emitpoint) { /* Decimal point. */
-#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT
- if (lc->decimal_point != NULL && *lc->decimal_point != '\0')
- OUTCHAR(str, *len, size, *lc->decimal_point);
- else /* We'll always print some decimal point character. */
-#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */
- OUTCHAR(str, *len, size, '.');
- }
- while (leadfraczeros > 0) { /* Leading fractional part zeros. */
- OUTCHAR(str, *len, size, '0');
- leadfraczeros--;
- }
- while (fpos > omitcount) { /* The remaining fractional part. */
- fpos--;
- OUTCHAR(str, *len, size, fconvert[fpos]);
- }
- while (epos > 0) { /* Exponent. */
- epos--;
- OUTCHAR(str, *len, size, econvert[epos]);
- }
- while (padlen < 0) { /* Trailing spaces. */
- OUTCHAR(str, *len, size, ' ');
- padlen++;
- }
-}
-
-static void
-printsep(char *str, size_t *len, size_t size)
-{
-#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
- struct lconv *lc = localeconv();
- int i;
-
- if (lc->thousands_sep != NULL)
- for (i = 0; lc->thousands_sep[i] != '\0'; i++)
- OUTCHAR(str, *len, size, lc->thousands_sep[i]);
- else
-#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
- OUTCHAR(str, *len, size, ',');
-}
-
-static int
-getnumsep(int digits)
-{
- int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3;
-#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
- int strln;
- struct lconv *lc = localeconv();
-
- /* We support an arbitrary separator length (including zero). */
- if (lc->thousands_sep != NULL) {
- for (strln = 0; lc->thousands_sep[strln] != '\0'; strln++)
- continue;
- separators *= strln;
- }
-#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
- return separators;
-}
-
-static int
-getexponent(LDOUBLE value)
-{
- LDOUBLE tmp = (value >= 0.0) ? value : -value;
- int exponent = 0;
-
- /*
- * We check for 99 > exponent > -99 in order to work around possible
- * endless loops which could happen (at least) in the second loop (at
- * least) if we're called with an infinite value. However, we checked
- * for infinity before calling this function using our ISINF() macro, so
- * this might be somewhat paranoid.
- */
- while (tmp < 1.0 && tmp > 0.0 && --exponent > -99)
- tmp *= 10;
- while (tmp >= 10.0 && ++exponent < 99)
- tmp /= 10;
-
- return exponent;
-}
-
-static int
-convert(UINTMAX_T value, char *buf, size_t size, int base, int caps)
-{
- const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef";
- size_t pos = 0;
-
- /* We return an unterminated buffer with the digits in reverse order. */
- do {
- buf[pos++] = digits[value % base];
- value /= base;
- } while (value != 0 && pos < size);
-
- return (int)pos;
-}
-
-static UINTMAX_T
-cast(LDOUBLE value)
-{
- UINTMAX_T result;
-
- /*
- * We check for ">=" and not for ">" because if UINTMAX_MAX cannot be
- * represented exactly as an LDOUBLE value (but is less than LDBL_MAX),
- * it may be increased to the nearest higher representable value for the
- * comparison (cf. C99: 6.3.1.4, 2). It might then equal the LDOUBLE
- * value although converting the latter to UINTMAX_T would overflow.
- */
- if (value >= UINTMAX_MAX)
- return UINTMAX_MAX;
-
- result = (UINTMAX_T)value;
- /*
- * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to
- * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates
- * the standard). Sigh.
- */
- return (result <= value) ? result : result - 1;
-}
-
-static UINTMAX_T
-myround(LDOUBLE value)
-{
- UINTMAX_T intpart = cast(value);
-
- return ((value -= intpart) < 0.5) ? intpart : intpart + 1;
-}
-
-static LDOUBLE
-mypow10(int exponent)
-{
- LDOUBLE result = 1;
-
- while (exponent > 0) {
- result *= 10;
- exponent--;
- }
- while (exponent < 0) {
- result /= 10;
- exponent++;
- }
- return result;
-}
-#endif /* !HAVE_VSNPRINTF */
-
-#if !HAVE_VASPRINTF
-#if NEED_MYMEMCPY
-void *
-mymemcpy(void *dst, void *src, size_t len)
-{
- const char *from = src;
- char *to = dst;
-
- /* No need for optimization, we use this only to replace va_copy(3). */
- while (len-- > 0)
- *to++ = *from++;
- return dst;
-}
-#endif /* NEED_MYMEMCPY */
-
-int
-util_vasprintf(char **ret, const char *format, va_list ap)
-{
- size_t size;
- int len;
- va_list aq;
-
- VA_COPY(aq, ap);
- len = vsnprintf(NULL, 0, format, aq);
- VA_END_COPY(aq);
- if (len < 0 || (*ret = malloc(size = len + 1)) == NULL)
- return -1;
- return vsnprintf(*ret, size, format, ap);
-}
-#endif /* !HAVE_VASPRINTF */
-
-#if !HAVE_SNPRINTF
-#if HAVE_STDARG_H
-int
-util_snprintf(char *str, size_t size, const char *format, ...)
-#else
-int
-util_snprintf(va_alist) va_dcl
-#endif /* HAVE_STDARG_H */
-{
-#if !HAVE_STDARG_H
- char *str;
- size_t size;
- char *format;
-#endif /* HAVE_STDARG_H */
- va_list ap;
- int len;
-
- VA_START(ap, format);
- VA_SHIFT(ap, str, char *);
- VA_SHIFT(ap, size, size_t);
- VA_SHIFT(ap, format, const char *);
- len = vsnprintf(str, size, format, ap);
- va_end(ap);
- return len;
-}
-#endif /* !HAVE_SNPRINTF */
-
-#if !HAVE_ASPRINTF
-#if HAVE_STDARG_H
-int
-util_asprintf(char **ret, const char *format, ...)
-#else
-int
-util_asprintf(va_alist) va_dcl
-#endif /* HAVE_STDARG_H */
-{
-#if !HAVE_STDARG_H
- char **ret;
- char *format;
-#endif /* HAVE_STDARG_H */
- va_list ap;
- int len;
-
- VA_START(ap, format);
- VA_SHIFT(ap, ret, char **);
- VA_SHIFT(ap, format, const char *);
- len = vasprintf(ret, format, ap);
- va_end(ap);
- return len;
-}
-#endif /* !HAVE_ASPRINTF */
-#else /* Dummy declaration to avoid empty translation unit warnings. */
-int main(void);
-#endif /* !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || [...] */
-
-
-/* vim: set joinspaces textwidth=80: */
diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
index 7f80fc12700..5afb7d9a920 100644
--- a/src/gallium/auxiliary/util/u_split_prim.h
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -23,7 +23,7 @@ struct util_split_prim {
uint edgeflag_off:1;
};
-static INLINE void
+static inline void
util_split_prim_init(struct util_split_prim *s,
unsigned mode, unsigned start, unsigned count)
{
@@ -41,7 +41,7 @@ util_split_prim_init(struct util_split_prim *s,
s->repeat_first = 0;
}
-static INLINE boolean
+static inline boolean
util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
{
int repeat = 0;
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index d4f51912a2d..7f8e5a1a3cf 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -51,7 +51,7 @@ union m128i {
uint ui[4];
};
-static INLINE void u_print_epi8(const char *name, __m128i r)
+static inline void u_print_epi8(const char *name, __m128i r)
{
union { __m128i m; ubyte ub[16]; } u;
u.m = r;
@@ -80,7 +80,7 @@ static INLINE void u_print_epi8(const char *name, __m128i r)
u.ub[12], u.ub[13], u.ub[14], u.ub[15]);
}
-static INLINE void u_print_epi16(const char *name, __m128i r)
+static inline void u_print_epi16(const char *name, __m128i r)
{
union { __m128i m; ushort us[8]; } u;
u.m = r;
@@ -99,7 +99,7 @@ static INLINE void u_print_epi16(const char *name, __m128i r)
u.us[4], u.us[5], u.us[6], u.us[7]);
}
-static INLINE void u_print_epi32(const char *name, __m128i r)
+static inline void u_print_epi32(const char *name, __m128i r)
{
union { __m128i m; uint ui[4]; } u;
u.m = r;
@@ -113,7 +113,7 @@ static INLINE void u_print_epi32(const char *name, __m128i r)
u.ui[0], u.ui[1], u.ui[2], u.ui[3]);
}
-static INLINE void u_print_ps(const char *name, __m128 r)
+static inline void u_print_ps(const char *name, __m128 r)
{
union { __m128 m; float f[4]; } u;
u.m = r;
@@ -179,7 +179,7 @@ _mm_shuffle_epi8(__m128i a, __m128i mask)
* _mm_mullo_epi32() intrinsic as to not justify adding an sse4
* dependency at this point.
*/
-static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
+static inline __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
{
__m128i a4 = _mm_srli_epi64(a, 32); /* shift by one dword */
__m128i b4 = _mm_srli_epi64(b, 32); /* shift by one dword */
@@ -204,7 +204,7 @@ static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
}
-static INLINE void
+static inline void
transpose4_epi32(const __m128i * restrict a,
const __m128i * restrict b,
const __m128i * restrict c,
diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h
index dc89c4400bc..f7ab09c8f1c 100644
--- a/src/gallium/auxiliary/util/u_string.h
+++ b/src/gallium/auxiliary/util/u_string.h
@@ -35,13 +35,14 @@
#ifndef U_STRING_H_
#define U_STRING_H_
-#if !defined(_MSC_VER) && !defined(XF86_LIBC_H)
+#if !defined(XF86_LIBC_H)
#include
#endif
#include
#include
#include "pipe/p_compiler.h"
+#include "util/macros.h" // PRINTFLIKE
#ifdef __cplusplus
@@ -54,7 +55,7 @@ extern "C" {
#else
-static INLINE char *
+static inline char *
util_strchrnul(const char *s, char c)
{
for (; *s && *s != c; ++s);
@@ -64,18 +65,44 @@ util_strchrnul(const char *s, char c)
#endif
-#ifdef _MSC_VER
+#ifdef _WIN32
-int util_vsnprintf(char *, size_t, const char *, va_list);
-int util_snprintf(char *str, size_t size, const char *format, ...);
+static inline int
+util_vsnprintf(char *str, size_t size, const char *format, va_list ap)
+{
+   /* _vsnprintf returns a negative value when the output does not fit, so
+    * fall back to _vscprintf on a copy of ap to report the needed length.
+    * The copy is released with va_end on every path. */
+   va_list ap_copy;
+   int ret;
+   va_copy(ap_copy, ap);
+   ret = _vsnprintf(str, size, format, ap);
+   if (ret < 0)
+      ret = _vscprintf(format, ap_copy);
+   va_end(ap_copy);
+   return ret;
+}
-static INLINE void
+/* Bounded formatted print: bundles the variadic arguments for util_vsnprintf. */
+static inline int PRINTFLIKE(3, 4)
+util_snprintf(char *str, size_t size, const char *format, ...)
+{
+   int written;
+   va_list args;
+   va_start(args, format);
+   written = util_vsnprintf(str, size, format, args);
+   va_end(args);
+   return written;
+}
+
+static inline void
util_vsprintf(char *str, const char *format, va_list ap)
{
util_vsnprintf(str, (size_t)-1, format, ap);
}
-static INLINE void
+static inline void
+ PRINTFLIKE(2, 3)
util_sprintf(char *str, const char *format, ...)
{
va_list ap;
@@ -84,7 +111,7 @@ util_sprintf(char *str, const char *format, ...)
va_end(ap);
}
-static INLINE char *
+static inline char *
util_strchr(const char *s, char c)
{
char *p = util_strchrnul(s, c);
@@ -92,7 +119,7 @@ util_strchr(const char *s, char c)
return *p ? p : NULL;
}
-static INLINE char*
+static inline char*
util_strncat(char *dst, const char *src, size_t n)
{
char *p = dst + strlen(dst);
@@ -106,7 +133,7 @@ util_strncat(char *dst, const char *src, size_t n)
return dst;
}
-static INLINE int
+static inline int
util_strcmp(const char *s1, const char *s2)
{
unsigned char u1, u2;
@@ -122,7 +149,7 @@ util_strcmp(const char *s1, const char *s2)
return 0;
}
-static INLINE int
+static inline int
util_strncmp(const char *s1, const char *s2, size_t n)
{
unsigned char u1, u2;
@@ -138,7 +165,7 @@ util_strncmp(const char *s1, const char *s2, size_t n)
return 0;
}
-static INLINE char *
+static inline char *
util_strstr(const char *haystack, const char *needle)
{
const char *p = haystack;
@@ -152,7 +179,7 @@ util_strstr(const char *haystack, const char *needle)
return NULL;
}
-static INLINE void *
+static inline void *
util_memmove(void *dest, const void *src, size_t n)
{
char *p = (char *)dest;
@@ -199,7 +226,7 @@ struct util_strbuf
};
-static INLINE void
+static inline void
util_strbuf_init(struct util_strbuf *sbuf, char *str, size_t size)
{
sbuf->str = str;
@@ -209,7 +236,7 @@ util_strbuf_init(struct util_strbuf *sbuf, char *str, size_t size)
}
-static INLINE void
+static inline void
util_strbuf_printf(struct util_strbuf *sbuf, const char *format, ...)
{
if(sbuf->left > 1) {
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index 1605215cb88..b84694c540b 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -50,7 +50,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size,
struct pipe_surface **res);
/* fast inline path for the very common case */
-static INLINE boolean
+static inline boolean
util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size,
struct pipe_context *ctx, struct pipe_resource *pt,
unsigned level, unsigned layer,
@@ -70,7 +70,7 @@ util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size,
return util_surfaces_do_get(us, surface_struct_size, ctx, pt, level, layer, res);
}
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned level, unsigned layer)
{
if(!us->u.pv)
@@ -84,7 +84,7 @@ util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned
void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
-static INLINE void
+static inline void
util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps)
{
if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT))
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index a33d7f7722b..dc1f568a8e5 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -42,7 +42,7 @@ struct pipe_transfer;
*
* \return TRUE if tile is totally clipped, FALSE otherwise
*/
-static INLINE boolean
+static inline boolean
u_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_box *box)
{
if ((int) x >= box->width)
diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h
index 2bee1e00014..a5017d6bce2 100644
--- a/src/gallium/auxiliary/util/u_time.h
+++ b/src/gallium/auxiliary/util/u_time.h
@@ -60,7 +60,7 @@ struct util_time
PIPE_DEPRECATED
-static INLINE void
+static inline void
util_time_get(struct util_time *t)
{
t->counter = os_time_get();
@@ -71,7 +71,7 @@ util_time_get(struct util_time *t)
* Return t2 = t1 + usecs
*/
PIPE_DEPRECATED
-static INLINE void
+static inline void
util_time_add(const struct util_time *t1,
int64_t usecs,
struct util_time *t2)
@@ -84,7 +84,7 @@ util_time_add(const struct util_time *t1,
* Return difference between times, in microseconds
*/
PIPE_DEPRECATED
-static INLINE int64_t
+static inline int64_t
util_time_diff(const struct util_time *t1,
const struct util_time *t2)
{
@@ -98,7 +98,7 @@ util_time_diff(const struct util_time *t1,
* Not publicly available because it does not take in account wrap-arounds.
* Use util_time_timeout instead.
*/
-static INLINE int
+static inline int
_util_time_compare(const struct util_time *t1,
const struct util_time *t2)
{
@@ -115,7 +115,7 @@ _util_time_compare(const struct util_time *t1,
* Returns non-zero when the timeout expires.
*/
PIPE_DEPRECATED
-static INLINE boolean
+static inline boolean
util_time_timeout(const struct util_time *start,
const struct util_time *end,
const struct util_time *curr)
@@ -128,7 +128,7 @@ util_time_timeout(const struct util_time *start,
* Return current time in microseconds
*/
PIPE_DEPRECATED
-static INLINE int64_t
+static inline int64_t
util_time_micros(void)
{
return os_time_get();
@@ -136,7 +136,7 @@ util_time_micros(void)
PIPE_DEPRECATED
-static INLINE void
+static inline void
util_time_sleep(int64_t usecs)
{
os_time_sleep(usecs);
diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c
index 71da35d6d39..4cb524d5cb1 100644
--- a/src/gallium/auxiliary/util/u_transfer.c
+++ b/src/gallium/auxiliary/util/u_transfer.c
@@ -90,7 +90,7 @@ void u_default_transfer_unmap( struct pipe_context *pipe,
}
-static INLINE struct u_resource *
+static inline struct u_resource *
u_resource( struct pipe_resource *res )
{
return (struct u_resource *)res;
diff --git a/src/gallium/auxiliary/util/u_video.h b/src/gallium/auxiliary/util/u_video.h
index b4743d13fbf..ddc00216105 100644
--- a/src/gallium/auxiliary/util/u_video.h
+++ b/src/gallium/auxiliary/util/u_video.h
@@ -40,7 +40,7 @@ extern "C" {
#include "util/u_debug.h"
#include "util/u_math.h"
-static INLINE enum pipe_video_format
+static inline enum pipe_video_format
u_reduce_video_profile(enum pipe_video_profile profile)
{
switch (profile)
@@ -68,12 +68,19 @@ u_reduce_video_profile(enum pipe_video_profile profile)
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444:
return PIPE_VIDEO_FORMAT_MPEG4_AVC;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL:
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_12:
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_444:
+ return PIPE_VIDEO_FORMAT_HEVC;
+
default:
return PIPE_VIDEO_FORMAT_UNKNOWN;
}
}
-static INLINE void
+static inline void
u_copy_nv12_to_yv12(void *const *destination_data,
uint32_t const *destination_pitches,
int src_plane, int src_field,
@@ -99,7 +106,7 @@ u_copy_nv12_to_yv12(void *const *destination_data,
}
}
-static INLINE void
+static inline void
u_copy_yv12_to_nv12(void *const *destination_data,
uint32_t const *destination_pitches,
int src_plane, int src_field,
@@ -122,7 +129,7 @@ u_copy_yv12_to_nv12(void *const *destination_data,
}
}
-static INLINE void
+static inline void
u_copy_swap422_packed(void *const *destination_data,
uint32_t const *destination_pitches,
int src_plane, int src_field,
@@ -147,7 +154,7 @@ u_copy_swap422_packed(void *const *destination_data,
}
}
-static INLINE uint32_t
+static inline uint32_t
u_get_h264_level(uint32_t width, uint32_t height, uint32_t *max_reference)
{
uint32_t max_dpb_mbs;
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 69839e61386..afe53063b48 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -538,7 +538,7 @@ cleanup_buffers(struct vl_compositor *c)
pipe_resource_reference(&c->vertex_buf.buffer, NULL);
}
-static INLINE struct u_rect
+static inline struct u_rect
default_rect(struct vl_compositor_layer *layer)
{
struct pipe_resource *res = layer->sampler_views[0]->texture;
@@ -546,21 +546,21 @@ default_rect(struct vl_compositor_layer *layer)
return rect;
}
-static INLINE struct vertex2f
+static inline struct vertex2f
calc_topleft(struct vertex2f size, struct u_rect rect)
{
struct vertex2f res = { rect.x0 / size.x, rect.y0 / size.y };
return res;
}
-static INLINE struct vertex2f
+static inline struct vertex2f
calc_bottomright(struct vertex2f size, struct u_rect rect)
{
struct vertex2f res = { rect.x1 / size.x, rect.y1 / size.y };
return res;
}
-static INLINE void
+static inline void
calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
struct u_rect src, struct u_rect dst)
{
@@ -658,7 +658,7 @@ gen_rect_verts(struct vertex2f *vb, struct vl_compositor_layer *layer)
vb[19].y = layer->colors[3].w;
}
-static INLINE struct u_rect
+static inline struct u_rect
calc_drawn_area(struct vl_compositor_state *s, struct vl_compositor_layer *layer)
{
struct vertex2f tl, br;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index abb3780f61e..52ce6c416aa 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -533,7 +533,7 @@ static struct dct_coeff tbl_B14_DC[1 << 17];
static struct dct_coeff tbl_B14_AC[1 << 17];
static struct dct_coeff tbl_B15[1 << 17];
-static INLINE void
+static inline void
init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *src,
unsigned size, bool is_DC)
{
@@ -594,7 +594,7 @@ init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *s
}
}
-static INLINE void
+static inline void
init_tables()
{
vl_vlc_init_table(tbl_B1, Elements(tbl_B1), macroblock_address_increment, Elements(macroblock_address_increment));
@@ -611,19 +611,19 @@ init_tables()
init_dct_coeff_table(tbl_B15, dct_coeff_tbl_one, Elements(dct_coeff_tbl_one), false);
}
-static INLINE int
+static inline int
DIV2DOWN(int todiv)
{
return (todiv&~1)/2;
}
-static INLINE int
+static inline int
DIV2UP(int todiv)
{
return (todiv+1)/2;
}
-static INLINE void
+static inline void
motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], short dmvector[2])
{
int t;
@@ -647,7 +647,7 @@ motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], sho
}
}
-static INLINE int
+static inline int
wrap(short f, int shift)
{
if (f < (-16 << shift))
@@ -658,7 +658,7 @@ wrap(short f, int shift)
return f;
}
-static INLINE void
+static inline void
motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
{
int dmv = mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
@@ -682,7 +682,7 @@ motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock
}
}
-static INLINE void
+static inline void
motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
{
int dmv = mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
@@ -701,12 +701,12 @@ motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock
}
}
-static INLINE void
+static inline void
reset_predictor(struct vl_mpg12_bs *bs) {
bs->pred_dc[0] = bs->pred_dc[1] = bs->pred_dc[2] = 0;
}
-static INLINE void
+static inline void
decode_dct(struct vl_mpg12_bs *bs, struct pipe_mpeg12_macroblock *mb, int scale)
{
static const unsigned blk2cc[] = { 0, 0, 0, 0, 1, 2 };
@@ -805,7 +805,7 @@ entry:
vl_vlc_eatbits(&bs->vlc, 1);
}
-static INLINE void
+static inline void
decode_slice(struct vl_mpg12_bs *bs, struct pipe_video_buffer *target)
{
struct pipe_mpeg12_macroblock mb;
@@ -929,6 +929,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_video_buffer *target)
mb.PMV[1][0][0] = mb.PMV[0][0][0];
mb.PMV[1][0][1] = mb.PMV[0][0][1];
assert(extra);
+ (void) extra;
} else if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ||
!(mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD |
PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD))) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 8579460e070..b7009837293 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -84,6 +84,9 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
struct video_buffer_private
{
+ struct list_head list;
+ struct pipe_video_buffer *video_buffer;
+
struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS];
struct pipe_surface *surfaces[VL_MAX_SURFACES];
@@ -99,6 +102,8 @@ destroy_video_buffer_private(void *private)
struct video_buffer_private *priv = private;
unsigned i;
+ list_del(&priv->list);
+
for (i = 0; i < VL_NUM_COMPONENTS; ++i)
pipe_sampler_view_reference(&priv->sampler_view_planes[i], NULL);
@@ -126,6 +131,9 @@ get_video_buffer_private(struct vl_mpeg12_decoder *dec, struct pipe_video_buffer
priv = CALLOC_STRUCT(video_buffer_private);
+ list_add(&priv->list, &dec->buffer_privates);
+ priv->video_buffer = buf;
+
sv = buf->get_sampler_view_planes(buf);
for (i = 0; i < VL_NUM_COMPONENTS; ++i)
if (sv[i])
@@ -141,6 +149,18 @@ get_video_buffer_private(struct vl_mpeg12_decoder *dec, struct pipe_video_buffer
return priv;
}
+static void
+free_video_buffer_privates(struct vl_mpeg12_decoder *dec) /* clear this decoder's associated data on every buffer tracked in dec->buffer_privates */
+{
+ struct video_buffer_private *priv, *next;
+
+ LIST_FOR_EACH_ENTRY_SAFE(priv, next, &dec->buffer_privates, list) { /* _SAFE variant: presumably the call below ends up in destroy_video_buffer_private(), which list_del()s priv -- confirm */
+ struct pipe_video_buffer *buf = priv->video_buffer;
+
+ vl_video_buffer_set_associated_data(buf, &dec->base, NULL, NULL); /* keyed by &dec->base; the two NULLs presumably mean "no data, no destroy callback" -- confirm */
+ }
+}
+
static bool
init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
{
@@ -297,7 +317,7 @@ cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
vl_mc_cleanup_buffer(&buf->mc[i]);
}
-static INLINE void
+static inline void
MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned weights[2])
{
assert(mb);
@@ -332,7 +352,7 @@ MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned we
}
}
-static INLINE struct vl_motionvector
+static inline struct vl_motionvector
MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
unsigned field_select_mask, unsigned weight)
{
@@ -383,7 +403,7 @@ MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
return mv;
}
-static INLINE void
+static inline void
UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
struct vl_mpeg12_buffer *buf,
const struct pipe_mpeg12_macroblock *mb)
@@ -464,6 +484,8 @@ vl_mpeg12_destroy(struct pipe_video_codec *decoder)
assert(decoder);
+ free_video_buffer_privates(dec);
+
/* Asserted in softpipe_delete_fs_state() for some reason */
dec->context->bind_vs_state(dec->context, NULL);
dec->context->bind_fs_state(dec->context, NULL);
@@ -1187,6 +1209,8 @@ vl_create_mpeg12_decoder(struct pipe_context *context,
if (!init_pipe_state(dec))
goto error_pipe_state;
+ list_inithead(&dec->buffer_privates);
+
return &dec->base;
error_pipe_state:
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 2a604054387..505dd675f66 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -30,6 +30,8 @@
#include "pipe/p_video_codec.h"
+#include "util/list.h"
+
#include "vl_mpeg12_bitstream.h"
#include "vl_zscan.h"
#include "vl_idct.h"
@@ -77,6 +79,8 @@ struct vl_mpeg12_decoder
unsigned current_buffer;
struct vl_mpeg12_buffer *dec_buffers[4];
+
+ struct list_head buffer_privates;
};
struct vl_mpeg12_buffer
diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h b/src/gallium/auxiliary/vl/vl_rbsp.h
index 2e3da8e1d28..7867238c49e 100644
--- a/src/gallium/auxiliary/vl/vl_rbsp.h
+++ b/src/gallium/auxiliary/vl/vl_rbsp.h
@@ -48,7 +48,7 @@ struct vl_rbsp {
/**
* Initialize the RBSP object
*/
-static INLINE void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsigned num_bits)
+static inline void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsigned num_bits)
{
unsigned bits_left = vl_vlc_bits_left(nal);
@@ -71,7 +71,7 @@ static INLINE void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsign
/**
* Make at least 16 more bits available
*/
-static INLINE void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
+static inline void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
{
unsigned valid = vl_vlc_valid_bits(&rbsp->nal);
unsigned i, bits;
@@ -108,7 +108,7 @@ static INLINE void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
/**
* Return an unsigned integer from the first n bits
*/
-static INLINE unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n)
+static inline unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n)
{
if (n == 0)
return 0;
@@ -120,7 +120,7 @@ static INLINE unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n)
/**
* Return an unsigned exponential Golomb encoded integer
*/
-static INLINE unsigned vl_rbsp_ue(struct vl_rbsp *rbsp)
+static inline unsigned vl_rbsp_ue(struct vl_rbsp *rbsp)
{
unsigned bits = 0;
@@ -134,7 +134,7 @@ static INLINE unsigned vl_rbsp_ue(struct vl_rbsp *rbsp)
/**
* Return an signed exponential Golomb encoded integer
*/
-static INLINE signed vl_rbsp_se(struct vl_rbsp *rbsp)
+static inline signed vl_rbsp_se(struct vl_rbsp *rbsp)
{
signed codeNum = vl_rbsp_ue(rbsp);
if (codeNum & 1)
@@ -146,7 +146,7 @@ static INLINE signed vl_rbsp_se(struct vl_rbsp *rbsp)
/**
* Are more data available in the RBSP ?
*/
-static INLINE bool vl_rbsp_more_data(struct vl_rbsp *rbsp)
+static inline bool vl_rbsp_more_data(struct vl_rbsp *rbsp)
{
unsigned bits, value;
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
index 2f905956dbf..7821b8be0a1 100644
--- a/src/gallium/auxiliary/vl/vl_vlc.h
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -65,7 +65,7 @@ struct vl_vlc_compressed
/**
* initalize and decompress a lookup table
*/
-static INLINE void
+static inline void
vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size)
{
unsigned i, bits = util_logbase2(dst_size);
@@ -87,7 +87,7 @@ vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_v
/**
* switch over to next input buffer
*/
-static INLINE void
+static inline void
vl_vlc_next_input(struct vl_vlc *vlc)
{
unsigned len = vlc->sizes[0];
@@ -112,7 +112,7 @@ vl_vlc_next_input(struct vl_vlc *vlc)
/**
* align the data pointer to the next dword
*/
-static INLINE void
+static inline void
vl_vlc_align_data_ptr(struct vl_vlc *vlc)
{
/* align the data pointer */
@@ -126,7 +126,7 @@ vl_vlc_align_data_ptr(struct vl_vlc *vlc)
/**
* fill the bit buffer, so that at least 32 bits are valid
*/
-static INLINE void
+static inline void
vl_vlc_fillbits(struct vl_vlc *vlc)
{
assert(vlc);
@@ -175,7 +175,7 @@ vl_vlc_fillbits(struct vl_vlc *vlc)
/**
* initialize vlc structure and start reading from first input buffer
*/
-static INLINE void
+static inline void
vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs,
const void *const *inputs, const unsigned *sizes)
{
@@ -203,7 +203,7 @@ vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs,
/**
* number of bits still valid in bit buffer
*/
-static INLINE unsigned
+static inline unsigned
vl_vlc_valid_bits(struct vl_vlc *vlc)
{
return 32 - vlc->invalid_bits;
@@ -212,7 +212,7 @@ vl_vlc_valid_bits(struct vl_vlc *vlc)
/**
* number of bits left over all inbut buffers
*/
-static INLINE unsigned
+static inline unsigned
vl_vlc_bits_left(struct vl_vlc *vlc)
{
signed bytes_left = vlc->end - vlc->data;
@@ -223,7 +223,7 @@ vl_vlc_bits_left(struct vl_vlc *vlc)
/**
* get num_bits from bit buffer without removing them
*/
-static INLINE unsigned
+static inline unsigned
vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
{
assert(vl_vlc_valid_bits(vlc) >= num_bits || vlc->data >= vlc->end);
@@ -233,7 +233,7 @@ vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
/**
* remove num_bits from bit buffer
*/
-static INLINE void
+static inline void
vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
{
assert(vl_vlc_valid_bits(vlc) >= num_bits);
@@ -245,7 +245,7 @@ vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
/**
* get num_bits from bit buffer with removing them
*/
-static INLINE unsigned
+static inline unsigned
vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
{
unsigned value;
@@ -261,7 +261,7 @@ vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
/**
* treat num_bits as signed value and remove them from bit buffer
*/
-static INLINE signed
+static inline signed
vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
{
signed value;
@@ -277,7 +277,7 @@ vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
/**
* lookup a value and length in a decompressed table
*/
-static INLINE int8_t
+static inline int8_t
vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits)
{
tbl += vl_vlc_peekbits(vlc, num_bits);
@@ -288,7 +288,7 @@ vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned n
/**
* fast forward search for a specific byte value
*/
-static INLINE boolean
+static inline boolean
vl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value)
{
/* make sure we are on a byte boundary */
@@ -345,7 +345,7 @@ vl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value)
/**
* remove num_bits bits starting at pos from the bitbuffer
*/
-static INLINE void
+static inline void
vl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits)
{
uint64_t lo = (vlc->buffer & (~0UL >> (pos + num_bits))) << num_bits;
@@ -357,7 +357,7 @@ vl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits)
/**
* limit the number of bits left for fetching
*/
-static INLINE void
+static inline void
vl_vlc_limit(struct vl_vlc *vlc, unsigned bits_left)
{
assert(bits_left <= vl_vlc_bits_left(vlc));
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 7e61b88e6b5..3b1b87f9523 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -37,6 +37,8 @@
#include
#include
+#include "loader.h"
+
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#include "pipe/p_state.h"
@@ -293,6 +295,16 @@ vl_screen_get_private(struct vl_screen *vscreen)
return vscreen;
}
+static xcb_screen_t * /* entry reached after `screen` steps of the iterator, or NULL */
+get_xcb_screen(xcb_screen_iterator_t iter, int screen) /* iter is passed by value, so only this local copy is advanced */
+{
+ for (; iter.rem; --screen, xcb_screen_next(&iter)) /* count screen down while stepping through the remaining entries */
+ if (screen == 0)
+ return iter.data;
+
+ return NULL; /* iter.rem reached 0 first (index too large or negative). NOTE(review): the callers in vl_screen_create() apply ->root to this result unchecked */
+}
+
struct vl_screen*
vl_screen_create(Display *display, int screen)
{
@@ -334,8 +346,7 @@ vl_screen_create(Display *display, int screen)
goto free_query;
s = xcb_setup_roots_iterator(xcb_get_setup(scrn->conn));
- while (screen--)
- xcb_screen_next(&s);
+
driverType = XCB_DRI2_DRIVER_TYPE_DRI;
#ifdef DRI2DriverPrimeShift
{
@@ -351,7 +362,7 @@ vl_screen_create(Display *display, int screen)
}
#endif
- connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, s.data->root, driverType);
+ connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, driverType);
connect = xcb_dri2_connect_reply(scrn->conn, connect_cookie, NULL);
if (connect == NULL || connect->driver_name_length + connect->device_name_length == 0)
goto free_connect;
@@ -361,7 +372,7 @@ vl_screen_create(Display *display, int screen)
if (!device_name)
goto free_connect;
memcpy(device_name, xcb_dri2_connect_device_name(connect), device_name_length);
- fd = open(device_name, O_RDWR);
+ fd = loader_open_device(device_name);
free(device_name);
if (fd < 0)
@@ -370,7 +381,7 @@ vl_screen_create(Display *display, int screen)
if (drmGetMagic(fd, &magic))
goto free_connect;
- authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, s.data->root, magic);
+ authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, magic);
authenticate = xcb_dri2_authenticate_reply(scrn->conn, authenticate_cookie, NULL);
if (authenticate == NULL || !authenticate->authenticated)
@@ -379,7 +390,7 @@ vl_screen_create(Display *display, int screen)
#if GALLIUM_STATIC_TARGETS
scrn->base.pscreen = dd_create_screen(fd);
#else
- if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd, false))
+ if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, PIPE_SEARCH_DIR);
#endif // GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/docs/d3d11ddi.txt b/src/gallium/docs/d3d11ddi.txt
deleted file mode 100644
index a7036481411..00000000000
--- a/src/gallium/docs/d3d11ddi.txt
+++ /dev/null
@@ -1,462 +0,0 @@
-This document compares the D3D10/D3D11 device driver interface with Gallium.
-It is written from the perspective of a developer implementing a D3D10/D3D11 driver as a Gallium state tracker.
-
-Note that naming and other cosmetic differences are not noted, since they don't really matter and would severely clutter the document.
-Gallium/OpenGL terminology is used in preference to D3D terminology.
-
-NOTE: this document tries to be complete but most likely isn't fully complete and also not fully correct: please submit patches if you spot anything incorrect
-
-Also note that this is specifically for the DirectX 10/11 Windows Vista/7 DDI interfaces.
-DirectX 9 has both user-mode (for Vista) and kernel mode (pre-Vista) interfaces, but they are significantly different from Gallium due to the presence of a lot of fixed function functionality.
-
-The user-visible DirectX 10/11 interfaces are distinct from the kernel DDI, but they match very closely.
-
-* Accessing Microsoft documentation
-
-See http://msdn.microsoft.com/en-us/library/dd445501.aspx ("D3D11DDI_DEVICEFUNCS") for D3D documentation.
-
-Also see http://download.microsoft.com/download/f/2/d/f2d5ee2c-b7ba-4cd0-9686-b6508b5479a1/direct3d10_web.pdf ("The Direct3D 10 System" by David Blythe) for an introduction to Direct3D 10 and the rationale for its design.
-
-The Windows Driver Kit contains the actual headers, as well as shader bytecode documentation.
-
-To get the headers from Linux, run the following, in a dedicated directory:
-wget http://download.microsoft.com/download/4/A/2/4A25C7D5-EFBE-4182-B6A9-AE6850409A78/GRMWDK_EN_7600_1.ISO
-sudo mount -o loop GRMWDK_EN_7600_1.ISO /mnt/tmp
-cabextract -x /mnt/tmp/wdk/headers_cab001.cab
-rename 's/^_(.*)_[0-9]*$/$1/' *
-sudo umount /mnt/tmp
-
-d3d10umddi.h contains the DDI interface analyzed in this document: note that it is much easier to read this online on MSDN.
-d3d{10,11}TokenizedProgramFormat.hpp contains the shader bytecode definitions: this is not available on MSDN.
-d3d9types.h contains DX9 shader bytecode, and DX9 types
-d3dumddi.h contains the DirectX 9 DDI interface
-
-* Glossary
-
-BC1: DXT1
-BC2: DXT3
-BC3: DXT5
-BC5: RGTC1
-BC6H: BPTC float
-BC7: BPTC
-CS = compute shader: OpenCL-like shader
-DS = domain shader: tessellation evaluation shader
-HS = hull shader: tessellation control shader
-IA = input assembler: primitive assembly
-Input layout: vertex elements
-OM = output merger: blender
-PS = pixel shader: fragment shader
-Primitive topology: primitive type
-Resource: buffer or texture
-Shader resource (view): sampler view
-SO = stream out: transform feedback
-Unordered access view: view supporting random read/write access (usually from compute shaders)
-
-* Legend
-
--: features D3D11 has and Gallium lacks
-+: features Gallium has and D3D11 lacks
-!: differences between D3D11 and Gallium
-*: possible improvements to Gallium
->: references to comparisons of special enumerations
-#: comment
-
-* Gallium functions with no direct D3D10/D3D11 equivalent
-
-clear
- + Gallium supports clearing both render targets and depth/stencil with a single call
-
-fence_signalled
-fence_finish
- + D3D10/D3D11 don't appear to support explicit fencing; queries can often substitute though, and flushing is supported
-
-set_clip_state
- + Gallium supports fixed function user clip planes, D3D10/D3D11 only support using the vertex shader for them
-
-set_polygon_stipple
- + Gallium supports polygon stipple
-
-clearRT/clearDS
- + Gallium supports subrectangle fills of surfaces, D3D10 only supports full clears of views
-
-* DirectX 10/11 DDI functions and Gallium equivalents
-
-AbandonCommandList (D3D11 only)
- - Gallium does not support deferred contexts
-
-CalcPrivateBlendStateSize
-CalcPrivateDepthStencilStateSize
-CalcPrivateDepthStencilViewSize
-CalcPrivateElementLayoutSize
-CalcPrivateGeometryShaderWithStreamOutput
-CalcPrivateOpenedResourceSize
-CalcPrivateQuerySize
-CalcPrivateRasterizerStateSize
-CalcPrivateRenderTargetViewSize
-CalcPrivateResourceSize
-CalcPrivateSamplerSize
-CalcPrivateShaderResourceViewSize
-CalcPrivateShaderSize
-CalcDeferredContextHandleSize (D3D11 only)
-CalcPrivateCommandListSize (D3D11 only)
-CalcPrivateDeferredContextSize (D3D11 only)
-CalcPrivateTessellationShaderSize (D3D11 only)
-CalcPrivateUnorderedAccessViewSize (D3D11 only)
- ! D3D11 allocates private objects itself, using the size computed here
- * Gallium could do something similar to be able to put the private data inline into state tracker objects: this would allow them to fit in the same cacheline and improve performance
-
-CheckDeferredContextHandleSizes (D3D11 only)
- - Gallium does not support deferred contexts
-
-CheckFormatSupport -> screen->is_format_supported
- ! Gallium passes usages to this function, D3D11 returns them
- - Gallium does not differentiate between blendable and non-blendable render targets
- ! Gallium includes sample count directly, D3D11 uses additional query
-
-CheckMultisampleQualityLevels
- ! is merged with is_format_supported
-
-CommandListExecute (D3D11 only)
- - Gallium does not support command lists
-
-CopyStructureCount (D3D11 only)
- - Gallium does not support unordered access views (views that can be written to arbitrarily from compute shaders)
-
-ClearDepthStencilView -> clear_depth_stencil
-ClearRenderTargetView -> clear_render_target
- # D3D11 is not totally clear about whether this applies to any view or only a "currently-bound view"
- + Gallium allows to clear both depth/stencil and render target(s) in a single operation
- + Gallium supports double-precision depth values (but not rgba values!)
- * May want to also support double-precision rgba or use "float" for "depth"
-
-ClearUnorderedAccessViewFloat (D3D11 only)
-ClearUnorderedAccessViewUint (D3D11 only)
- - Gallium does not support unordered access views (views that can be written to arbitrarily from compute shaders)
-
-CreateBlendState (extended in D3D10.1) -> create_blend_state
- # D3D10 does not support per-RT blend modes (but per-RT blending), only D3D10.1 does
- + Gallium supports logic ops
- + Gallium supports dithering
- + Gallium supports using the broadcast alpha component of the blend constant color
-
-CreateCommandList (D3D11 only)
- - Gallium does not support command lists
-
-CreateComputeShader (D3D11 only)
- - Gallium does not support compute shaders
-
-CreateDeferredContext (D3D11 only)
- - Gallium does not support deferred contexts
-
-CreateDomainShader (D3D11 only)
- - Gallium does not support domain shaders
-
-CreateHullShader (D3D11 only)
- - Gallium does not support hull shaders
-
-CreateUnorderedAccessView (D3D11 only)
- - Gallium does not support unordered access views
-
-CreateDepthStencilState -> create_depth_stencil_alpha_state
- ! D3D11 has both a global stencil enable, and front/back enables; Gallium has only front/back enables
- + Gallium has per-face writemask/valuemasks, D3D11 uses the same value for back and front
- + Gallium supports the alpha test, which D3D11 lacks
-
-CreateDepthStencilView -> create_surface
-CreateRenderTargetView -> create_surface
- ! Gallium merges depthstencil and rendertarget views into pipe_surface
- - lack of render-to-buffer support
- + Gallium supports using 3D texture zslices as a depth/stencil buffer (in theory)
-
-CreateElementLayout -> create_vertex_elements_state
- ! D3D11 allows sparse vertex elements (via InputRegister); in Gallium they must be specified sequentially
- ! D3D11 has an extra flag (InputSlotClass) that is the same as instance_divisor == 0
-
-CreateGeometryShader -> create_gs_state
-CreateGeometryShaderWithStreamOutput -> create_gs_state + create_stream_output_state
-CreatePixelShader -> create_fs_state
-CreateVertexShader -> create_vs_state
- > bytecode is different (see D3d10tokenizedprogramformat.hpp)
- ! D3D11 describes input/outputs separately from bytecode; Gallium has the tgsi_scan.c module to extract it from TGSI
- @ TODO: look into DirectX 10/11 semantics specification and bytecode
-
-CheckCounter
-CheckCounterInfo
-CreateQuery -> create_query
- ! D3D11 implements fences with "event" queries
- * others are performance counters, we may want them but they are not critical
-
-CreateRasterizerState
- + Gallium, like OpenGL, supports PIPE_POLYGON_MODE_POINT
- + Gallium, like OpenGL, supports per-face polygon fill modes
- + Gallium, like OpenGL, supports culling everything
- + Gallium, like OpenGL, supports two-side lighting; D3D11 only has the facing attribute
- + Gallium, like OpenGL, supports per-fill-mode polygon offset enables
- + Gallium, like OpenGL, supports polygon smoothing
- + Gallium, like OpenGL, supports polygon stipple
- + Gallium, like OpenGL, supports point smoothing
- + Gallium, like OpenGL, supports point sprites
- + Gallium supports specifying point quad rasterization
- + Gallium, like OpenGL, supports per-point point size
- + Gallium, like OpenGL, supports line smoothing
- + Gallium, like OpenGL, supports line stipple
- + Gallium supports line last pixel rule specification
- + Gallium, like OpenGL, supports provoking vertex convention
- + Gallium supports D3D9 rasterization rules
- + Gallium supports fixed line width
- + Gallium supports fixed point size
-
-CreateResource -> texture_create or buffer_create
- ! D3D11 passes the dimensions of all mipmap levels to the create call, while Gallium has an implicit floor(x/2) rule
- # Note that hardware often has the implicit rule, so the D3D11 interface seems to make little sense
- # Also, the D3D11 API does not allow the user to specify mipmap sizes, so this really seems a dubious decision on Microsoft's part
- - D3D11 supports specifying initial data to write in the resource
- - Gallium does not support unordered access buffers
- ! D3D11 specifies mapping flags (i.e. read/write/discard);:it's unclear what they are used for here
- - D3D11 supports odd things in the D3D10_DDI_RESOURCE_MISC_FLAG enum (D3D10_DDI_RESOURCE_MISC_DISCARD_ON_PRESENT, D3D11_DDI_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS, D3D11_DDI_RESOURCE_MISC_BUFFER_STRUCTURED)
- - Gallium does not support indirect draw call parameter buffers
- ! D3D11 supports specifying hardware modes and other stuff here for scanout resources
- ! D3D11 implements cube maps as 2D array textures
-
-CreateSampler
- - D3D11 supports a monochrome convolution filter for "text filtering"
- + Gallium supports non-normalized coordinates
- + Gallium supports CLAMP, MIRROR_CLAMP and MIRROR_CLAMP_TO_BORDER
- + Gallium supports setting min/max/mip filters and anisotropy independently
-
-CreateShaderResourceView (extended in D3D10.1) -> create_sampler_view
- + Gallium supports specifying a swizzle
- ! D3D11 implements "cube views" as views into a 2D array texture
-
-CsSetConstantBuffers (D3D11 only)
-CsSetSamplers (D3D11 only)
-CsSetShader (D3D11 only)
-CsSetShaderResources (D3D11 only)
-CsSetShaderWithIfaces (D3D11 only)
-CsSetUnorderedAccessViews (D3D11 only)
- - Gallium does not support compute shaders
-
-DestroyBlendState
-DestroyCommandList (D3D11 only)
-DestroyDepthStencilState
-DestroyDepthStencilView
-DestroyDevice
-DestroyElementLayout
-DestroyQuery
-DestroyRasterizerState
-DestroyRenderTargetView
-DestroyResource
-DestroySampler
-DestroyShader
-DestroyShaderResourceView
-DestroyUnorderedAccessView (D3D11 only)
- # these are trivial
-
-Dispatch (D3D11 only)
- - Gallium does not support compute shaders
-
-DispatchIndirect (D3D11 only)
- - Gallium does not support compute shaders
-
-Draw -> draw_vbo
- ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better
-
-DrawAuto -> draw_auto
-
-DrawIndexed -> draw_vbo
- ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better
- + D3D11 lacks explicit range, which is required for OpenGL
-
-DrawIndexedInstanced -> draw_vbo
- ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better
-
-DrawIndexedInstancedIndirect (D3D11 only)
- # this allows to use an hardware buffer to specify the parameters for multiple draw_vbo calls
- - Gallium does not support draw call parameter buffers and indirect draw
-
-DrawInstanced -> draw_vbo
- ! D3D11 sets primitive modes separately with IaSetTopology: it's not obvious which is better
-
-DrawInstancedIndirect (D3D11 only)
- # this allows to use an hardware buffer to specify the parameters for multiple draw_vbo calls
- - Gallium does not support draw call parameter buffers and indirect draws
-
-DsSetConstantBuffers (D3D11 only)
-DsSetSamplers (D3D11 only)
-DsSetShader (D3D11 only)
-DsSetShaderResources (D3D11 only)
-DsSetShaderWithIfaces (D3D11 only)
- - Gallium does not support domain shaders
-
-Flush -> flush
- ! Gallium supports fencing, D3D11 just has a dumb glFlush-like function
-
-GenMips
- - Gallium lacks a mipmap generation interface, and does this manually with the 3D engine
- * it may be useful to add a mipmap generation interface, since the hardware (especially older cards) may have a better way than using the 3D engine
-
-GsSetConstantBuffers -> for(i = StartBuffer; i < NumBuffers; ++i) set_constant_buffer(PIPE_SHADER_GEOMETRY, i, phBuffers[i])
-
-GsSetSamplers
- - Gallium does not support sampling in geometry shaders
-
-GsSetShader -> bind_gs_state
-
-GsSetShaderWithIfaces (D3D11 only)
- - Gallium does not support shader interfaces
-
-GsSetShaderResources
- - Gallium does not support sampling in geometry shaders
-
-HsSetConstantBuffers (D3D11 only)
-HsSetSamplers (D3D11 only)
-HsSetShader (D3D11 only)
-HsSetShaderResources (D3D11 only)
-HsSetShaderWithIfaces (D3D11 only)
- - Gallium does not support hull shaders
-
-IaSetIndexBuffer -> set_index_buffer
- + Gallium supports 8-bit indices
- # the D3D11 interface allows index-size-unaligned byte offsets into the index buffer; most drivers will abort with an assertion
-
-IaSetInputLayout -> bind_vertex_elements_state
-
-IaSetTopology
- ! Gallium passes the topology = primitive type to the draw calls
- * may want to add an interface for this
- - Gallium lacks support for DirectX 11 tessellated primitives
- + Gallium supports line loops, triangle fans, quads, quad strips and polygons
-
-IaSetVertexBuffers -> set_vertex_buffers
- - Gallium only allows setting all vertex buffers at once, while D3D11 supports setting a subset
-
-OpenResource -> texture_from_handle
-
-PsSetConstantBuffers -> for(i = StartBuffer; i < NumBuffers; ++i) set_constant_buffer(PIPE_SHADER_FRAGMENT, i, phBuffers[i])
- * may want to split into fragment/vertex-specific versions
-
-PsSetSamplers -> bind_fragment_sampler_states
- * may want to allow binding subsets instead of all at once
-
-PsSetShader -> bind_fs_state
-
-PsSetShaderWithIfaces (D3D11 only)
- - Gallium does not support shader interfaces
-
-PsSetShaderResources -> set_sampler_views
- * may want to allow binding subsets instead of all at once
-
-QueryBegin -> begin_query
-
-QueryEnd -> end_query
-
-QueryGetData -> get_query_result
- - D3D11 supports reading an arbitrary data chunk for query results, Gallium only supports reading a 64-bit integer
- + D3D11 doesn't seem to support actually waiting for the query result (?!)
- - D3D11 supports optionally not flushing command buffers here and instead returning DXGI_DDI_ERR_WASSTILLDRAWING
-
-RecycleCommandList (D3D11 only)
-RecycleCreateCommandList (D3D11 only)
-RecycleDestroyCommandList (D3D11 only)
- - Gallium does not support command lists
-
-RecycleCreateDeferredContext (D3D11 only)
- - Gallium does not support deferred contexts
-
-RelocateDeviceFuncs
- - Gallium does not support moving pipe_context, while D3D11 seems to, using this
-
-ResetPrimitiveID (D3D10.1+ only, #ifdef D3D10PSGP)
- # used to do vertex processing on the GPU on Intel G45 chipsets when it is faster this way (see www.intel.com/Assets/PDF/whitepaper/322931.pdf)
- # presumably this resets the primitive id system value
- - Gallium does not support vertex pipeline bypass anymore
-
-ResourceCopy
-ResourceCopyRegion
-ResourceConvert (D3D10.1+ only)
-ResourceConvertRegion (D3D10.1+ only)
- -> resource_copy_region
-
-ResourceIsStagingBusy ->
- - Gallium lacks this
- + Gallium can use fences
-
-ResourceReadAfterWriteHazard
- - Gallium lacks this
-
-ResourceResolveSubresource -> blit
-
-ResourceMap
-ResourceUnmap
-DynamicConstantBufferMapDiscard
-DynamicConstantBufferUnmap
-DynamicIABufferMapDiscard
-DynamicIABufferMapNoOverwrite
-DynamicIABufferUnmap
-DynamicResourceMapDiscard
-DynamicResourceUnmap
-StagingResourceMap
-StagingResourceUnmap
- -> transfer functions
- ! Gallium and D3D have different semantics for transfers
- * D3D separates vertex/index buffers from constant buffers
- ! D3D separates some buffer flags into specialized calls
-
-ResourceUpdateSubresourceUP -> transfer functionality, transfer_inline_write in gallium-resources
-DefaultConstantBufferUpdateSubresourceUP -> transfer functionality, transfer_inline_write in gallium-resources
-
-SetBlendState -> bind_blend_state, set_blend_color and set_sample_mask
- ! D3D11 fuses bind_blend_state, set_blend_color and set_sample_mask in a single function
-
-SetDepthStencilState -> bind_depth_stencil_alpha_state and set_stencil_ref
- ! D3D11 fuses bind_depth_stencil_alpha_state and set_stencil_ref in a single function
-
-SetPredication -> render_condition
- # here both D3D11 and Gallium seem very limited (hardware is too, probably though)
- # ideally, we should support nested conditional rendering, as well as more complex tests (checking for an arbitrary range, after an AND with arbitrary mask )
- # of couse, hardware support is probably as limited as OpenGL/D3D11
- + Gallium, like NV_conditional_render, supports by-region and wait flags
- - D3D11 supports predication conditional on being equal any value (along with occlusion predicates); Gallium only supports on non-zero
-
-SetRasterizerState -> bind_rasterizer_state
-
-SetRenderTargets (extended in D3D11) -> set_framebuffer_state
- ! Gallium passed a width/height here, D3D11 does not
- ! Gallium lacks ClearTargets (but this is redundant and the driver can trivially compute this if desired)
- - Gallium does not support unordered access views
- - Gallium does not support geometry shader selection of texture array image / 3D texture zslice
-
-SetResourceMinLOD (D3D11 only) -> pipe_sampler_view::tex::first_level
-
-SetScissorRects
- - Gallium lacks support for multiple geometry-shader-selectable scissor rectangles D3D11 has
-
-SetTextFilterSize
- - Gallium lacks support for text filters
-
-SetVertexPipelineOutput (D3D10.1+ only)
- # used to do vertex processing on the GPU on Intel G45 chipsets when it is faster this way (see www.intel.com/Assets/PDF/whitepaper/322931.pdf)
- - Gallium does not support vertex pipeline bypass anymore
-
-SetViewports
- - Gallium lacks support for multiple geometry-shader-selectable viewports D3D11 has
-
-ShaderResourceViewReadAfterWriteHazard
- - Gallium lacks support for this
- + Gallium has texture_barrier
-
-SoSetTargets -> set_stream_output_buffers
-
-VsSetConstantBuffers -> for(i = StartBuffer; i < NumBuffers; ++i) set_constant_buffer(PIPE_SHADER_VERTEX, i, phBuffers[i])
- * may want to split into fragment/vertex-specific versions
-
-VsSetSamplers -> bind_vertex_sampler_states
- * may want to allow binding subsets instead of all at once
-
-VsSetShader -> bind_vs_state
-
-VsSetShaderWithIfaces (D3D11 only)
- - Gallium does not support shader interfaces
-
-VsSetShaderResources -> set_sampler_views
- * may want to allow binding subsets instead of all at once
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index 0908ee7e058..a7d08d2c7f9 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -131,14 +131,14 @@ from a shader without an associated sampler. This means that they
have no support for floating point coordinates, address wrap modes or
filtering.
-Shader resources are specified for all the shader stages at once using
-the ``set_shader_resources`` method. When binding texture resources,
-the ``level``, ``first_layer`` and ``last_layer`` pipe_surface fields
-specify the mipmap level and the range of layers the texture will be
-constrained to. In the case of buffers, ``first_element`` and
-``last_element`` specify the range within the buffer that will be used
-by the shader resource. Writes to a shader resource are only allowed
-when the ``writable`` flag is set.
+There are 2 types of shader resources: buffers and images.
+
+Buffers are specified using the ``set_shader_buffers`` method.
+
+Images are specified using the ``set_shader_images`` method. When binding
+images, the ``level``, ``first_layer`` and ``last_layer`` pipe_image_view
+fields specify the mipmap level and the range of layers the image will be
+constrained to.
Surfaces
^^^^^^^^
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 8f64817fe5f..2c0da016d08 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -254,6 +254,19 @@ The integer capabilities:
and size must be page-aligned.
* ``PIPE_CAP_DEVICE_RESET_STATUS_QUERY``:
Whether pipe_context::get_device_reset_status is implemented.
+* ``PIPE_CAP_MAX_SHADER_PATCH_VARYINGS``:
+ How many per-patch outputs and inputs are supported between tessellation
+ control and tessellation evaluation shaders, not counting TESSINNER and
+ TESSOUTER. The minimum allowed value for OpenGL is 30.
+* ``PIPE_CAP_TEXTURE_FLOAT_LINEAR``: Whether the linear minification and
+ magnification filters are supported with single-precision floating-point
+ textures.
+* ``PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR``: Whether the linear minification and
+ magnification filters are supported with half-precision floating-point
+ textures.
+* ``PIPE_CAP_DEPTH_BOUNDS_TEST``: Whether bounds_test, bounds_min, and
+ bounds_max states of pipe_depth_stencil_alpha_state behave according
+ to the GL_EXT_depth_bounds_test specification.
.. _pipe_capf:
@@ -384,6 +397,8 @@ pipe_screen::get_compute_param.
Value type: ``uint32_t``
* ``PIPE_COMPUTE_CAP_IMAGES_SUPPORTED``: Whether images are supported
non-zero means yes, zero means no. Value type: ``uint32_t``
+* ``PIPE_COMPUTE_CAP_SUBGROUP_SIZE``: The size of a basic execution unit in
+ threads. Also known as wavefront size, warp size or SIMD width.
.. _pipe_bind:
@@ -424,8 +439,10 @@ resources might be created and handled quite differently.
process.
* ``PIPE_BIND_GLOBAL``: A buffer that can be mapped into the global
address space of a compute program.
-* ``PIPE_BIND_SHADER_RESOURCE``: A buffer or texture that can be
- bound to the graphics pipeline as a shader resource.
+* ``PIPE_BIND_SHADER_BUFFER``: A buffer without a format that can be bound
+ to a shader and can be used with load, store, and atomic instructions.
+* ``PIPE_BIND_SHADER_IMAGE``: A buffer or texture with a format that can be
+ bound to a shader and can be used with load, store, and atomic instructions.
* ``PIPE_BIND_COMPUTE_RESOURCE``: A buffer or texture that can be
bound to the compute program as a shader resource.
* ``PIPE_BIND_COMMAND_ARGS_BUFFER``: A buffer that may be sourced by the
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 89ca172080e..314c9ca8fa2 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2591,7 +2591,7 @@ Array Declaration
^^^^^^^^^^^^^^^^^^^^^^^^
Declarations can optional have an ArrayID attribute which can be referred by
-indirect addressing operands. An ArrayID of zero is reserved and treaded as
+indirect addressing operands. An ArrayID of zero is reserved and treated as
if no ArrayID is specified.
If an indirect addressing operand refers to a specific declaration by using
@@ -2603,6 +2603,7 @@ not relative to the specified declaration
If no ArrayID is specified with an indirect addressing operand the whole
register file might be accessed by this operand. This is strongly discouraged
and will prevent packing of scalar/vec2 arrays and effective alias analysis.
+Omitting the ArrayID is only legal for TEMP and CONST register files.
Declaration Semantic
^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/src/gallium/drivers/freedreno/Android.mk b/src/gallium/drivers/freedreno/Android.mk
index a6712b2c115..ed51835e1fb 100644
--- a/src/gallium/drivers/freedreno/Android.mk
+++ b/src/gallium/drivers/freedreno/Android.mk
@@ -28,7 +28,9 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
$(C_SOURCES) \
$(a2xx_SOURCES) \
- $(a3xx_SOURCES)
+ $(a3xx_SOURCES) \
+ $(a4xx_SOURCES) \
+ $(ir3_SOURCES)
LOCAL_CFLAGS := \
-Wno-packed-bitfield-compat
@@ -37,6 +39,7 @@ LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/ir3
LOCAL_SHARED_LIBRARIES := libdrm libdrm_freedreno
+LOCAL_STATIC_LIBRARIES := libmesa_glsl
LOCAL_MODULE := libmesa_pipe_freedreno
include $(GALLIUM_COMMON_MK)
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
index cbf62c6daae..dff95ba5270 100644
--- a/src/gallium/drivers/freedreno/Makefile.am
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index f4f6b94c1ea..c4516baf2ec 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -8,15 +8,15 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
-Copyright (C) 2013-2014 by the following authors:
+Copyright (C) 2013-2015 by the following authors:
- Rob Clark (robclark)
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_blend.h b/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
index 7cafcd3747e..3c8d8f7c09f 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_blend.h
@@ -39,7 +39,7 @@ struct fd2_blend_stateobj {
uint32_t rb_colormask;
};
-static INLINE struct fd2_blend_stateobj *
+static inline struct fd2_blend_stateobj *
fd2_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd2_blend_stateobj *)blend;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.c b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
index a0bf01ffd1f..6089ebc1516 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
@@ -67,7 +67,7 @@ create_solid_vertexbuf(struct pipe_context *pctx)
}
static const uint8_t a22x_primtypes[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A2XX,
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_PSIZE,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
@@ -77,7 +77,7 @@ static const uint8_t a22x_primtypes[PIPE_PRIM_MAX] = {
};
static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A2XX,
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_PSIZE,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST,
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_context.h b/src/gallium/drivers/freedreno/a2xx/fd2_context.h
index de845f07a85..74147107930 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.h
@@ -40,7 +40,7 @@ struct fd2_context {
struct pipe_resource *solid_vertexbuf;
};
-static INLINE struct fd2_context *
+static inline struct fd2_context *
fd2_context(struct fd_context *ctx)
{
return (struct fd2_context *)ctx;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h b/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h
index adc0653132b..9e53cd3be75 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_rasterizer.h
@@ -43,7 +43,7 @@ struct fd2_rasterizer_stateobj {
uint32_t pa_su_sc_mode_cntl;
};
-static INLINE struct fd2_rasterizer_stateobj *
+static inline struct fd2_rasterizer_stateobj *
fd2_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd2_rasterizer_stateobj *)rast;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_texture.h b/src/gallium/drivers/freedreno/a2xx/fd2_texture.h
index 4fffa08b3c3..5c9236851bd 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_texture.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_texture.h
@@ -42,7 +42,7 @@ struct fd2_sampler_stateobj {
uint32_t tex0, tex3, tex4, tex5;
};
-static INLINE struct fd2_sampler_stateobj *
+static inline struct fd2_sampler_stateobj *
fd2_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd2_sampler_stateobj *)samp;
@@ -54,7 +54,7 @@ struct fd2_pipe_sampler_view {
uint32_t tex0, tex2, tex3;
};
-static INLINE struct fd2_pipe_sampler_view *
+static inline struct fd2_pipe_sampler_view *
fd2_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd2_pipe_sampler_view *)pview;
diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h b/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h
index dda1e552174..15609ad0267 100644
--- a/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_zsa.h
@@ -44,7 +44,7 @@ struct fd2_zsa_stateobj {
uint32_t rb_stencilrefmask_bf;
};
-static INLINE struct fd2_zsa_stateobj *
+static inline struct fd2_zsa_stateobj *
fd2_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd2_zsa_stateobj *)zsa;
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index a3bc74eda85..8e8cf6a03f2 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -8,13 +8,13 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark (robclark)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
index 4f6eeb74481..142df7c300f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
@@ -32,6 +32,8 @@
#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "freedreno_util.h"
+
struct fd3_blend_stateobj {
struct pipe_blend_state base;
struct {
@@ -42,10 +44,10 @@ struct fd3_blend_stateobj {
/* Blend control bits for alpha channel */
uint32_t blend_control_alpha;
uint32_t control;
- } rb_mrt[4];
+ } rb_mrt[A3XX_MAX_RENDER_TARGETS];
};
-static INLINE struct fd3_blend_stateobj *
+static inline struct fd3_blend_stateobj *
fd3_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd3_blend_stateobj *)blend;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 7e5a99ea571..dc33783e398 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -88,7 +88,7 @@ create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
}
static const uint8_t primtypes[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX,
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
@@ -121,6 +121,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
fd3_gmem_init(pctx);
fd3_texture_init(pctx);
fd3_prog_init(pctx);
+ fd3_emit_init(pctx);
pctx = fd_context_init(&fd3_ctx->base, pscreen, primtypes, priv);
if (!pctx)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 77e4605e550..6e20b2ff9bc 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -112,7 +112,7 @@ struct fd3_context {
struct ir3_shader_key last_key;
};
-static INLINE struct fd3_context *
+static inline struct fd3_context *
fd3_context(struct fd_context *ctx)
{
return (struct fd3_context *)ctx;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index b5838b58eb2..a9498835011 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -60,6 +60,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct pipe_draw_info *info = emit->info;
enum pc_di_primtype primtype = ctx->primtypes[info->mode];
+ if (!(fd3_emit_get_vp(emit) && fd3_emit_get_fp(emit)))
+ return;
+
fd3_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
@@ -79,8 +82,8 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
info->restart_index : 0xffffffff);
if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
- info->mode == PIPE_PRIM_POINTS)
- primtype = DI_PT_POINTLIST_A2XX;
+ (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
fd_draw_emit(ctx, ring,
primtype,
@@ -240,10 +243,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
.vtx = &fd3_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
- .half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
- fd3_half_precision(pfb->cbufs[1]) &&
- fd3_half_precision(pfb->cbufs[2]) &&
- fd3_half_precision(pfb->cbufs[3])),
+ .half_precision = fd_half_precision(pfb),
},
};
@@ -321,7 +321,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
}
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
@@ -342,7 +342,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
fd3_emit_vertex_bufs(ring, &emit);
- fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+ fd3_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 07cc2266d08..752e7f88cb9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -43,19 +43,26 @@
#include "fd3_format.h"
#include "fd3_zsa.h"
+static const enum adreno_state_block sb[] = {
+ [SHADER_VERTEX] = SB_VERT_SHADER,
+ [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
-fd3_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;
+ debug_assert((regid % 4) == 0);
+ debug_assert((sizedwords % 4) == 0);
+
if (prsc) {
sz = 0;
src = SS_INDIRECT;
@@ -67,7 +74,7 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
CP_LOAD_STATE_0_STATE_SRC(src) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd3_emit_constant(struct fd_ringbuffer *ring,
}
static void
-emit_constants(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
- struct fd_constbuf_stateobj *constbuf,
- struct ir3_shader_variant *shader,
- bool emit_immediates)
+fd3_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
{
- uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t max_const;
- int i;
+ uint32_t i;
- // XXX TODO only emit dirty consts.. but we need to keep track if
- // they are clobbered by a clear, gmem2mem, or mem2gmem..
- constbuf->dirty_mask = enabled_mask;
+ debug_assert((regid % 4) == 0);
+ debug_assert((num % 4) == 0);
- /* in particular, with binning shader we may end up with unused
- * consts, ie. we could end up w/ constlen that is smaller
- * than first_immediate. In that case truncate the user consts
- * early to avoid HLSQ lockup caused by writing too many consts
- */
- max_const = MIN2(shader->first_driver_param, shader->constlen);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + num);
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(num/2));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
- /* emit user constants: */
- if (enabled_mask & 1) {
- const unsigned index = 0;
- struct pipe_constant_buffer *cb = &constbuf->cb[index];
- unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
- // I expect that size should be a multiple of vec4's:
- assert(size == align(size, 4));
-
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, 4 * max_const);
-
- if (size && constbuf->dirty_mask & (1 << index)) {
- fd3_emit_constant(ring, sb, 0,
- cb->buffer_offset, size,
- cb->user_buffer, cb->buffer);
- constbuf->dirty_mask &= ~(1 << index);
- }
-
- enabled_mask &= ~(1 << index);
- }
-
- if (shader->constlen > shader->first_driver_param) {
- uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
- /* emit ubos: */
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param * 2) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(params * 2));
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
- for (i = 1; i <= params * 4; i++) {
- struct pipe_constant_buffer *cb = &constbuf->cb[i];
- assert(!cb->user_buffer);
- if ((enabled_mask & (1 << i)) && cb->buffer)
- OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
- else
- OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
- }
- }
-
- /* emit shader immediates: */
- if (shader && emit_immediates) {
- int size = shader->immediates_count;
- uint32_t base = shader->first_immediate;
-
- /* truncate size to avoid writing constants that shader
- * does not use:
- */
- size = MIN2(size + base, shader->constlen) - base;
-
- /* convert out of vec4: */
- base *= 4;
- size *= 4;
-
- if (size > 0) {
- fd3_emit_constant(ring, sb, base,
- 0, size, shader->immediates[0].val, NULL);
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ if (write) {
+ OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ }
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16));
}
}
}
@@ -302,14 +251,15 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
for (i = 0; i < tex->num_textures; i++) {
static const struct fd3_pipe_sampler_view dummy_view = {
+ .base.target = PIPE_TEXTURE_1D, /* anything !PIPE_BUFFER */
.base.u.tex.first_level = 1,
};
const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
fd3_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
struct fd_resource *rsc = fd_resource(view->base.texture);
- unsigned start = view->base.u.tex.first_level;
- unsigned end = view->base.u.tex.last_level;
+ unsigned start = fd_sampler_first_level(&view->base);
+ unsigned end = fd_sampler_last_level(&view->base);;
for (j = 0; j < (end - start + 1); j++) {
struct fd_resource_slice *slice =
@@ -392,6 +342,7 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
format = fd3_gmem_restore_format(rsc->base.b.format);
}
+ /* note: PIPE_BUFFER disallowed for surfaces */
unsigned lvl = psurf[i]->u.tex.level;
struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
@@ -444,7 +395,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
- unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
+ unsigned vertex_regid = regid(63, 0);
+ unsigned instance_regid = regid(63, 0);
+ unsigned vtxcnt_regid = regid(63, 0);
for (i = 0; i < vp->inputs_count; i++) {
uint8_t semantic = sem2name(vp->inputs[i].semantic);
@@ -452,14 +405,17 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
vertex_regid = vp->inputs[i].regid;
else if (semantic == TGSI_SEMANTIC_INSTANCEID)
instance_regid = vp->inputs[i].regid;
+ else if (semantic == IR3_SEMANTIC_VTXCNT)
+ vtxcnt_regid = vp->inputs[i].regid;
else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
last = i;
}
/* hw doesn't like to be configured for zero vbo's, it seems: */
- if (vtx->vtx->num_elements == 0 &&
- vertex_regid == regid(63, 0) &&
- instance_regid == regid(63, 0))
+ if ((vtx->vtx->num_elements == 0) &&
+ (vertex_regid == regid(63, 0)) &&
+ (instance_regid == regid(63, 0)) &&
+ (vtxcnt_regid == regid(63, 0)))
return;
for (i = 0, j = 0; i <= last; i++) {
@@ -472,8 +428,9 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
enum pipe_format pfmt = elem->src_format;
enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
bool switchnext = (i != last) ||
- vertex_regid != regid(63, 0) ||
- instance_regid != regid(63, 0);
+ (vertex_regid != regid(63, 0)) ||
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
@@ -512,6 +469,10 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));
+
+ OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
+ OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
+ A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid));
}
void
@@ -669,33 +630,12 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, HLSQ_FLUSH);
- if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
- /* evil hack to deal sanely with clear path: */
- (emit->prog == &ctx->prog)) {
- fd_wfi(ctx, ring);
- emit_constants(ring, SB_VERT_SHADER,
- &ctx->constbuf[PIPE_SHADER_VERTEX],
- vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
- if (!emit->key.binning_pass) {
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
- }
- }
-
- /* emit driver params every time */
- if (emit->info && emit->prog == &ctx->prog) {
- uint32_t vertex_params[4] = {
- emit->info->indexed ? emit->info->index_bias : emit->info->start,
- 0,
- 0,
- 0
- };
- if (vp->constlen >= vp->first_driver_param + 4) {
- fd3_emit_constant(ring, SB_VERT_SHADER,
- (vp->first_driver_param + 4) * 4,
- 0, 4, vertex_params, NULL);
- }
+ if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+ ir3_emit_consts(vp, ring, emit->info, dirty);
+ if (!emit->key.binning_pass)
+ ir3_emit_consts(fp, ring, emit->info, dirty);
+ /* mark clean after emitting consts: */
+ ctx->prog.dirty = 0;
}
if ((dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) && ctx->blend) {
@@ -930,3 +870,11 @@ fd3_emit_restore(struct fd_context *ctx)
ctx->needs_rb_fbd = true;
}
+
+void
+fd3_emit_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx);
+ ctx->emit_const = fd3_emit_const;
+ ctx->emit_const_bo = fd3_emit_const_bo;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
index 8f21919c9a7..795654706a7 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -37,10 +37,8 @@
#include "ir3_shader.h"
struct fd_ringbuffer;
-enum adreno_state_block;
-void fd3_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+void fd3_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);
@@ -90,4 +88,6 @@ void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
void fd3_emit_restore(struct fd_context *ctx);
+void fd3_emit_init(struct pipe_context *pctx);
+
#endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.h b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
index 6afc3015901..05c5ea3d247 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.h
@@ -41,27 +41,4 @@ enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
-static INLINE bool
-fd3_half_precision(const struct pipe_surface *surface)
-{
- enum pipe_format format;
- if (!surface)
- return true;
-
- format = surface->format;
-
- /* colors are provided in consts, which go through cov.f32f16, which will
- * break these values
- */
- if (util_format_is_pure_integer(format))
- return false;
-
- /* avoid losing precision on 32-bit float formats */
- if (util_format_is_float(format) &&
- util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32)
- return false;
-
- return true;
-}
-
#endif /* FD3_FORMAT_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index 7d3975761dd..9a5b45e2fcb 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -57,7 +57,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
tile_mode = LINEAR;
}
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < A3XX_MAX_RENDER_TARGETS; i++) {
enum pipe_format pformat = 0;
enum a3xx_color_fmt format = 0;
enum a3xx_color_swap swap = WZYX;
@@ -537,10 +537,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
/* NOTE: They all use the same VP, this is for vtx bufs. */
.prog = &ctx->blit_prog[0],
.key = {
- .half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
- fd3_half_precision(pfb->cbufs[1]) &&
- fd3_half_precision(pfb->cbufs[2]) &&
- fd3_half_precision(pfb->cbufs[3]))
+ .half_precision = fd_half_precision(pfb),
},
};
float x0, y0, x1, y1;
@@ -654,6 +651,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
+ emit.fp = NULL; /* frag shader changed so clear cache */
fd3_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
}
@@ -674,6 +672,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
emit.prog = &ctx->blit_zs;
emit.key.half_precision = false;
}
+ emit.fp = NULL; /* frag shader changed so clear cache */
fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 57fcaa9020e..b5360797745 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -51,7 +51,7 @@ create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state
enum shader_t type)
{
struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
- so->shader = ir3_shader_create(pctx, cso->tokens, type);
+ so->shader = ir3_shader_create(pctx, cso, type);
return so;
}
@@ -136,6 +136,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
int constmode;
int i, j, k;
+ debug_assert(nr <= ARRAY_SIZE(color_regid));
+
vp = fd3_emit_get_vp(emit);
if (emit->key.binning_pass) {
@@ -202,12 +204,12 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
} else {
- for (int i = 0; i < fp->outputs_count; i++) {
+ for (i = 0; i < fp->outputs_count; i++) {
ir3_semantic sem = fp->outputs[i].semantic;
unsigned idx = sem2idx(sem);
if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
continue;
- assert(idx < 4);
+ debug_assert(idx < ARRAY_SIZE(color_regid));
color_regid[idx] = fp->outputs[i].regid;
}
}
@@ -449,10 +451,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
}
- OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
- OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
- A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
-
if (vpbuffer == BUFFER)
emit_shader(ring, vp);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_query.c b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
index 7abab543427..8fc0a0d4229 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_query.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_query.c
@@ -64,7 +64,7 @@ occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
OUT_PKT3(ring, CP_DRAW_INDX, 3);
OUT_RING(ring, 0x00000000);
- OUT_RING(ring, DRAW(DI_PT_POINTLIST_A2XX, DI_SRC_SEL_AUTO_INDEX,
+ OUT_RING(ring, DRAW(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
INDEX_SIZE_IGN, USE_VISIBILITY, 0));
OUT_RING(ring, 0); /* NumIndices */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
index 7e9c1f51f59..765d9719524 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
@@ -44,7 +44,7 @@ struct fd3_rasterizer_stateobj {
uint32_t pc_prim_vtx_cntl;
};
-static INLINE struct fd3_rasterizer_stateobj *
+static inline struct fd3_rasterizer_stateobj *
fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd3_rasterizer_stateobj *)rast;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
index 094dcf376e5..722fe360202 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
@@ -105,7 +105,7 @@ void
fd3_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = 4;
+ screen->max_rts = A3XX_MAX_RENDER_TARGETS;
screen->compiler = ir3_compiler_create(screen->gpu_id);
pscreen->context_create = fd3_context_create;
pscreen->is_format_supported = fd3_screen_is_format_supported;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index a278bf5c603..c30658d0e7b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -210,8 +210,8 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = cso->u.tex.first_level;
- unsigned miplevels = cso->u.tex.last_level - lvl;
+ unsigned lvl = fd_sampler_first_level(cso);
+ unsigned miplevels = fd_sampler_last_level(cso) - lvl;
uint32_t sz2 = 0;
if (!so)
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.h b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
index c38fd847f27..d5afb03cd7a 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
@@ -43,7 +43,7 @@ struct fd3_sampler_stateobj {
bool saturate_s, saturate_t, saturate_r;
};
-static INLINE struct fd3_sampler_stateobj *
+static inline struct fd3_sampler_stateobj *
fd3_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd3_sampler_stateobj *)samp;
@@ -54,7 +54,7 @@ struct fd3_pipe_sampler_view {
uint32_t texconst0, texconst1, texconst2, texconst3;
};
-static INLINE struct fd3_pipe_sampler_view *
+static inline struct fd3_pipe_sampler_view *
fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd3_pipe_sampler_view *)pview;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
index 352c3dd5432..d4dc5954da5 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
@@ -45,7 +45,7 @@ struct fd3_zsa_stateobj {
uint32_t rb_stencilrefmask_bf;
};
-static INLINE struct fd3_zsa_stateobj *
+static inline struct fd3_zsa_stateobj *
fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd3_zsa_stateobj *)zsa;
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 0e7d3cf6db1..563f70ac5eb 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -8,13 +8,13 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark (robclark)
@@ -227,6 +227,7 @@ enum a4xx_depth_format {
DEPTH4_NONE = 0,
DEPTH4_16 = 1,
DEPTH4_24_8 = 2,
+ DEPTH4_32 = 3,
};
enum a4xx_tess_spacing {
@@ -570,6 +571,15 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
return ((val) << A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT) & A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK;
}
+#define REG_A4XX_RB_SAMPLE_COUNT_CONTROL 0x000020fa
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK 0xfffffffc
+#define A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT 2
+static inline uint32_t A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR(uint32_t val)
+{
+ return ((val >> 2) << A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__SHIFT) & A4XX_RB_SAMPLE_COUNT_CONTROL_ADDR__MASK;
+}
+
#define REG_A4XX_RB_RENDER_COMPONENTS 0x000020fb
#define A4XX_RB_RENDER_COMPONENTS_RT0__MASK 0x0000000f
#define A4XX_RB_RENDER_COMPONENTS_RT0__SHIFT 0
@@ -811,6 +821,23 @@ static inline uint32_t A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op v
#define REG_A4XX_RB_STENCIL_CONTROL2 0x00002107
#define A4XX_RB_STENCIL_CONTROL2_STENCIL_BUFFER 0x00000001
+#define REG_A4XX_RB_STENCIL_INFO 0x00002108
+#define A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL 0x00000001
+#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK 0xfffff000
+#define A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT 12
+static inline uint32_t A4XX_RB_STENCIL_INFO_STENCIL_BASE(uint32_t val)
+{
+ return ((val >> 12) << A4XX_RB_STENCIL_INFO_STENCIL_BASE__SHIFT) & A4XX_RB_STENCIL_INFO_STENCIL_BASE__MASK;
+}
+
+#define REG_A4XX_RB_STENCIL_PITCH 0x00002109
+#define A4XX_RB_STENCIL_PITCH__MASK 0xffffffff
+#define A4XX_RB_STENCIL_PITCH__SHIFT 0
+static inline uint32_t A4XX_RB_STENCIL_PITCH(uint32_t val)
+{
+ return ((val >> 5) << A4XX_RB_STENCIL_PITCH__SHIFT) & A4XX_RB_STENCIL_PITCH__MASK;
+}
+
#define REG_A4XX_RB_STENCILREFMASK 0x0000210b
#define A4XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff
#define A4XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0
@@ -1167,6 +1194,8 @@ static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578
#define REG_A4XX_SP_VS_STATUS 0x00000ec0
+#define REG_A4XX_SP_MODE_CONTROL 0x00000ec3
+
#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf
#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0
@@ -1432,6 +1461,20 @@ static inline uint32_t A4XX_SP_FS_MRT_REG_MRTFORMAT(enum a4xx_color_fmt val)
return ((val) << A4XX_SP_FS_MRT_REG_MRTFORMAT__SHIFT) & A4XX_SP_FS_MRT_REG_MRTFORMAT__MASK;
}
+#define REG_A4XX_SP_CS_CTRL_REG0 0x00002300
+
+#define REG_A4XX_SP_CS_OBJ_OFFSET_REG 0x00002301
+
+#define REG_A4XX_SP_CS_OBJ_START 0x00002302
+
+#define REG_A4XX_SP_CS_PVT_MEM_PARAM 0x00002303
+
+#define REG_A4XX_SP_CS_PVT_MEM_ADDR 0x00002304
+
+#define REG_A4XX_SP_CS_PVT_MEM_SIZE 0x00002305
+
+#define REG_A4XX_SP_CS_LENGTH_REG 0x00002306
+
#define REG_A4XX_SP_HS_OBJ_OFFSET_REG 0x0000230d
#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
@@ -1454,6 +1497,76 @@ static inline uint32_t A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A4XX_SP_HS_LENGTH_REG 0x00002312
+#define REG_A4XX_SP_DS_PARAM_REG 0x0000231a
+#define A4XX_SP_DS_PARAM_REG_POSREGID__MASK 0x000000ff
+#define A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT 0
+static inline uint32_t A4XX_SP_DS_PARAM_REG_POSREGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_DS_PARAM_REG_POSREGID__MASK;
+}
+#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000
+#define A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20
+static inline uint32_t A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_DS_PARAM_REG_TOTALGSOUTVAR__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_DS_OUT(uint32_t i0) { return 0x0000231b + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_DS_OUT_REG(uint32_t i0) { return 0x0000231b + 0x1*i0; }
+#define A4XX_SP_DS_OUT_REG_A_REGID__MASK 0x000001ff
+#define A4XX_SP_DS_OUT_REG_A_REGID__SHIFT 0
+static inline uint32_t A4XX_SP_DS_OUT_REG_A_REGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_A_REGID__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK 0x00001e00
+#define A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT 9
+static inline uint32_t A4XX_SP_DS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_B_REGID__MASK 0x01ff0000
+#define A4XX_SP_DS_OUT_REG_B_REGID__SHIFT 16
+static inline uint32_t A4XX_SP_DS_OUT_REG_B_REGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_DS_OUT_REG_B_REGID__MASK;
+}
+#define A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK 0x1e000000
+#define A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT 25
+static inline uint32_t A4XX_SP_DS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_DS_OUT_REG_B_COMPMASK__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_DS_VPC_DST(uint32_t i0) { return 0x0000232c + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_DS_VPC_DST_REG(uint32_t i0) { return 0x0000232c + 0x1*i0; }
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT 0
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT 8
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT 16
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
+#define A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT 24
+static inline uint32_t A4XX_SP_DS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+ return ((val) << A4XX_SP_DS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_DS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
#define REG_A4XX_SP_DS_OBJ_OFFSET_REG 0x00002334
#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
@@ -1476,6 +1589,82 @@ static inline uint32_t A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A4XX_SP_DS_LENGTH_REG 0x00002339
+#define REG_A4XX_SP_GS_PARAM_REG 0x00002341
+#define A4XX_SP_GS_PARAM_REG_POSREGID__MASK 0x000000ff
+#define A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT 0
+static inline uint32_t A4XX_SP_GS_PARAM_REG_POSREGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_PARAM_REG_POSREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_POSREGID__MASK;
+}
+#define A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK 0x0000ff00
+#define A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT 8
+static inline uint32_t A4XX_SP_GS_PARAM_REG_PRIMREGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_PARAM_REG_PRIMREGID__SHIFT) & A4XX_SP_GS_PARAM_REG_PRIMREGID__MASK;
+}
+#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK 0xfff00000
+#define A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT 20
+static inline uint32_t A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__SHIFT) & A4XX_SP_GS_PARAM_REG_TOTALGSOUTVAR__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_GS_OUT(uint32_t i0) { return 0x00002342 + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_GS_OUT_REG(uint32_t i0) { return 0x00002342 + 0x1*i0; }
+#define A4XX_SP_GS_OUT_REG_A_REGID__MASK 0x000001ff
+#define A4XX_SP_GS_OUT_REG_A_REGID__SHIFT 0
+static inline uint32_t A4XX_SP_GS_OUT_REG_A_REGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_OUT_REG_A_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_A_REGID__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK 0x00001e00
+#define A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT 9
+static inline uint32_t A4XX_SP_GS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_OUT_REG_A_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_B_REGID__MASK 0x01ff0000
+#define A4XX_SP_GS_OUT_REG_B_REGID__SHIFT 16
+static inline uint32_t A4XX_SP_GS_OUT_REG_B_REGID(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_OUT_REG_B_REGID__SHIFT) & A4XX_SP_GS_OUT_REG_B_REGID__MASK;
+}
+#define A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK 0x1e000000
+#define A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT 25
+static inline uint32_t A4XX_SP_GS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_OUT_REG_B_COMPMASK__SHIFT) & A4XX_SP_GS_OUT_REG_B_COMPMASK__MASK;
+}
+
+static inline uint32_t REG_A4XX_SP_GS_VPC_DST(uint32_t i0) { return 0x00002353 + 0x1*i0; }
+
+static inline uint32_t REG_A4XX_SP_GS_VPC_DST_REG(uint32_t i0) { return 0x00002353 + 0x1*i0; }
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT 0
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC0__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT 8
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC1__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT 16
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC2__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
+#define A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT 24
+static inline uint32_t A4XX_SP_GS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+ return ((val) << A4XX_SP_GS_VPC_DST_REG_OUTLOC3__SHIFT) & A4XX_SP_GS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
#define REG_A4XX_SP_GS_OBJ_OFFSET_REG 0x0000235b
#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
@@ -1677,6 +1866,18 @@ static inline uint32_t A4XX_VFD_CONTROL_3_REGID_VTXCNT(uint32_t val)
{
return ((val) << A4XX_VFD_CONTROL_3_REGID_VTXCNT__SHIFT) & A4XX_VFD_CONTROL_3_REGID_VTXCNT__MASK;
}
+#define A4XX_VFD_CONTROL_3_REGID_TESSX__MASK 0x00ff0000
+#define A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT 16
+static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSX(uint32_t val)
+{
+ return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSX__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSX__MASK;
+}
+#define A4XX_VFD_CONTROL_3_REGID_TESSY__MASK 0xff000000
+#define A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT 24
+static inline uint32_t A4XX_VFD_CONTROL_3_REGID_TESSY(uint32_t val)
+{
+ return ((val) << A4XX_VFD_CONTROL_3_REGID_TESSY__SHIFT) & A4XX_VFD_CONTROL_3_REGID_TESSY__MASK;
+}
#define REG_A4XX_VFD_CONTROL_4 0x00002204
@@ -1758,6 +1959,8 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
#define REG_A4XX_TPL1_DEBUG_ECO_CONTROL 0x00000f00
+#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03
+
#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b
#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380
@@ -1800,6 +2003,10 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x000023a1
+#define REG_A4XX_TPL1_TP_CS_BORDER_COLOR_BASE_ADDR 0x000023a4
+
+#define REG_A4XX_TPL1_TP_CS_SAMPLER_BASE_ADDR 0x000023a5
+
#define REG_A4XX_TPL1_TP_CS_TEXMEMOBJ_BASE_ADDR 0x000023a6
#define REG_A4XX_GRAS_TSE_STATUS 0x00000c80
@@ -2078,6 +2285,8 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
#define REG_A4XX_HLSQ_DEBUG_ECO_CONTROL 0x00000e04
+#define REG_A4XX_HLSQ_MODE_CONTROL 0x00000e05
+
#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e
#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0
@@ -2158,6 +2367,8 @@ static inline uint32_t A4XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val)
return ((val) << A4XX_HLSQ_CONTROL_3_REG_REGID__SHIFT) & A4XX_HLSQ_CONTROL_3_REG_REGID__MASK;
}
+#define REG_A4XX_HLSQ_CONTROL_4_REG 0x000023c4
+
#define REG_A4XX_HLSQ_VS_CONTROL_REG 0x000023c5
#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000000ff
#define A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0
@@ -2293,6 +2504,36 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val)
return ((val) << A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__SHIFT) & A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH__MASK;
}
+#define REG_A4XX_HLSQ_CS_CONTROL 0x000023ca
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_0 0x000023cd
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_1 0x000023ce
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_2 0x000023cf
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_3 0x000023d0
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_4 0x000023d1
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_5 0x000023d2
+
+#define REG_A4XX_HLSQ_CL_NDRANGE_6 0x000023d3
+
+#define REG_A4XX_HLSQ_CL_CONTROL_0 0x000023d4
+
+#define REG_A4XX_HLSQ_CL_CONTROL_1 0x000023d5
+
+#define REG_A4XX_HLSQ_CL_KERNEL_CONST 0x000023d6
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_X 0x000023d7
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Y 0x000023d8
+
+#define REG_A4XX_HLSQ_CL_KERNEL_GROUP_Z 0x000023d9
+
+#define REG_A4XX_HLSQ_CL_WG_OFFSET 0x000023da
+
#define REG_A4XX_HLSQ_UPDATE_CONTROL 0x000023db
#define REG_A4XX_PC_BINNING_COMMAND 0x00000d00
@@ -2389,16 +2630,10 @@ static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val)
#define REG_A4XX_UNKNOWN_0D01 0x00000d01
-#define REG_A4XX_UNKNOWN_0E05 0x00000e05
-
#define REG_A4XX_UNKNOWN_0E42 0x00000e42
#define REG_A4XX_UNKNOWN_0EC2 0x00000ec2
-#define REG_A4XX_UNKNOWN_0EC3 0x00000ec3
-
-#define REG_A4XX_UNKNOWN_0F03 0x00000f03
-
#define REG_A4XX_UNKNOWN_2001 0x00002001
#define REG_A4XX_UNKNOWN_209B 0x0000209b
@@ -2439,6 +2674,8 @@ static inline uint32_t A4XX_UNKNOWN_20F7(float val)
#define REG_A4XX_UNKNOWN_22D7 0x000022d7
+#define REG_A4XX_UNKNOWN_2352 0x00002352
+
#define REG_A4XX_TEX_SAMP_0 0x00000000
#define A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR 0x00000001
#define A4XX_TEX_SAMP_0_XY_MAG__MASK 0x00000006
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
index 396caa532fc..d5e823ef69d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
@@ -61,7 +61,7 @@ fd4_blend_state_create(struct pipe_context *pctx,
struct fd4_blend_stateobj *so;
// enum a3xx_rop_code rop = ROP_COPY;
bool reads_dest = false;
- int i;
+ unsigned i, mrt_blend = 0;
if (cso->logicop_enable) {
// rop = cso->logicop_func; /* maps 1:1 */
@@ -84,11 +84,6 @@ fd4_blend_state_create(struct pipe_context *pctx,
}
}
- if (cso->independent_blend_enable) {
- DBG("Unsupported! independent blend state");
- return NULL;
- }
-
so = CALLOC_STRUCT(fd4_blend_stateobj);
if (!so)
return NULL;
@@ -96,7 +91,12 @@ fd4_blend_state_create(struct pipe_context *pctx,
so->base = *cso;
for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
- const struct pipe_rt_blend_state *rt = &cso->rt[i];
+ const struct pipe_rt_blend_state *rt;
+
+ if (cso->independent_blend_enable)
+ rt = &cso->rt[i];
+ else
+ rt = &cso->rt[0];
so->rb_mrt[i].blend_control =
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
@@ -115,7 +115,7 @@ fd4_blend_state_create(struct pipe_context *pctx,
A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
A4XX_RB_MRT_CONTROL_BLEND |
A4XX_RB_MRT_CONTROL_BLEND2;
- so->rb_fs_output |= A4XX_RB_FS_OUTPUT_ENABLE_BLEND(1);
+ mrt_blend |= (1 << i);
}
if (reads_dest)
@@ -125,5 +125,7 @@ fd4_blend_state_create(struct pipe_context *pctx,
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
}
+ so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
+
return so;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
index 33641da5e2c..7620d00a625 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
@@ -32,17 +32,19 @@
#include "pipe/p_state.h"
#include "pipe/p_context.h"
+#include "freedreno_util.h"
+
struct fd4_blend_stateobj {
struct pipe_blend_state base;
struct {
uint32_t control;
uint32_t buf_info;
uint32_t blend_control;
- } rb_mrt[8];
+ } rb_mrt[A4XX_MAX_RENDER_TARGETS];
uint32_t rb_fs_output;
};
-static INLINE struct fd4_blend_stateobj *
+static inline struct fd4_blend_stateobj *
fd4_blend_stateobj(struct pipe_blend_state *blend)
{
return (struct fd4_blend_stateobj *)blend;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
index 2321876dd48..e172d350517 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -86,7 +86,7 @@ create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
}
static const uint8_t primtypes[PIPE_PRIM_MAX] = {
- [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX,
+ [PIPE_PRIM_POINTS] = DI_PT_POINTLIST,
[PIPE_PRIM_LINES] = DI_PT_LINELIST,
[PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP,
[PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP,
@@ -119,6 +119,7 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv)
fd4_gmem_init(pctx);
fd4_texture_init(pctx);
fd4_prog_init(pctx);
+ fd4_emit_init(pctx);
pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv);
if (!pctx)
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
index 53e1bf6a2e6..0b749916841 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -90,7 +90,7 @@ struct fd4_context {
struct ir3_shader_key last_key;
};
-static INLINE struct fd4_context *
+static inline struct fd4_context *
fd4_context(struct fd_context *ctx)
{
return (struct fd4_context *)ctx;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
index de5a306af60..2bd2ca23d54 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -48,6 +48,9 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
{
const struct pipe_draw_info *info = emit->info;
+ if (!(fd4_emit_get_vp(emit) && fd4_emit_get_fp(emit)))
+ return;
+
fd4_emit_state(ctx, ring, emit);
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
@@ -108,7 +111,6 @@ static void
fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &ctx->vtx,
.prog = &ctx->prog,
@@ -129,8 +131,9 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
.fsaturate_t = fd4_ctx->fsaturate_t,
.fsaturate_r = fd4_ctx->fsaturate_r,
},
- .format = fd4_emit_format(pfb->cbufs[0]),
- .pformat = pipe_surface_format(pfb->cbufs[0]),
+ .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+ .sprite_coord_enable = ctx->rasterizer ? ctx->rasterizer->sprite_coord_enable : false,
+ .sprite_coord_mode = ctx->rasterizer ? ctx->rasterizer->sprite_coord_mode : false,
};
unsigned dirty;
@@ -170,20 +173,16 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
struct fd4_context *fd4_ctx = fd4_context(ctx);
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
unsigned dirty = ctx->dirty;
- unsigned ce, i;
+ unsigned i;
struct fd4_emit emit = {
.vtx = &fd4_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
.key = {
- .half_precision = true,
+ .half_precision = fd_half_precision(pfb),
},
- .format = fd4_emit_format(pfb->cbufs[0]),
};
- uint32_t colr = 0;
-
- if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
- colr = pack_rgba(pfb->cbufs[0]->format, color->f);
dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
dirty |= FD_DIRTY_PROG;
@@ -257,16 +256,15 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR) {
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
- ce = 0xf;
- } else {
- ce = 0x0;
}
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = (buffers & (PIPE_CLEAR_COLOR0 << i)) ? 0xf : 0x0;
+
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
A4XX_RB_MRT_CONTROL_B11 |
- A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+ A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
@@ -277,6 +275,16 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
fd4_emit_vertex_bufs(ring, &emit);
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
@@ -285,14 +293,8 @@ fd4_clear(struct fd_context *ctx, unsigned buffers,
OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
- OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW0 */
- OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW1 */
- OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW2 */
- OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */
-
/* until fastclear works: */
- fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+ fd4_emit_const(ring, SHADER_FRAGMENT, 0, 0, 4, color->ui, NULL);
OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
index 1bd376ca6ec..b89a30a7c4b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
@@ -106,6 +106,7 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
{
struct pipe_index_buffer *idx = &ctx->indexbuf;
struct fd_bo *idx_bo = NULL;
+ enum pc_di_primtype primtype = ctx->primtypes[info->mode];
enum a4xx_index_size idx_type;
enum pc_di_src_sel src_sel;
uint32_t idx_size, idx_offset;
@@ -126,7 +127,12 @@ fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
src_sel = DI_SRC_SEL_AUTO_INDEX;
}
- fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
+ /* points + psize -> spritelist: */
+ if (ctx->rasterizer && ctx->rasterizer->point_size_per_vertex &&
+ (info->mode == PIPE_PRIM_POINTS))
+ primtype = DI_PT_POINTLIST_PSIZE;
+
+ fd4_draw(ctx, ring, primtype, vismode, src_sel,
info->count, info->instance_count,
idx_type, idx_size, idx_offset, idx_bo);
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 4b6eb646aa7..b75be29e523 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -43,19 +43,26 @@
#include "fd4_format.h"
#include "fd4_zsa.h"
+static const enum adreno_state_block sb[] = {
+ [SHADER_VERTEX] = SB_VERT_SHADER,
+ [SHADER_FRAGMENT] = SB_FRAG_SHADER,
+};
+
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
*/
void
-fd4_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc)
{
uint32_t i, sz;
enum adreno_state_src src;
+ debug_assert((regid % 4) == 0);
+ debug_assert((sizedwords % 4) == 0);
+
if (prsc) {
sz = 0;
src = 0x2; // TODO ??
@@ -67,7 +74,7 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
CP_LOAD_STATE_0_STATE_SRC(src) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
if (prsc) {
struct fd_bo *bo = fd_resource(prsc)->bo;
@@ -84,89 +91,31 @@ fd4_emit_constant(struct fd_ringbuffer *ring,
}
static void
-emit_constants(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
- struct fd_constbuf_stateobj *constbuf,
- struct ir3_shader_variant *shader,
- bool emit_immediates)
+fd4_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets)
{
- uint32_t enabled_mask = constbuf->enabled_mask;
- uint32_t max_const;
- int i;
+ uint32_t i;
- // XXX TODO only emit dirty consts.. but we need to keep track if
- // they are clobbered by a clear, gmem2mem, or mem2gmem..
- constbuf->dirty_mask = enabled_mask;
+ debug_assert((regid % 4) == 0); /* DST_OFF and NUM_UNIT below are regid/4 and num/4, so both must be multiples of 4 */
+ debug_assert((num % 4) == 0);
- /* in particular, with binning shader we may end up with unused
- * consts, ie. we could end up w/ constlen that is smaller
- * than first_immediate. In that case truncate the user consts
- * early to avoid HLSQ lockup caused by writing too many consts
- */
- max_const = MIN2(shader->first_driver_param, shader->constlen);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + num); /* 2 header dwords + one dword per bo slot */
+ OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+ CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+ CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+ CP_LOAD_STATE_0_NUM_UNIT(num/4));
+ OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+ CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
- /* emit user constants: */
- if (enabled_mask & 1) {
- const unsigned index = 0;
- struct pipe_constant_buffer *cb = &constbuf->cb[index];
- unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
-
- // I expect that size should be a multiple of vec4's:
- assert(size == align(size, 4));
-
- /* and even if the start of the const buffer is before
- * first_immediate, the end may not be:
- */
- size = MIN2(size, 4 * max_const);
-
- if (size && (constbuf->dirty_mask & (1 << index))) {
- fd4_emit_constant(ring, sb, 0,
- cb->buffer_offset, size,
- cb->user_buffer, cb->buffer);
- constbuf->dirty_mask &= ~(1 << index);
- }
-
- enabled_mask &= ~(1 << index);
- }
-
- /* emit ubos: */
- if (shader->constlen > shader->first_driver_param) {
- uint32_t params = MIN2(4, shader->constlen - shader->first_driver_param);
- OUT_PKT3(ring, CP_LOAD_STATE, 2 + params * 4);
- OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(shader->first_driver_param) |
- CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
- CP_LOAD_STATE_0_STATE_BLOCK(sb) |
- CP_LOAD_STATE_0_NUM_UNIT(params));
- OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
- CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
-
- for (i = 1; i <= params * 4; i++) {
- struct pipe_constant_buffer *cb = &constbuf->cb[i];
- assert(!cb->user_buffer);
- if ((enabled_mask & (1 << i)) && cb->buffer)
- OUT_RELOC(ring, fd_resource(cb->buffer)->bo, cb->buffer_offset, 0, 0);
- else
- OUT_RING(ring, 0xbad00000 | ((i - 1) << 16));
- }
- }
-
- /* emit shader immediates: */
- if (shader && emit_immediates) {
- int size = shader->immediates_count;
- uint32_t base = shader->first_immediate;
-
- /* truncate size to avoid writing constants that shader
- * does not use:
- */
- size = MIN2(size + base, shader->constlen) - base;
-
- /* convert out of vec4: */
- base *= 4;
- size *= 4;
-
- if (size > 0) {
- fd4_emit_constant(ring, sb, base,
- 0, size, shader->immediates[0].val, NULL);
+ for (i = 0; i < num; i++) {
+ if (bos[i]) {
+ if (write) {
+ OUT_RELOCW(ring, bos[i], offsets[i], 0, 0);
+ } else {
+ OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
+ }
+ } else {
+ OUT_RING(ring, 0xbad00000 | (i << 16)); /* no bo in this slot: emit a marker value carrying the slot index */
}
}
}
@@ -223,15 +172,19 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
fd4_pipe_sampler_view(tex->textures[i]) :
&dummy_view;
- struct fd_resource *rsc = fd_resource(view->base.texture);
- unsigned start = view->base.u.tex.first_level;
- uint32_t offset = fd_resource_offset(rsc, start, 0);
+ unsigned start = fd_sampler_first_level(&view->base);
OUT_RING(ring, view->texconst0);
OUT_RING(ring, view->texconst1);
OUT_RING(ring, view->texconst2);
OUT_RING(ring, view->texconst3);
- OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ if (view->base.texture) {
+ struct fd_resource *rsc = fd_resource(view->base.texture);
+ uint32_t offset = fd_resource_offset(rsc, start, 0);
+ OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
+ } else {
+ OUT_RING(ring, 0x00000000);
+ }
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
@@ -244,51 +197,110 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
* special cases..
*/
void
-fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, unsigned nr_bufs,
+ struct pipe_surface **bufs)
{
- struct fd_resource *rsc = fd_resource(psurf->texture);
- unsigned lvl = psurf->u.tex.level;
- struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
- uint32_t offset = fd_resource_offset(rsc, lvl, psurf->u.tex.first_layer);
- enum pipe_format format = fd4_gmem_restore_format(psurf->format);
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS];
+ int i;
- debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = (i < nr_bufs) ? 0xf : 0; /* 0xf component mask for each buffer slot in use, 0 for the rest */
+ }
/* output sampler state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 4);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * nr_bufs)); /* 2 header dwords + 2 sampler dwords per buffer */
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
- CP_LOAD_STATE_0_NUM_UNIT(1));
+ CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
- A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
- A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
- A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
- A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
- OUT_RING(ring, 0x00000000);
+ for (i = 0; i < nr_bufs; i++) {
+ OUT_RING(ring, A4XX_TEX_SAMP_0_XY_MAG(A4XX_TEX_NEAREST) |
+ A4XX_TEX_SAMP_0_XY_MIN(A4XX_TEX_NEAREST) |
+ A4XX_TEX_SAMP_0_WRAP_S(A4XX_TEX_CLAMP_TO_EDGE) |
+ A4XX_TEX_SAMP_0_WRAP_T(A4XX_TEX_CLAMP_TO_EDGE) |
+ A4XX_TEX_SAMP_0_WRAP_R(A4XX_TEX_REPEAT));
+ OUT_RING(ring, 0x00000000);
+ }
/* emit texture state: */
- OUT_PKT3(ring, CP_LOAD_STATE, 10);
+ OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * nr_bufs)); /* 2 header dwords + 8 texture-state dwords per buffer */
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
- CP_LOAD_STATE_0_NUM_UNIT(1));
+ CP_LOAD_STATE_0_NUM_UNIT(nr_bufs));
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
- OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
- A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
- fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
- PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
- OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(psurf->width) |
- A4XX_TEX_CONST_1_HEIGHT(psurf->height));
- OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
- OUT_RING(ring, 0x00000000);
- OUT_RELOC(ring, rsc->bo, offset, 0, 0);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
- OUT_RING(ring, 0x00000000);
+ for (i = 0; i < nr_bufs; i++) {
+ if (bufs[i]) {
+ struct fd_resource *rsc = fd_resource(bufs[i]->texture);
+ /* note: PIPE_BUFFER disallowed for surfaces */
+ unsigned lvl = bufs[i]->u.tex.level;
+ struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
+ uint32_t offset = fd_resource_offset(rsc, lvl, bufs[i]->u.tex.first_layer);
+ enum pipe_format format = fd4_gmem_restore_format(bufs[i]->format);
+
+ /* The restore blit_zs shader expects stencil in sampler 0,
+ * and depth in sampler 1
+ */
+ if (rsc->stencil && (i == 0)) {
+ rsc = rsc->stencil;
+ format = fd4_gmem_restore_format(rsc->base.b.format);
+ }
+
+ /* z32 restore is accomplished using depth write, so a slot whose
+ * format is still float depth (PIPE_FORMAT_Z32_FLOAT, or the depth
+ * slot of PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) gets no render target.
+ *
+ * (The z32_s8x24 stencil slot keeps its render target, since the
+ * 'if' above has already replaced its 'format' with the stencil one.)
+ */
+ if ((format == PIPE_FORMAT_Z32_FLOAT) ||
+ (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT))
+ mrt_comp[i] = 0;
+
+ debug_assert(bufs[i]->u.tex.first_layer == bufs[i]->u.tex.last_layer);
+
+ OUT_RING(ring, A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(format)) |
+ A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
+ fd4_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+ PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
+ OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(bufs[i]->width) |
+ A4XX_TEX_CONST_1_HEIGHT(bufs[i]->height));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp));
+ OUT_RING(ring, 0x00000000);
+ OUT_RELOC(ring, rsc->bo, offset, 0, 0);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ } else { /* no surface in this slot: emit a zero-sized placeholder descriptor */
+ OUT_RING(ring, A4XX_TEX_CONST_0_FMT(0) |
+ A4XX_TEX_CONST_0_TYPE(A4XX_TEX_2D) |
+ A4XX_TEX_CONST_0_SWIZ_X(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_Y(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_Z(A4XX_TEX_ONE) |
+ A4XX_TEX_CONST_0_SWIZ_W(A4XX_TEX_ONE));
+ OUT_RING(ring, A4XX_TEX_CONST_1_WIDTH(0) |
+ A4XX_TEX_CONST_1_HEIGHT(0));
+ OUT_RING(ring, A4XX_TEX_CONST_2_PITCH(0));
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
}
void
@@ -298,7 +310,9 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
uint32_t total_in = 0;
const struct fd_vertex_state *vtx = emit->vtx;
struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
- unsigned vertex_regid = regid(63, 0), instance_regid = regid(63, 0);
+ unsigned vertex_regid = regid(63, 0);
+ unsigned instance_regid = regid(63, 0);
+ unsigned vtxcnt_regid = regid(63, 0);
for (i = 0; i < vp->inputs_count; i++) {
uint8_t semantic = sem2name(vp->inputs[i].semantic);
@@ -306,6 +320,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
vertex_regid = vp->inputs[i].regid;
else if (semantic == TGSI_SEMANTIC_INSTANCEID)
instance_regid = vp->inputs[i].regid;
+ else if (semantic == IR3_SEMANTIC_VTXCNT)
+ vtxcnt_regid = vp->inputs[i].regid;
else if ((i < vtx->vtx->num_elements) && vp->inputs[i].compmask)
last = i;
}
@@ -313,7 +329,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
/* hw doesn't like to be configured for zero vbo's, it seems: */
if ((vtx->vtx->num_elements == 0) &&
(vertex_regid == regid(63, 0)) &&
- (instance_regid == regid(63, 0)))
+ (instance_regid == regid(63, 0)) &&
+ (vtxcnt_regid == regid(63, 0)))
return;
for (i = 0, j = 0; i <= last; i++) {
@@ -327,7 +344,8 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
bool switchnext = (i != last) ||
(vertex_regid != regid(63, 0)) ||
- (instance_regid != regid(63, 0));
+ (instance_regid != regid(63, 0)) ||
+ (vtxcnt_regid != regid(63, 0));
bool isint = util_format_is_pure_integer(pfmt);
uint32_t fs = util_format_get_blocksize(pfmt);
uint32_t off = vb->buffer_offset + elem->src_offset;
@@ -368,7 +386,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
A4XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
A4XX_VFD_CONTROL_1_REGID4INST(instance_regid));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
- OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(regid(63, 0)));
+ OUT_RING(ring, A4XX_VFD_CONTROL_3_REGID_VTXCNT(vtxcnt_regid));
OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
/* cache invalidate, otherwise vertex fetch could see
@@ -389,6 +407,25 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
emit_marker(ring, 5);
+ if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) {
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
+
+ for (unsigned i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+ }
+
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+ }
+
if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;
@@ -513,43 +550,24 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
}
- if (dirty & FD_DIRTY_PROG)
- fd4_program_emit(ring, emit);
-
- if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
- /* evil hack to deal sanely with clear path: */
- (emit->prog == &ctx->prog)) {
- fd_wfi(ctx, ring);
- emit_constants(ring, SB_VERT_SHADER,
- &ctx->constbuf[PIPE_SHADER_VERTEX],
- vp, emit->prog->dirty & FD_SHADER_DIRTY_VP);
- if (!emit->key.binning_pass) {
- emit_constants(ring, SB_FRAG_SHADER,
- &ctx->constbuf[PIPE_SHADER_FRAGMENT],
- fp, emit->prog->dirty & FD_SHADER_DIRTY_FP);
- }
+ if (dirty & FD_DIRTY_PROG) {
+ struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+ fd4_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
}
- /* emit driver params every time */
- if (emit->info && emit->prog == &ctx->prog) {
- uint32_t vertex_params[4] = {
- emit->info->indexed ? emit->info->index_bias : emit->info->start,
- 0,
- 0,
- 0
- };
- if (vp->constlen >= vp->first_driver_param + 4) {
- fd4_emit_constant(ring, SB_VERT_SHADER,
- (vp->first_driver_param + 4) * 4,
- 0, 4, vertex_params, NULL);
- }
+ if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */
+ ir3_emit_consts(vp, ring, emit->info, dirty);
+ if (!emit->key.binning_pass)
+ ir3_emit_consts(fp, ring, emit->info, dirty);
+ /* mark clean after emitting consts: */
+ ctx->prog.dirty = 0;
}
if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
uint32_t i;
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, blend->rb_mrt[i].control);
@@ -607,10 +625,10 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
OUT_RING(ring, 0x00000000);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
+ OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1);
OUT_RING(ring, 0x00000006);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
+ OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1);
OUT_RING(ring, 0x0000003a);
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
@@ -629,7 +647,7 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000012);
- OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
+ OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1);
OUT_RING(ring, 0x00000000);
OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
@@ -752,9 +770,6 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
- OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
- OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(0xf));
-
OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
@@ -763,3 +778,11 @@ fd4_emit_restore(struct fd_context *ctx)
ctx->needs_rb_fbd = true;
}
+
+void
+fd4_emit_init(struct pipe_context *pctx)
+{
+ struct fd_context *ctx = fd_context(pctx); /* install the a4xx const-emit hooks on this context */
+ ctx->emit_const = fd4_emit_const; /* constants from inline dwords or from a pipe_resource */
+ ctx->emit_const_bo = fd4_emit_const_bo; /* bo addresses (relocs) loaded as constants */
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
index 7d059f8e532..ab7850e50b0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -37,15 +37,13 @@
#include "ir3_shader.h"
struct fd_ringbuffer;
-enum adreno_state_block;
-void fd4_emit_constant(struct fd_ringbuffer *ring,
- enum adreno_state_block sb,
+void fd4_emit_const(struct fd_ringbuffer *ring, enum shader_t type,
uint32_t regid, uint32_t offset, uint32_t sizedwords,
const uint32_t *dwords, struct pipe_resource *prsc);
void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
- struct pipe_surface *psurf);
+ unsigned nr_bufs, struct pipe_surface **bufs);
/* grouped together emit-state for prog/vertex/state emit: */
struct fd4_emit {
@@ -53,10 +51,12 @@ struct fd4_emit {
const struct fd_program_stateobj *prog;
const struct pipe_draw_info *info;
struct ir3_shader_key key;
- enum a4xx_color_fmt format;
- enum pipe_format pformat;
uint32_t dirty;
+ uint32_t sprite_coord_enable; /* bitmask */
+ bool sprite_coord_mode;
+ bool rasterflat;
+
/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;
/* TODO: other shader stages.. */
@@ -96,4 +96,6 @@ void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
void fd4_emit_restore(struct fd_context *ctx);
+void fd4_emit_init(struct pipe_context *pctx);
+
#endif /* FD4_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
index 29abe0b0cc3..3e0045449eb 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -89,6 +89,14 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX),
_T(I8_UNORM, 8_UNORM, NONE, WZYX),
+ /* NOTE: should be TFMT_8_UINT (which then gets remapped to
+ * TFMT_8_UNORM for mem2gmem in _gmem_restore_format()), but
+ * we don't know TFMT_8_UINT yet.. so just use TFMT_8_UNORM
+ * for now.. sampling from stencil as a texture might not
+ * work right, but at least should be fine for zsbuf..
+ */
+ _T(S8_UINT, 8_UNORM, R8_UNORM, WZYX),
+
/* 16-bit */
V_(R16_UNORM, 16_UNORM, NONE, WZYX),
V_(R16_SNORM, 16_SNORM, NONE, WZYX),
@@ -96,7 +104,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R16_SINT, 16_SINT, R16_SINT, WZYX),
V_(R16_USCALED, 16_UINT, NONE, WZYX),
V_(R16_SSCALED, 16_UINT, NONE, WZYX),
- VT(R16_FLOAT, 16_FLOAT, NONE, WZYX),
+ VT(R16_FLOAT, 16_FLOAT, R16_FLOAT,WZYX),
_T(A16_UINT, 16_UINT, NONE, WZYX),
_T(A16_SINT, 16_SINT, NONE, WZYX),
@@ -132,7 +140,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R32_SINT, 32_SINT, R32_SINT, WZYX),
V_(R32_USCALED, 32_UINT, NONE, WZYX),
V_(R32_SSCALED, 32_UINT, NONE, WZYX),
- VT(R32_FLOAT, 32_FLOAT, NONE, WZYX),
+ VT(R32_FLOAT, 32_FLOAT, R32_FLOAT,WZYX),
V_(R32_FIXED, 32_FIXED, NONE, WZYX),
_T(A32_UINT, 32_UINT, NONE, WZYX),
@@ -148,7 +156,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX),
V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX),
V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX),
- VT(R16G16_FLOAT, 16_16_FLOAT, NONE, WZYX),
+ VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT,WZYX),
_T(L16A16_UINT, 16_16_UINT, NONE, WZYX),
_T(L16A16_SINT, 16_16_SINT, NONE, WZYX),
@@ -191,7 +199,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
_T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
_T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
- /*_T(Z32_FLOAT, Z32_FLOAT, R8G8B8A8_UNORM, WZYX),*/
+ _T(Z32_FLOAT, 32_FLOAT, R8G8B8A8_UNORM, WZYX),
+ _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX),
/* 48-bit */
V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX),
@@ -218,7 +227,7 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX),
V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX),
V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX),
- VT(R32G32_FLOAT, 32_32_FLOAT, NONE, WZYX),
+ VT(R32G32_FLOAT, 32_32_FLOAT, R32G32_FLOAT,WZYX),
V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX),
_T(L32A32_UINT, 32_32_UINT, NONE, WZYX),
@@ -282,6 +291,9 @@ fd4_pipe2swap(enum pipe_format format)
enum a4xx_tex_fetchsize
fd4_pipe2fetchsize(enum pipe_format format)
{
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ format = PIPE_FORMAT_Z32_FLOAT;
+
switch (util_format_get_blocksizebits(format)) {
case 8: return TFETCH4_1_BYTE;
case 16: return TFETCH4_2_BYTE;
@@ -312,6 +324,8 @@ fd4_gmem_restore_format(enum pipe_format format)
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_Z16_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
+ case PIPE_FORMAT_S8_UINT:
+ return PIPE_FORMAT_R8_UNORM;
default:
return format;
}
@@ -328,6 +342,9 @@ fd4_pipe2depth(enum pipe_format format)
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return DEPTH4_24_8;
+ case PIPE_FORMAT_Z32_FLOAT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return DEPTH4_32;
default:
return ~0;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
index 9a905062071..81c37f72565 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -44,12 +44,6 @@
#include "fd4_format.h"
#include "fd4_zsa.h"
-static const struct ir3_shader_key key = {
- // XXX should set this based on render target format! We don't
- // want half_precision if float32 render target!!!
- .half_precision = true,
-};
-
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
@@ -63,7 +57,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
tile_mode = TILE4_LINEAR;
}
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
enum a4xx_color_fmt format = 0;
enum a3xx_color_swap swap = WZYX;
struct fd_resource *rsc = NULL;
@@ -74,11 +68,23 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
if ((i < nr_bufs) && bufs[i]) {
struct pipe_surface *psurf = bufs[i];
+ enum pipe_format pformat = 0;
rsc = fd_resource(psurf->texture);
+ pformat = psurf->format;
+
+ /* In case we're drawing to Z32F_S8, the "color" actually goes to
+ * the stencil
+ */
+ if (rsc->stencil) {
+ rsc = rsc->stencil;
+ pformat = rsc->base.b.format;
+ bases++;
+ }
+
slice = fd_resource_slice(rsc, psurf->u.tex.level);
- format = fd4_pipe2color(psurf->format);
- swap = fd4_pipe2swap(psurf->format);
+ format = fd4_pipe2color(pformat);
+ swap = fd4_pipe2swap(pformat);
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
@@ -94,6 +100,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
} else {
stride = slice->pitch * rsc->cpp;
}
+ } else if ((i < nr_bufs) && bases) {
+ base = bases[i];
}
OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
@@ -101,7 +109,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
- if (bin_w || (i >= nr_bufs)) {
+ if (bin_w || (i >= nr_bufs) || !bufs[i]) {
OUT_RING(ring, base);
OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
} else {
@@ -115,30 +123,26 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
}
}
-static uint32_t
-depth_base(struct fd_context *ctx)
-{
- struct fd_gmem_stateobj *gmem = &ctx->gmem;
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
- uint32_t cpp = 4;
- if (pfb->cbufs[0]) {
- struct fd_resource *rsc =
- fd_resource(pfb->cbufs[0]->texture);
- cpp = rsc->cpp;
- }
- return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
-}
-
/* transfer from gmem to system memory (ie. normal RAM) */
static void
-emit_gmem2mem_surf(struct fd_context *ctx,
+emit_gmem2mem_surf(struct fd_context *ctx, bool stencil,
uint32_t base, struct pipe_surface *psurf)
{
struct fd_ringbuffer *ring = ctx->ring;
struct fd_resource *rsc = fd_resource(psurf->texture);
- struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
- uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
+ enum pipe_format pformat = psurf->format;
+ struct fd_resource_slice *slice;
+ uint32_t offset;
+
+ if (stencil) {
+ debug_assert(rsc->stencil);
+ rsc = rsc->stencil;
+ pformat = rsc->base.b.format;
+ }
+
+ slice = &rsc->slices[psurf->u.tex.level];
+ offset = fd_resource_offset(rsc, psurf->u.tex.level,
psurf->u.tex.first_layer);
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
@@ -150,10 +154,10 @@ emit_gmem2mem_surf(struct fd_context *ctx,
OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
- A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) |
+ A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(pformat)) |
A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
- A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format)));
+ A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(pformat)));
fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -163,13 +167,15 @@ static void
fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
{
struct fd4_context *fd4_ctx = fd4_context(ctx);
+ struct fd_gmem_stateobj *gmem = &ctx->gmem;
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &fd4_ctx->solid_vbuf_state,
.prog = &ctx->solid_prog,
- .key = key,
- .format = fd4_emit_format(pfb->cbufs[0]),
+ .key = {
+ .half_precision = true,
+ },
};
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
@@ -238,16 +244,26 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
- fd4_program_emit(ring, &emit);
+ fd4_program_emit(ring, &emit, 0, NULL);
fd4_emit_vertex_bufs(ring, &emit);
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
- uint32_t base = depth_base(ctx);
- emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ if (!rsc->stencil || (ctx->resolve & FD_BUFFER_DEPTH))
+ emit_gmem2mem_surf(ctx, false, ctx->gmem.zsbuf_base[0], pfb->zsbuf);
+ if (rsc->stencil && (ctx->resolve & FD_BUFFER_STENCIL))
+ emit_gmem2mem_surf(ctx, true, ctx->gmem.zsbuf_base[1], pfb->zsbuf);
}
if (ctx->resolve & FD_BUFFER_COLOR) {
- emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
+ unsigned i;
+ for (i = 0; i < pfb->nr_cbufs; i++) {
+ if (!pfb->cbufs[i])
+ continue;
+ if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
+ continue;
+ emit_gmem2mem_surf(ctx, false, gmem->cbuf_base[i], pfb->cbufs[i]);
+ }
}
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
@@ -260,14 +276,25 @@ fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
/* transfer from system memory to gmem */
static void
-emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
- struct pipe_surface *psurf, uint32_t bin_w)
+emit_mem2gmem_surf(struct fd_context *ctx, uint32_t *bases,
+ struct pipe_surface **bufs, uint32_t nr_bufs, uint32_t bin_w)
{
struct fd_ringbuffer *ring = ctx->ring;
+ struct pipe_surface *zsbufs[2];
- emit_mrt(ring, 1, &psurf, &base, bin_w);
+ emit_mrt(ring, nr_bufs, bufs, bases, bin_w);
- fd4_emit_gmem_restore_tex(ring, psurf);
+ if (bufs[0] && (bufs[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
+ /* The gmem_restore_tex logic will put the first buffer's stencil
+ * as color. Supply it with the proper information to make that
+ * happen.
+ */
+ zsbufs[0] = zsbufs[1] = bufs[0];
+ bufs = zsbufs;
+ nr_bufs = 2;
+ }
+
+ fd4_emit_gmem_restore_tex(ring, nr_bufs, bufs);
fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
DI_SRC_SEL_AUTO_INDEX, 2, 1, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -282,10 +309,14 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd4_emit emit = {
.vtx = &fd4_ctx->blit_vbuf_state,
+ .sprite_coord_enable = 1,
+ /* NOTE: all blit_prog[] entries use the same VP; this one is only for vtx bufs. */
.prog = &ctx->blit_prog[0],
- .key = key,
- .format = fd4_emit_format(pfb->cbufs[0]),
+ .key = {
+ .half_precision = fd_half_precision(pfb),
+ },
};
+ unsigned char mrt_comp[A4XX_MAX_RENDER_TARGETS] = {0};
float x0, y0, x1, y1;
unsigned bin_w = tile->bin_w;
unsigned bin_h = tile->bin_h;
@@ -304,7 +335,9 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, fui(x1));
OUT_RING(ring, fui(y1));
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
+ mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0;
+
OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
A4XX_RB_MRT_CONTROL_B11 |
@@ -319,6 +352,16 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
}
+ OUT_PKT0(ring, REG_A4XX_RB_RENDER_COMPONENTS, 1);
+ OUT_RING(ring, A4XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) |
+ A4XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) |
+ A4XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) |
+ A4XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) |
+ A4XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) |
+ A4XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) |
+ A4XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) |
+ A4XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7]));
+
OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
OUT_RING(ring, 0x8); /* XXX RB_RENDER_CONTROL */
@@ -381,7 +424,6 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */
- fd4_program_emit(ring, &emit);
fd4_emit_vertex_bufs(ring, &emit);
/* for gmem pitch/base calculations, we need to use the non-
@@ -390,11 +432,46 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
bin_w = gmem->bin_w;
bin_h = gmem->bin_h;
- if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
- emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);
+ if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR)) {
+ emit.prog = &ctx->blit_prog[pfb->nr_cbufs - 1];
+ emit.fp = NULL; /* frag shader changed so clear cache */
+ fd4_program_emit(ring, &emit, pfb->nr_cbufs, pfb->cbufs);
+ emit_mem2gmem_surf(ctx, gmem->cbuf_base, pfb->cbufs, pfb->nr_cbufs, bin_w);
+ }
- if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
- emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);
+ if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+ switch (pfb->zsbuf->format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ case PIPE_FORMAT_Z32_FLOAT:
+ emit.prog = (pfb->zsbuf->format == PIPE_FORMAT_Z32_FLOAT) ?
+ &ctx->blit_z : &ctx->blit_zs;
+ emit.key.half_precision = false;
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+ OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+ A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS) |
+ A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+ OUT_RING(ring, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE);
+
+ OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+ OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */
+
+ break;
+ default:
+ /* Non-float can use a regular color write. It's split over 8-bit
+ * components, so half precision is always sufficient.
+ */
+ emit.prog = &ctx->blit_prog[0];
+ emit.key.half_precision = true;
+ break;
+ }
+ emit.fp = NULL; /* frag shader changed so clear cache */
+ fd4_program_emit(ring, &emit, 1, &pfb->zsbuf);
+ emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
+ }
OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
@@ -534,21 +611,35 @@ fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
struct fd_ringbuffer *ring = ctx->ring;
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_gmem_stateobj *gmem = &ctx->gmem;
- uint32_t reg;
- OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
- reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
if (pfb->zsbuf) {
- reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format));
- }
- OUT_RING(ring, reg);
- if (pfb->zsbuf) {
- uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
+ struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+ uint32_t cpp = rsc->cpp;
+
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
+ OUT_RING(ring, A4XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]) |
+ A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd4_pipe2depth(pfb->zsbuf->format)));
OUT_RING(ring, A4XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(cpp * gmem->bin_w));
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
+ if (rsc->stencil) {
+ OUT_RING(ring, A4XX_RB_STENCIL_INFO_SEPARATE_STENCIL |
+ A4XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
+ OUT_RING(ring, A4XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
+ } else {
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+ }
} else {
+ OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, 0x00000000);
+
+ OUT_PKT0(ring, REG_A4XX_RB_STENCIL_INFO, 2);
+ OUT_RING(ring, 0); /* RB_STENCIL_INFO */
+ OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
}
OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
@@ -586,7 +677,7 @@ fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1));
OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2));
- emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+ emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem->cbuf_base, gmem->bin_w);
/* setup scissor/offset for current tile: */
OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index e8f5837f7ce..1a6d0142132 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -31,8 +31,6 @@
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
#include "freedreno_program.h"
@@ -53,7 +51,7 @@ create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state
enum shader_t type)
{
struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj);
- so->shader = ir3_shader_create(pctx, cso->tokens, type);
+ so->shader = ir3_shader_create(pctx, cso, type);
return so;
}
@@ -213,14 +211,17 @@ setup_stages(struct fd4_emit *emit, struct stage *s)
}
void
-fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
+fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
+ int nr, struct pipe_surface **bufs)
{
struct stage s[MAX_STAGES];
- uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+ uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
uint32_t face_regid, coord_regid, zwcoord_regid;
int constmode;
int i, j, k;
+ debug_assert(nr <= ARRAY_SIZE(color_regid));
+
setup_stages(emit, s);
/* blob seems to always use constmode currently: */
@@ -232,11 +233,30 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
psize_regid = ir3_find_output_regid(s[VS].v,
ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
- color_regid = ir3_find_output_regid(s[FS].v,
- ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+ if (s[FS].v->color0_mrt) {
+ color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+ color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
+ ir3_find_output_regid(s[FS].v, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+ } else {
+ const struct ir3_shader_variant *fp = s[FS].v;
+ memset(color_regid, 0, sizeof(color_regid));
+ for (i = 0; i < fp->outputs_count; i++) {
+ ir3_semantic sem = fp->outputs[i].semantic;
+ unsigned idx = sem2idx(sem);
+ if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
+ continue;
+ debug_assert(idx < ARRAY_SIZE(color_regid));
+ color_regid[idx] = fp->outputs[i].regid;
+ }
+ }
+
+ /* adjust regids for alpha output formats. there is no alpha render
+ * format, so it's just treated like red
+ */
+ for (i = 0; i < nr; i++)
+ if (util_format_is_alpha(pipe_surface_format(bufs[i])))
+ color_regid[i] += 3;
- if (util_format_is_alpha(emit->pformat))
- color_regid += 3;
/* TODO get these dynamically: */
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
@@ -419,29 +439,24 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
A4XX_RB_RENDER_CONTROL2_WCOORD));
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
- OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(1) |
+ OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(MAX2(1, nr)) |
COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z));
OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
- if (s[FS].v->writes_pos) {
- OUT_RING(ring, 0x00000001 |
- A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
- A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
- } else {
- OUT_RING(ring, 0x00000001);
- }
+ OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr)) |
+ COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
+ A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) |
- A4XX_SP_FS_MRT_REG_MRTFORMAT(emit->format) |
- COND(emit->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
- OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+ for (i = 0; i < 8; i++) {
+ enum a4xx_color_fmt format = 0;
+ if (i < nr)
+ format = fd4_emit_format(bufs[i]);
+ OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
+ A4XX_SP_FS_MRT_REG_MRTFORMAT(format) |
+ COND(emit->key.half_precision,
+ A4XX_SP_FS_MRT_REG_HALF_PRECISION));
+ }
if (emit->key.binning_pass) {
OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
@@ -450,10 +465,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
OUT_RING(ring, 0x00000000);
} else {
- uint32_t vinterp[8], flatshade[2];
+ uint32_t vinterp[8], vpsrepl[8];
memset(vinterp, 0, sizeof(vinterp));
- memset(flatshade, 0, sizeof(flatshade));
+ memset(vpsrepl, 0, sizeof(vpsrepl));
/* looks like we need to do int varyings in the frag
* shader on a4xx (no flatshad reg? or a420.0 bug?):
@@ -470,29 +485,40 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
* something like the code below instead of workaround
* in the shader:
*/
-#if 0
- /* figure out VARYING_INTERP / FLAT_SHAD register values: */
+ /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
uint32_t interp = s[FS].v->inputs[j].interpolate;
+
+ /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
+ * instead.. rather than -8 everywhere else..
+ */
+ uint32_t inloc = s[FS].v->inputs[j].inloc - 8;
+
+ /* currently assuming varyings aligned to 4 (not
+ * packed):
+ */
+ debug_assert((inloc % 4) == 0);
+
if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
- /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
- * instead.. rather than -8 everywhere else..
- */
- uint32_t loc = s[FS].v->inputs[j].inloc - 8;
-
- /* currently assuming varyings aligned to 4 (not
- * packed):
- */
- debug_assert((loc % 4) == 0);
+ uint32_t loc = inloc;
for (i = 0; i < 4; i++, loc++) {
vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
- flatshade[loc / 32] |= 1 << (loc % 32);
+ //flatshade[loc / 32] |= 1 << (loc % 32);
}
}
+
+ /* Replace the .xy coordinates with S/T from the point sprite. Set
+ * interpolation bits for .zw such that they become .01
+ */
+ if (emit->sprite_coord_enable & (1 << sem2idx(s[FS].v->inputs[j].semantic))) {
+ vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+ << ((inloc % 16) * 2);
+ vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+ vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+ }
}
-#endif
OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
@@ -509,7 +535,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
for (i = 0; i < 8; i++)
- OUT_RING(ring, s[FS].v->shader->vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
+ OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */
}
if (s[VS].instrlen)
@@ -520,19 +546,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
emit_shader(ring, s[FS].v);
}
-/* hack.. until we figure out how to deal w/ vpsrepl properly.. */
-static void
-fix_blit_fp(struct pipe_context *pctx)
-{
- struct fd_context *ctx = fd_context(pctx);
- struct fd4_shader_stateobj *so = ctx->blit_prog[0].fp;
-
- so->shader->vpsrepl[0] = 0x99999999;
- so->shader->vpsrepl[1] = 0x99999999;
- so->shader->vpsrepl[2] = 0x99999999;
- so->shader->vpsrepl[3] = 0x99999999;
-}
-
void
fd4_prog_init(struct pipe_context *pctx)
{
@@ -543,6 +556,4 @@ fd4_prog_init(struct pipe_context *pctx)
pctx->delete_vs_state = fd4_vp_state_delete;
fd_prog_init(pctx);
-
- fix_blit_fp(pctx);
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.h b/src/gallium/drivers/freedreno/a4xx/fd4_program.h
index 52306a4c60d..8dfccaf9d74 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.h
@@ -39,7 +39,8 @@ struct fd4_shader_stateobj {
struct fd4_emit;
-void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit);
+void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
+ int nr, struct pipe_surface **bufs);
void fd4_prog_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
index 6db1c11b94b..4f69e0c1694 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
@@ -31,9 +31,93 @@
#include "freedreno_util.h"
#include "fd4_query.h"
+#include "fd4_draw.h"
#include "fd4_format.h"
+
+struct fd_rb_samp_ctrs { /* storage for one query sample; occlusion_get_sample() sizes samples with sizeof() this struct */
+ uint64_t ctr[16]; /* count_samples() reads only ctr[0]; NOTE(review): use of ctr[1..15] is not visible here */
+};
+
+/*
+ * Occlusion Query:
+ *
+ * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
+ * interpret results
+ */
+
+static struct fd_hw_sample *
+occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{ /* Allocate a sample sized for struct fd_rb_samp_ctrs, emit the
+ * commands below into 'ring' to capture the counters into it, and
+ * return the sample.  Shared by both occlusion providers.
+ */
+ struct fd_hw_sample *samp =
+ fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
+
+ /* low bits of sample addr should be zero (since they are control
+ * flags in RB_SAMPLE_COUNT_CONTROL):
+ */
+ debug_assert((samp->offset & 0x3) == 0);
+
+ /* Set RB_SAMPLE_COUNT_CONTROL to the COPY flag | samp->offset, plus
+ * the value of the HW_QUERY_BASE_REG register:
+ */
+ OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+ OUT_RING(ring, CP_REG(REG_A4XX_RB_SAMPLE_COUNT_CONTROL) | 0x80000000);
+ OUT_RING(ring, HW_QUERY_BASE_REG);
+ OUT_RING(ring, A4XX_RB_SAMPLE_COUNT_CONTROL_COPY |
+ samp->offset);
+
+ OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, 3); /* draw with zero indices; NOTE(review): presumably only there to trigger the counter copy - confirm */
+ OUT_RING(ring, DRAW4(DI_PT_POINTLIST_PSIZE, DI_SRC_SEL_AUTO_INDEX,
+ INDEX4_SIZE_32_BIT, USE_VISIBILITY));
+ OUT_RING(ring, 1); /* NumInstances */
+ OUT_RING(ring, 0); /* NumIndices */
+
+ fd_event_write(ctx, ring, ZPASS_DONE);
+
+ return samp;
+}
+
+static uint64_t
+count_samples(const struct fd_rb_samp_ctrs *start,
+ const struct fd_rb_samp_ctrs *end)
+{ /* difference of counter slot 0 between the 'end' and 'start' snapshots (unsigned 64-bit subtraction) */
+ return end->ctr[0] - start->ctr[0];
+}
+
+static void
+occlusion_counter_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{ /* adds the count_samples() delta for this start/end pair onto result->u64; 'ctx' is not used */
+ uint64_t n = count_samples(start, end);
+ result->u64 += n;
+}
+
+static void
+occlusion_predicate_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{ /* ORs (delta > 0) into result->b, so the flag stays set once any start/end pair counted samples; 'ctx' is not used */
+ uint64_t n = count_samples(start, end);
+ result->b |= (n > 0);
+}
+
+static const struct fd_hw_sample_provider occlusion_counter = { /* registered by fd4_query_context_init() */
+ .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
+ .active = FD_STAGE_DRAW,
+ .get_sample = occlusion_get_sample, /* same sampler as occlusion_predicate */
+ .accumulate_result = occlusion_counter_accumulate_result, /* sums sample deltas into result->u64 */
+};
+
+static const struct fd_hw_sample_provider occlusion_predicate = { /* registered by fd4_query_context_init() */
+ .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
+ .active = FD_STAGE_DRAW,
+ .get_sample = occlusion_get_sample, /* same sampler as occlusion_counter */
+ .accumulate_result = occlusion_predicate_accumulate_result, /* reduces sample deltas to the boolean result->b */
+};
+
void fd4_query_context_init(struct pipe_context *pctx)
{
- /* TODO */
+ fd_hw_query_register_provider(pctx, &occlusion_counter); /* provider for PIPE_QUERY_OCCLUSION_COUNTER */
+ fd_hw_query_register_provider(pctx, &occlusion_predicate); /* provider for PIPE_QUERY_OCCLUSION_PREDICATE */
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
index e54b606a285..dc7e98b149d 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -50,7 +50,7 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
if (cso->point_size_per_vertex) {
psize_min = util_get_min_point_size(cso);
- psize_max = 8192;
+ psize_max = 4092;
} else {
/* Force the point size to be as if the vertex output was disabled. */
psize_min = cso->point_size;
@@ -67,9 +67,9 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
*/
so->gras_cl_clip_cntl = 0x80000; /* ??? */
so->gras_su_point_minmax =
- A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
- A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
- so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
+ A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+ A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+ so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size);
so->gras_su_poly_offset_scale =
A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
so->gras_su_poly_offset_offset =
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
index 06c728f2f1f..64e81a9983b 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
@@ -44,7 +44,7 @@ struct fd4_rasterizer_stateobj {
uint32_t pc_prim_vtx_cntl;
};
-static INLINE struct fd4_rasterizer_stateobj *
+static inline struct fd4_rasterizer_stateobj *
fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
return (struct fd4_rasterizer_stateobj *)rast;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
index e8cbb2d201a..d8ea414f300 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
@@ -102,7 +102,7 @@ void
fd4_screen_init(struct pipe_screen *pscreen)
{
struct fd_screen *screen = fd_screen(pscreen);
- screen->max_rts = 1;
+ screen->max_rts = A4XX_MAX_RENDER_TARGETS;
screen->compiler = ir3_compiler_create(screen->gpu_id);
pscreen->context_create = fd4_context_create;
pscreen->is_format_supported = fd4_screen_is_format_supported;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 6ba25d0816d..d2bc5fee6c0 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -150,8 +150,8 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
{
struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
struct fd_resource *rsc = fd_resource(prsc);
- unsigned lvl = cso->u.tex.first_level;
- unsigned miplevels = cso->u.tex.last_level - lvl;
+ unsigned lvl = fd_sampler_first_level(cso);
+ unsigned miplevels = fd_sampler_last_level(cso) - lvl;
if (!so)
return NULL;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
index 579ed87f14b..84ee7ecb50c 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
@@ -42,7 +42,7 @@ struct fd4_sampler_stateobj {
uint32_t texsamp0, texsamp1;
};
-static INLINE struct fd4_sampler_stateobj *
+static inline struct fd4_sampler_stateobj *
fd4_sampler_stateobj(struct pipe_sampler_state *samp)
{
return (struct fd4_sampler_stateobj *)samp;
@@ -53,7 +53,7 @@ struct fd4_pipe_sampler_view {
uint32_t texconst0, texconst1, texconst2, texconst3, textconst4;
};
-static INLINE struct fd4_pipe_sampler_view *
+static inline struct fd4_pipe_sampler_view *
fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
{
return (struct fd4_pipe_sampler_view *)pview;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
index 033317cf620..6a92a9b6785 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
@@ -47,7 +47,7 @@ struct fd4_zsa_stateobj {
uint32_t rb_stencilrefmask_bf;
};
-static INLINE struct fd4_zsa_stateobj *
+static inline struct fd4_zsa_stateobj *
fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
{
return (struct fd4_zsa_stateobj *)zsa;
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index b23aa830770..00b6acba065 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -8,15 +8,15 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
-Copyright (C) 2013-2014 by the following authors:
+Copyright (C) 2013-2015 by the following authors:
- Rob Clark (robclark)
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 2b24c5b4e78..98a90e26679 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -8,13 +8,13 @@ http://github.com/freedreno/envytools/
git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2014-11-13 22:44:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14895 bytes, from 2015-04-19 15:23:28)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-04-12 18:16:35)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 59314 bytes, from 2015-04-19 16:21:40)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 66709 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63728 bytes, from 2015-08-05 18:07:28)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark (robclark)
@@ -67,7 +67,7 @@ enum vgt_event_type {
enum pc_di_primtype {
DI_PT_NONE = 0,
- DI_PT_POINTLIST_A2XX = 1,
+ DI_PT_POINTLIST_PSIZE = 1,
DI_PT_LINELIST = 2,
DI_PT_LINESTRIP = 3,
DI_PT_TRILIST = 4,
@@ -75,7 +75,7 @@ enum pc_di_primtype {
DI_PT_TRISTRIP = 6,
DI_PT_LINELOOP = 7,
DI_PT_RECTLIST = 8,
- DI_PT_POINTLIST_A3XX = 9,
+ DI_PT_POINTLIST = 9,
DI_PT_LINE_ADJ = 10,
DI_PT_LINESTRIP_ADJ = 11,
DI_PT_TRI_ADJ = 12,
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 668ef3629bf..8e6d43150ce 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -94,9 +94,7 @@ void
fd_context_render(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
- struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct fd_resource *rsc, *rsc_tmp;
- int i;
DBG("needs_flush: %d", ctx->needs_flush);
@@ -118,20 +116,11 @@ fd_context_render(struct pipe_context *pctx)
ctx->gmem_reason = 0;
ctx->num_draws = 0;
- for (i = 0; i < pfb->nr_cbufs; i++)
- if (pfb->cbufs[i])
- fd_resource(pfb->cbufs[i]->texture)->dirty = false;
- if (pfb->zsbuf) {
- rsc = fd_resource(pfb->zsbuf->texture);
- rsc->dirty = false;
- if (rsc->stencil)
- rsc->stencil->dirty = false;
- }
-
/* go through all the used resources and clear their reading flag */
LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list) {
- assert(rsc->reading);
- rsc->reading = false;
+ debug_assert(rsc->status != 0);
+ rsc->status = 0;
+ rsc->pending_ctx = NULL;
list_delinit(&rsc->list);
}
@@ -144,8 +133,10 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
{
fd_context_render(pctx);
- if (fence)
+ if (fence) {
+ fd_screen_fence_ref(pctx->screen, fence, NULL);
*fence = fd_fence_create(pctx);
+ }
}
void
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index e420f1e5bd9..509a90fdf23 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -82,6 +82,20 @@ struct fd_vertex_stateobj {
unsigned num_elements;
};
+struct fd_streamout_stateobj {
+ struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS]; /* entries may be NULL; fd_draw_vbo() checks before marking the buffer written */
+ unsigned num_targets; /* loop bound fd_draw_vbo() uses for both targets[] and offsets[] */
+ /* Track offset from vtxcnt for streamout data. This counter
+ * is just incremented by # of vertices on each draw until
+ * reset or new streamout buffer bound.
+ *
+ * When we eventually have GS, the CPU won't actually know the
+ * number of vertices per draw, so I think we'll have to do
+ * something more clever.
+ */
+ unsigned offsets[PIPE_MAX_SO_BUFFERS];
+};
+
/* group together the vertex and vertexbuf state.. for ease of passing
* around, and because various internal operations (gmem<->mem, etc)
* need their own vertex state:
@@ -179,7 +193,7 @@ struct fd_context {
struct fd_program_stateobj solid_prog; // TODO move to screen?
/* shaders used by mem->gmem blits: */
- struct fd_program_stateobj blit_prog[8]; // TODO move to screen?
+ struct fd_program_stateobj blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
struct fd_program_stateobj blit_z, blit_zs;
/* do we need to mem2gmem before rendering. We don't, if for example,
@@ -319,6 +333,7 @@ struct fd_context {
FD_DIRTY_VTXBUF = (1 << 15),
FD_DIRTY_INDEXBUF = (1 << 16),
FD_DIRTY_SCISSOR = (1 << 17),
+ FD_DIRTY_STREAMOUT = (1 << 18),
} dirty;
struct pipe_blend_state *blend;
@@ -339,6 +354,7 @@ struct fd_context {
struct pipe_viewport_state viewport;
struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
struct pipe_index_buffer indexbuf;
+ struct fd_streamout_stateobj streamout;
/* GMEM/tile handling fxns: */
void (*emit_tile_init)(struct fd_context *ctx);
@@ -351,18 +367,25 @@ struct fd_context {
void (*emit_sysmem_prep)(struct fd_context *ctx);
/* draw: */
- void (*draw_vbo)(struct fd_context *pctx, const struct pipe_draw_info *info);
+ void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info);
void (*clear)(struct fd_context *ctx, unsigned buffers,
const union pipe_color_union *color, double depth, unsigned stencil);
+
+ /* constant emit: (note currently not used/needed for a2xx) */
+ void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
+ uint32_t regid, uint32_t offset, uint32_t sizedwords,
+ const uint32_t *dwords, struct pipe_resource *prsc);
+ void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+ uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets);
};
-static INLINE struct fd_context *
+static inline struct fd_context *
fd_context(struct pipe_context *pctx)
{
return (struct fd_context *)pctx;
}
-static INLINE struct pipe_scissor_state *
+static inline struct pipe_scissor_state *
fd_context_get_scissor(struct fd_context *ctx)
{
if (ctx->rasterizer && ctx->rasterizer->scissor)
@@ -370,13 +393,13 @@ fd_context_get_scissor(struct fd_context *ctx)
return &ctx->disabled_scissor;
}
-static INLINE bool
+static inline bool
fd_supported_prim(struct fd_context *ctx, unsigned prim)
{
return (1 << prim) & ctx->primtype_mask;
}
-static INLINE void
+static inline void
fd_reset_wfi(struct fd_context *ctx)
{
ctx->needs_wfi = true;
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index c9e317c7dc9..6831a58749c 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -40,7 +40,8 @@
#include "freedreno_util.h"
static void
-resource_reading(struct fd_context *ctx, struct pipe_resource *prsc)
+resource_used(struct fd_context *ctx, struct pipe_resource *prsc,
+ enum fd_resource_status status)
{
struct fd_resource *rsc;
@@ -48,9 +49,29 @@ resource_reading(struct fd_context *ctx, struct pipe_resource *prsc)
return;
rsc = fd_resource(prsc);
- rsc->reading = true;
+ rsc->status |= status;
+ if (rsc->stencil)
+ rsc->stencil->status |= status;
+
+ /* TODO resources can actually be shared across contexts,
+ * so I'm not sure a single list-head will do the trick?
+ */
+ debug_assert((rsc->pending_ctx == ctx) || !rsc->pending_ctx);
list_delinit(&rsc->list);
list_addtail(&rsc->list, &ctx->used_resources);
+ rsc->pending_ctx = ctx;
+}
+
+static void
+resource_read(struct fd_context *ctx, struct pipe_resource *prsc)
+{ /* thin wrapper: record 'prsc' as used by 'ctx' with the FD_PENDING_READ status bit */
+ resource_used(ctx, prsc, FD_PENDING_READ);
+}
+
+static void
+resource_written(struct fd_context *ctx, struct pipe_resource *prsc)
+{ /* thin wrapper: record 'prsc' as used by 'ctx' with the FD_PENDING_WRITE status bit */
+ resource_used(ctx, prsc, FD_PENDING_WRITE);
}
static void
@@ -59,7 +80,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
struct fd_context *ctx = fd_context(pctx);
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
- unsigned i, buffers = 0;
+ unsigned i, prims, buffers = 0;
/* if we supported transform feedback, we'd have to disable this: */
if (((scissor->maxx - scissor->minx) *
@@ -69,6 +90,8 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* emulate unsupported primitives: */
if (!fd_supported_prim(ctx, info->mode)) {
+ if (ctx->streamout.num_targets > 0)
+ debug_error("stream-out with emulated prims");
util_primconvert_save_index_buffer(ctx->primconvert, &ctx->indexbuf);
util_primconvert_save_rasterizer_state(ctx->primconvert, ctx->rasterizer);
util_primconvert_draw_vbo(ctx->primconvert, info);
@@ -83,17 +106,13 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
if (fd_depth_enabled(ctx)) {
buffers |= FD_BUFFER_DEPTH;
- fd_resource(pfb->zsbuf->texture)->dirty = true;
+ resource_written(ctx, pfb->zsbuf->texture);
ctx->gmem_reason |= FD_GMEM_DEPTH_ENABLED;
}
if (fd_stencil_enabled(ctx)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
buffers |= FD_BUFFER_STENCIL;
- if (rsc->stencil)
- rsc->stencil->dirty = true;
- else
- rsc->dirty = true;
+ resource_written(ctx, pfb->zsbuf->texture);
ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
}
@@ -108,7 +127,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
surf = pfb->cbufs[i]->texture;
- fd_resource(surf)->dirty = true;
+ resource_written(ctx, surf);
buffers |= PIPE_CLEAR_COLOR0 << i;
if (surf->nr_samples > 1)
@@ -120,32 +139,38 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
/* Skip over buffer 0, that is sent along with the command stream */
for (i = 1; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
- resource_reading(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
- resource_reading(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
+ resource_read(ctx, ctx->constbuf[PIPE_SHADER_VERTEX].cb[i].buffer);
+ resource_read(ctx, ctx->constbuf[PIPE_SHADER_FRAGMENT].cb[i].buffer);
}
/* Mark VBOs as being read */
for (i = 0; i < ctx->vtx.vertexbuf.count; i++) {
assert(!ctx->vtx.vertexbuf.vb[i].user_buffer);
- resource_reading(ctx, ctx->vtx.vertexbuf.vb[i].buffer);
+ resource_read(ctx, ctx->vtx.vertexbuf.vb[i].buffer);
}
/* Mark index buffer as being read */
- resource_reading(ctx, ctx->indexbuf.buffer);
+ resource_read(ctx, ctx->indexbuf.buffer);
/* Mark textures as being read */
for (i = 0; i < ctx->verttex.num_textures; i++)
if (ctx->verttex.textures[i])
- resource_reading(ctx, ctx->verttex.textures[i]->texture);
+ resource_read(ctx, ctx->verttex.textures[i]->texture);
for (i = 0; i < ctx->fragtex.num_textures; i++)
if (ctx->fragtex.textures[i])
- resource_reading(ctx, ctx->fragtex.textures[i]->texture);
+ resource_read(ctx, ctx->fragtex.textures[i]->texture);
+
+ /* Mark streamout buffers as being written.. */
+ for (i = 0; i < ctx->streamout.num_targets; i++)
+ if (ctx->streamout.targets[i])
+ resource_written(ctx, ctx->streamout.targets[i]->buffer);
ctx->num_draws++;
+ prims = u_reduced_prims_for_vertices(info->mode, info->count);
+
ctx->stats.draw_calls++;
- ctx->stats.prims_emitted +=
- u_reduced_prims_for_vertices(info->mode, info->count);
+ ctx->stats.prims_emitted += prims;
/* any buffers that haven't been cleared yet, we need to restore: */
ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
@@ -159,6 +184,9 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW);
ctx->draw_vbo(ctx, info);
+ /* streamout offsets are tracked in vertices (they are applied
+ * relative to vtxcnt), so advance by the number of vertices in
+ * this draw, not by the reduced primitive count used for stats:
+ */
+ for (i = 0; i < ctx->streamout.num_targets; i++)
+ ctx->streamout.offsets[i] += info->count;
+
/* if an app (or, well, piglit test) does many thousands of draws
* without flush (or anything which implicitly flushes, like
* changing render targets), we can exceed the ringbuffer size.
@@ -216,15 +244,10 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR)
for (i = 0; i < pfb->nr_cbufs; i++)
if (buffers & (PIPE_CLEAR_COLOR0 << i))
- fd_resource(pfb->cbufs[i]->texture)->dirty = true;
+ resource_written(ctx, pfb->cbufs[i]->texture);
if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
- struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
- if (rsc->stencil && buffers & PIPE_CLEAR_STENCIL)
- rsc->stencil->dirty = true;
- if (!rsc->stencil || buffers & PIPE_CLEAR_DEPTH)
- rsc->dirty = true;
-
+ resource_written(ctx, pfb->zsbuf->texture);
ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
}
@@ -242,7 +265,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
FD_DIRTY_SAMPLE_MASK |
FD_DIRTY_PROG |
FD_DIRTY_CONSTBUF |
- FD_DIRTY_BLEND;
+ FD_DIRTY_BLEND |
+ FD_DIRTY_FRAMEBUFFER;
if (fd_mesa_debug & FD_DBG_DCLEAR)
ctx->dirty = 0xffffffff;
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c
index 375e58f7022..04a9feacd58 100644
--- a/src/gallium/drivers/freedreno/freedreno_fence.c
+++ b/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -69,6 +69,9 @@ boolean fd_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
+ if (!timeout)
+ return fd_screen_fence_signalled(screen, fence);
+
if (fd_pipe_wait(fence->screen->pipe, fence->timestamp))
return false;
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index c105378ec4e..648db9baee5 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -82,7 +82,7 @@ total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
{
uint32_t total = 0, i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < MAX_RENDER_TARGETS; i++) {
if (cbuf_cpp[i]) {
gmem->cbuf_base[i] = align(total, 0x4000);
total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
@@ -113,7 +113,7 @@ calculate_tiles(struct fd_context *ctx)
uint32_t nbins_x = 1, nbins_y = 1;
uint32_t bin_w, bin_h;
uint32_t max_width = bin_width(ctx);
- uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0};
+ uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
uint32_t i, j, t, xoff, yoff;
uint32_t tpp_x, tpp_y;
bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
@@ -162,12 +162,17 @@ calculate_tiles(struct fd_context *ctx)
bin_w = align(width / nbins_x, 32);
}
+ if (fd_mesa_debug & FD_DBG_MSGS) {
+ debug_printf("binning input: cbuf cpp:");
+ for (i = 0; i < pfb->nr_cbufs; i++)
+ debug_printf(" %d", cbuf_cpp[i]);
+ debug_printf(", zsbuf cpp: %d; %dx%d\n",
+ zsbuf_cpp[0], width, height);
+ }
+
/* then find a bin width/height that satisfies the memory
* constraints:
*/
- DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d",
- cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0],
- width, height);
while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
if (bin_w > bin_h) {
nbins_x++;
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h
index 5867235db90..38b557eb077 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.h
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.h
@@ -31,6 +31,8 @@
#include "pipe/p_context.h"
+#include "freedreno_util.h"
+
/* per-pipe configuration for hw binning: */
struct fd_vsc_pipe {
struct fd_bo *bo;
@@ -47,9 +49,9 @@ struct fd_tile {
struct fd_gmem_stateobj {
struct pipe_scissor_state scissor;
- uint32_t cbuf_base[4];
+ uint32_t cbuf_base[MAX_RENDER_TARGETS];
uint32_t zsbuf_base[2];
- uint8_t cbuf_cpp[4];
+ uint8_t cbuf_cpp[MAX_RENDER_TARGETS];
uint8_t zsbuf_cpp[2];
uint16_t bin_h, nbins_y;
uint16_t bin_w, nbins_x;
diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c
index 5e344e69146..e6a647852a3 100644
--- a/src/gallium/drivers/freedreno/freedreno_program.c
+++ b/src/gallium/drivers/freedreno/freedreno_program.c
@@ -96,7 +96,11 @@ fd_prog_blit(struct pipe_context *pctx, int rts, bool depth)
{
int i;
struct ureg_src tc;
- struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ struct ureg_program *ureg;
+
+ debug_assert(rts <= MAX_RENDER_TARGETS);
+
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
if (!ureg)
return NULL;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 95f79df565e..709ad4eb55b 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -42,6 +42,14 @@
#include
+
+/* Returns true if any of the given status bits is set on the resource
+ * itself or, when one is attached, on its stencil resource.
+ */
+static bool
+pending(struct fd_resource *rsc, enum fd_resource_status status)
+{
+	if (rsc->status & status)
+		return true;
+
+	if (!rsc->stencil)
+		return false;
+
+	return (rsc->stencil->status & status) != 0;
+}
+
static void
fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
{
@@ -72,11 +80,11 @@ fd_invalidate_resource(struct fd_context *ctx, struct pipe_resource *prsc)
/* Textures */
for (i = 0; i < ctx->verttex.num_textures && !(ctx->dirty & FD_DIRTY_VERTTEX); i++) {
- if (ctx->verttex.textures[i]->texture == prsc)
+ if (ctx->verttex.textures[i] && (ctx->verttex.textures[i]->texture == prsc))
ctx->dirty |= FD_DIRTY_VERTTEX;
}
for (i = 0; i < ctx->fragtex.num_textures && !(ctx->dirty & FD_DIRTY_FRAGTEX); i++) {
- if (ctx->fragtex.textures[i]->texture == prsc)
+ if (ctx->fragtex.textures[i] && (ctx->fragtex.textures[i]->texture == prsc))
ctx->dirty |= FD_DIRTY_FRAGTEX;
}
}
@@ -97,7 +105,8 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
rsc->bo = fd_bo_new(screen->dev, size, flags);
rsc->timestamp = 0;
- rsc->dirty = rsc->reading = false;
+ rsc->status = 0;
+ rsc->pending_ctx = NULL;
list_delinit(&rsc->list);
util_range_set_empty(&rsc->valid_buffer_range);
}
@@ -238,8 +247,9 @@ fd_resource_transfer_map(struct pipe_context *pctx,
/* If the GPU is writing to the resource, or if it is reading from the
* resource and we're trying to write to it, flush the renders.
*/
- if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty) ||
- ((ptrans->usage & PIPE_TRANSFER_WRITE) && rsc->reading))
+ if (((ptrans->usage & PIPE_TRANSFER_WRITE) &&
+ pending(rsc, FD_PENDING_READ | FD_PENDING_WRITE)) ||
+ pending(rsc, FD_PENDING_WRITE))
fd_context_render(pctx);
/* The GPU keeps track of how the various bo's are being used, and
@@ -646,6 +656,8 @@ fd_blitter_pipe_begin(struct fd_context *ctx)
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
+ util_blitter_save_so_targets(ctx->blitter, ctx->streamout.num_targets,
+ ctx->streamout.targets);
util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
@@ -675,7 +687,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
struct fd_resource *rsc = fd_resource(prsc);
- if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty))
+ if (pending(rsc, FD_PENDING_WRITE | FD_PENDING_READ))
fd_context_render(pctx);
}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 0634923fcb2..7549becaa1f 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -60,6 +60,15 @@ struct fd_resource_slice {
uint32_t size0; /* size of first layer in slice */
};
+/* Status of queued-up but not yet flushed read and write operations.
+ * In _transfer_map() we need to know if queued up rendering needs
+ * to be flushed to preserve the order of cpu and gpu access.
+ *
+ * The values are distinct bits so that they can be OR'd together when
+ * testing for either kind of pending access.
+ */
+enum fd_resource_status {
+ FD_PENDING_WRITE = 0x01,
+ FD_PENDING_READ = 0x02,
+};
+
struct fd_resource {
struct u_resource base;
struct fd_bo *bo;
@@ -68,17 +77,23 @@ struct fd_resource {
uint32_t layer_size;
struct fd_resource_slice slices[MAX_MIP_LEVELS];
uint32_t timestamp;
- bool dirty, reading;
/* buffer range that has been initialized */
struct util_range valid_buffer_range;
/* reference to the resource holding stencil data for a z32_s8 texture */
+ /* TODO rename to secondary or auxiliary? */
struct fd_resource *stencil;
+ /* pending read/write state: */
+ enum fd_resource_status status;
+ /* resources accessed by queued but not flushed draws are tracked
+ * in the used_resources list.
+ */
struct list_head list;
+ struct fd_context *pending_ctx;
};
-static INLINE struct fd_resource *
+static inline struct fd_resource *
fd_resource(struct pipe_resource *ptex)
{
return (struct fd_resource *)ptex;
@@ -89,13 +104,13 @@ struct fd_transfer {
void *staging;
};
-static INLINE struct fd_transfer *
+static inline struct fd_transfer *
fd_transfer(struct pipe_transfer *ptrans)
{
return (struct fd_transfer *)ptrans;
}
-static INLINE struct fd_resource_slice *
+static inline struct fd_resource_slice *
fd_resource_slice(struct fd_resource *rsc, unsigned level)
{
assert(level <= rsc->base.b.last_level);
@@ -103,7 +118,7 @@ fd_resource_slice(struct fd_resource *rsc, unsigned level)
}
/* get offset for specified mipmap level and texture/array layer */
-static INLINE uint32_t
+static inline uint32_t
fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
{
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index b3b5462b437..b55f5b36ca9 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -68,7 +68,8 @@ static const struct debug_named_value debug_options[] = {
{"fraghalf", FD_DBG_FRAGHALF, "Use half-precision in fragment shader"},
{"nobin", FD_DBG_NOBIN, "Disable hw binning"},
{"optmsgs", FD_DBG_OPTMSGS,"Enable optimizer debug messages"},
- {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 120 (rather than 130) on a3xx+"},
+ {"glsl120", FD_DBG_GLSL120,"Temporary flag to force GLSL 1.20 (rather than 1.30) on a3xx+"},
+ {"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"},
DEBUG_NAMED_VALUE_END
};
@@ -163,9 +164,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_CUBE_MAP_ARRAY:
- case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
- case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
- case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_COMPUTE:
@@ -175,10 +173,23 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
- return is_a3xx(screen) || is_a4xx(screen);
-
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ return is_a3xx(screen) || is_a4xx(screen);
+
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ /* ignoring first/last_element.. but I guess that should be
+ * easy to add..
+ */
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ /* I think 32k on a4xx.. and we could possibly emulate more
+ * by pretending 2d/rect textures and splitting high bits
+ * of index into 2nd dimension..
+ */
+ return 16383;
+
case PIPE_CAP_DEPTH_CLIP_DISABLE:
return is_a3xx(screen);
@@ -188,7 +199,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_GLSL_FEATURE_LEVEL:
if (glsl120)
return 120;
- return (is_a3xx(screen) || is_a4xx(screen)) ? 130 : 120;
+ return is_ir3(screen) ? 130 : 120;
/* Unsupported features. */
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
@@ -218,6 +229,10 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
@@ -225,9 +240,17 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Stream output. */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ if (is_ir3(screen))
+ return PIPE_MAX_SO_BUFFERS;
+ return 0;
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ if (is_ir3(screen))
+ return 1;
+ return 0;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ if (is_ir3(screen))
+ return 16 * 4; /* should only be shader out limit? */
return 0;
/* Geometry shader output, unsupported. */
@@ -258,9 +281,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_QUERY_TIMESTAMP:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
- /* TODO still missing on a4xx, but we lie to get gl2..
- * it's not a feature, it's a bug!
- */
return is_a3xx(screen) || is_a4xx(screen);
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
@@ -357,7 +377,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
*/
return ((is_a3xx(screen) || is_a4xx(screen)) ? 4096 : 64) * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
- return (is_a3xx(screen) || is_a4xx(screen)) ? 16 : 1;
+ return is_ir3(screen) ? 16 : 1;
case PIPE_SHADER_CAP_MAX_PREDS:
return 0; /* nothing uses this */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
@@ -379,7 +399,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_INTEGERS:
if (glsl120)
return 0;
- return (is_a3xx(screen) || is_a4xx(screen)) ? 1 : 0;
+ return is_ir3(screen) ? 1 : 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return 16;
@@ -546,7 +566,6 @@ fd_screen_create(struct fd_device *dev)
pscreen->get_timestamp = fd_screen_get_timestamp;
pscreen->fence_reference = fd_screen_fence_ref;
- pscreen->fence_signalled = fd_screen_fence_signalled;
pscreen->fence_finish = fd_screen_fence_finish;
util_format_s3tc_init();
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index dbc2808262a..4e5c3a61958 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -56,7 +56,7 @@ struct fd_screen {
int64_t cpu_gpu_time_delta;
};
-static INLINE struct fd_screen *
+static inline struct fd_screen *
fd_screen(struct pipe_screen *pscreen)
{
return (struct fd_screen *)pscreen;
@@ -73,6 +73,7 @@ struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen,
struct pipe_screen * fd_screen_create(struct fd_device *dev);
/* is a3xx patch revision 0? */
+/* TODO a306.0 probably doesn't need this.. be more clever?? */
static inline boolean
is_a3xx_p0(struct fd_screen *screen)
{
@@ -91,4 +92,11 @@ is_a4xx(struct fd_screen *screen)
return (screen->gpu_id >= 400) && (screen->gpu_id < 500);
}
+/* Does this GPU use the ir3 compiler (the shader isa introduced with
+ * a3xx)?  True for a3xx and a4xx parts.
+ */
+static inline boolean
+is_ir3(struct fd_screen *screen)
+{
+	if (is_a3xx(screen))
+		return TRUE;
+
+	return is_a4xx(screen);
+}
+
#endif /* FREEDRENO_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
index 77aa4f21d3b..7bf8bdb4507 100644
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -300,6 +300,67 @@ fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
ctx->dirty |= FD_DIRTY_VTXSTATE;
}
+/* Allocate a stream-output target for prsc.  The new target starts with
+ * a reference count of one, takes its own reference on prsc, and records
+ * the owning context plus the requested offset and size.  Returns NULL
+ * if the allocation fails.
+ */
+static struct pipe_stream_output_target *
+fd_create_stream_output_target(struct pipe_context *pctx,
+		struct pipe_resource *prsc, unsigned buffer_offset,
+		unsigned buffer_size)
+{
+	struct pipe_stream_output_target *so_target =
+			CALLOC_STRUCT(pipe_stream_output_target);
+
+	if (so_target) {
+		so_target->context = pctx;
+		so_target->buffer_size = buffer_size;
+		so_target->buffer_offset = buffer_offset;
+
+		pipe_reference_init(&so_target->reference, 1);
+		pipe_resource_reference(&so_target->buffer, prsc);
+	}
+
+	return so_target;
+}
+
+/* Destroy a stream-output target: drop the reference it holds on its
+ * buffer, then free the target itself.  pctx is not used here.
+ */
+static void
+fd_stream_output_target_destroy(struct pipe_context *pctx,
+ struct pipe_stream_output_target *target)
+{
+ /* release the buffer before the struct that points at it goes away */
+ pipe_resource_reference(&target->buffer, NULL);
+ FREE(target);
+}
+
+/* Bind num_targets stream-output targets to the context.
+ *
+ * For each slot, an offsets[] entry of (unsigned)-1 combined with an
+ * unchanged target leaves the slot (and its accumulated offset) alone;
+ * in every other case the slot's offset is reset to zero and the slot
+ * is re-referenced to the new target.  Slots beyond num_targets that
+ * were bound before are unreferenced.  Always flags FD_DIRTY_STREAMOUT.
+ */
+static void
+fd_set_stream_output_targets(struct pipe_context *pctx,
+		unsigned num_targets, struct pipe_stream_output_target **targets,
+		const unsigned *offsets)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_streamout_stateobj *so = &ctx->streamout;
+	unsigned slot;
+
+	debug_assert(num_targets <= ARRAY_SIZE(so->targets));
+
+	for (slot = 0; slot < num_targets; slot++) {
+		boolean same = (targets[slot] == so->targets[slot]);
+		boolean append = (offsets[slot] == (unsigned)-1);
+
+		/* appending to the target that is already bound: nothing to do */
+		if (same && append)
+			continue;
+
+		so->offsets[slot] = 0;
+		pipe_so_target_reference(&so->targets[slot], targets[slot]);
+	}
+
+	/* drop whatever is still bound past the new target count */
+	while (slot < so->num_targets) {
+		pipe_so_target_reference(&so->targets[slot], NULL);
+		slot++;
+	}
+
+	ctx->dirty |= FD_DIRTY_STREAMOUT;
+	so->num_targets = num_targets;
+}
+
void
fd_state_init(struct pipe_context *pctx)
{
@@ -328,4 +389,8 @@ fd_state_init(struct pipe_context *pctx)
pctx->create_vertex_elements_state = fd_vertex_state_create;
pctx->delete_vertex_elements_state = fd_vertex_state_delete;
pctx->bind_vertex_elements_state = fd_vertex_state_bind;
+
+ pctx->create_stream_output_target = fd_create_stream_output_target;
+ pctx->stream_output_target_destroy = fd_stream_output_target_destroy;
+ pctx->set_stream_output_targets = fd_set_stream_output_targets;
}
diff --git a/src/gallium/drivers/freedreno/freedreno_surface.c b/src/gallium/drivers/freedreno/freedreno_surface.c
index 250fe4bc0f5..70c44eb79c3 100644
--- a/src/gallium/drivers/freedreno/freedreno_surface.c
+++ b/src/gallium/drivers/freedreno/freedreno_surface.c
@@ -41,7 +41,8 @@ fd_create_surface(struct pipe_context *pctx,
// struct fd_resource* tex = fd_resource(ptex);
struct fd_surface* surface = CALLOC_STRUCT(fd_surface);
- assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+ debug_assert(ptex->target != PIPE_BUFFER);
+ debug_assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
if (surface) {
struct pipe_surface *psurf = &surface->base;
diff --git a/src/gallium/drivers/freedreno/freedreno_surface.h b/src/gallium/drivers/freedreno/freedreno_surface.h
index 3293f33dd84..2de37cee2dd 100644
--- a/src/gallium/drivers/freedreno/freedreno_surface.h
+++ b/src/gallium/drivers/freedreno/freedreno_surface.h
@@ -40,7 +40,7 @@ struct fd_surface {
uint16_t depth;
};
-static INLINE struct fd_surface *
+static inline struct fd_surface *
fd_surface(struct pipe_surface *psurf)
{
return (struct fd_surface *)psurf;
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
index deb0e602ce2..7129a1bddd1 100644
--- a/src/gallium/drivers/freedreno/freedreno_util.h
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -40,6 +40,7 @@
#include "util/u_dynarray.h"
#include "util/u_pack_color.h"
+#include "disasm.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
@@ -53,6 +54,12 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
/* TBD if it is same on a2xx, but for now: */
#define MAX_MIP_LEVELS A3XX_MAX_MIP_LEVELS
+#define A2XX_MAX_RENDER_TARGETS 1
+#define A3XX_MAX_RENDER_TARGETS 4
+#define A4XX_MAX_RENDER_TARGETS 8
+
+#define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS
+
#define FD_DBG_MSGS 0x0001
#define FD_DBG_DISASM 0x0002
#define FD_DBG_DCLEAR 0x0004
@@ -64,6 +71,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
#define FD_DBG_NOBIN 0x0100
#define FD_DBG_OPTMSGS 0x0200
#define FD_DBG_GLSL120 0x0400
+#define FD_DBG_SHADERDB 0x0800
extern int fd_mesa_debug;
extern bool fd_binning_enabled;
@@ -108,6 +116,58 @@ pipe_surface_format(struct pipe_surface *psurf)
return psurf->format;
}
+/* Decide whether half precision is acceptable for a surface.  A NULL
+ * surface imposes no constraint and yields true; pure-integer formats
+ * and float formats with a 32-bit first RGB component yield false.
+ */
+static inline bool
+fd_surface_half_precision(const struct pipe_surface *psurf)
+{
+	enum pipe_format fmt;
+	bool wide_float;
+
+	if (!psurf)
+		return true;
+
+	fmt = psurf->format;
+
+	/* colors are provided in consts, which go through cov.f32f16, which
+	 * will break these values
+	 */
+	if (util_format_is_pure_integer(fmt))
+		return false;
+
+	/* avoid losing precision on 32-bit float formats */
+	wide_float = util_format_is_float(fmt) &&
+		util_format_get_component_bits(fmt, UTIL_FORMAT_COLORSPACE_RGB, 0) == 32;
+
+	return !wide_float;
+}
+
+/* First mip level of a sampler view; buffer views always report 0. */
+static inline unsigned
+fd_sampler_first_level(const struct pipe_sampler_view *view)
+{
+	return (view->target == PIPE_BUFFER) ? 0 : view->u.tex.first_level;
+}
+
+/* Last mip level of a sampler view; buffer views always report 0. */
+static inline unsigned
+fd_sampler_last_level(const struct pipe_sampler_view *view)
+{
+	return (view->target == PIPE_BUFFER) ? 0 : view->u.tex.last_level;
+}
+
+/* True only if every color buffer slot of the framebuffer (the first
+ * nr_cbufs entries) passes fd_surface_half_precision().
+ */
+static inline bool
+fd_half_precision(struct pipe_framebuffer_state *pfb)
+{
+	unsigned n = 0;
+
+	while (n < pfb->nr_cbufs) {
+		if (!fd_surface_half_precision(pfb->cbufs[n]))
+			return false;
+		n++;
+	}
+
+	return true;
+}
+
#define LOG_DWORDS 0
static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
index 48ae7c71b9f..83ed5ffdca0 100644
--- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
+++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
@@ -103,7 +103,7 @@ static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
} else if ((reg.num == REG_P0) && !c) {
printf("p0.%c", component[reg.comp]);
} else {
- printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]);
+ printf("%s%c%d.%c", full ? "" : "h", type, reg.num & 0x3f, component[reg.comp]);
}
}
@@ -122,6 +122,32 @@ static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
print_reg(reg, full, r, c, im, neg, abs, addr_rel);
}
+/* TODO switch to using reginfo struct everywhere, since more readable
+ * than passing a bunch of bools to print_reg_src
+ */
+
+/* Bundles a register with the flags that print_reg_src() otherwise
+ * takes as separate arguments.  print_src() forwards the fields to
+ * print_reg_src() in the order they are declared here.
+ */
+struct reginfo {
+ reg_t reg;
+ bool full;
+ bool r;
+ bool c;
+ bool im;
+ bool neg;
+ bool abs;
+ bool addr_rel;
+};
+
+/* Print a source register described by a struct reginfo by unpacking
+ * its fields into the positional arguments of print_reg_src().
+ */
+static void print_src(struct reginfo *info)
+{
+ print_reg_src(info->reg, info->full, info->r, info->c, info->im,
+ info->neg, info->abs, info->addr_rel);
+}
+
+//static void print_dst(struct reginfo *info)
+//{
+// print_reg_dst(info->reg, info->full, info->addr_rel);
+//}
+
static void print_instr_cat0(instr_t *instr)
{
instr_cat0_t *cat0 = &instr->cat0;
@@ -454,10 +480,70 @@ static void print_instr_cat6(instr_t *instr)
{
instr_cat6_t *cat6 = &instr->cat6;
char sd = 0, ss = 0; /* dst/src address space */
- bool full = type_size(cat6->type) == 32;
bool nodst = false;
+ struct reginfo dst, src1, src2;
+ int src1off = 0, dstoff = 0;
- printf(".%s ", type[cat6->type]);
+ memset(&dst, 0, sizeof(dst));
+ memset(&src1, 0, sizeof(src1));
+ memset(&src2, 0, sizeof(src2));
+
+ switch (cat6->opc) {
+ case OPC_RESINFO:
+ case OPC_RESFMT:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = type_size(cat6->type) == 32;
+ src2.full = type_size(cat6->type) == 32;
+ break;
+ case OPC_L2G:
+ case OPC_G2L:
+ dst.full = true;
+ src1.full = true;
+ src2.full = true;
+ break;
+ case OPC_STG:
+ case OPC_STL:
+ case OPC_STP:
+ case OPC_STI:
+ case OPC_STLW:
+ case OPC_STGB_4D_4:
+ case OPC_STIB:
+ dst.full = true;
+ src1.full = type_size(cat6->type) == 32;
+ src2.full = type_size(cat6->type) == 32;
+ break;
+ default:
+ dst.full = type_size(cat6->type) == 32;
+ src1.full = true;
+ src2.full = true;
+ break;
+ }
+
+ switch (cat6->opc) {
+ case OPC_PREFETCH:
+ case OPC_RESINFO:
+ break;
+ case OPC_ATOMIC_ADD:
+ case OPC_ATOMIC_SUB:
+ case OPC_ATOMIC_XCHG:
+ case OPC_ATOMIC_INC:
+ case OPC_ATOMIC_DEC:
+ case OPC_ATOMIC_CMPXCHG:
+ case OPC_ATOMIC_MIN:
+ case OPC_ATOMIC_MAX:
+ case OPC_ATOMIC_AND:
+ case OPC_ATOMIC_OR:
+ case OPC_ATOMIC_XOR:
+ ss = cat6->g ? 'g' : 'l';
+ printf(".%c", ss);
+ printf(".%s", type[cat6->type]);
+ break;
+ default:
+ dst.im = cat6->g && !cat6->dst_off;
+ printf(".%s", type[cat6->type]);
+ break;
+ }
+ printf(" ");
switch (cat6->opc) {
case OPC_STG:
@@ -499,68 +585,65 @@ static void print_instr_cat6(instr_t *instr)
break;
case OPC_STI:
- full = false; // XXX or inverts??
+ dst.full = false; // XXX or inverts??
break;
}
- if (cat6->has_off) {
- if (!nodst) {
- if (sd)
- printf("%c[", sd);
- print_reg_dst((reg_t)(cat6->a.dst), full, false);
- if (sd)
- printf("]");
- printf(", ");
- }
- if (ss)
- printf("%c[", ss);
- print_reg_src((reg_t)(cat6->a.src1), true,
- false, false, cat6->a.src1_im, false, false, false);
- if (cat6->a.off)
- printf("%+d", cat6->a.off);
- if (ss)
- printf("]");
- printf(", ");
- print_reg_src((reg_t)(cat6->a.src2), full,
- false, false, cat6->a.src2_im, false, false, false);
+ if (cat6->dst_off) {
+ dst.reg = (reg_t)(cat6->c.dst);
+ dstoff = cat6->c.off;
} else {
- if (!nodst) {
- if (sd)
- printf("%c[", sd);
- print_reg_dst((reg_t)(cat6->b.dst), full, false);
- if (sd)
- printf("]");
- printf(", ");
- }
- if (ss)
- printf("%c[", ss);
- print_reg_src((reg_t)(cat6->b.src1), true,
- false, false, cat6->b.src1_im, false, false, false);
- if (ss)
- printf("]");
- printf(", ");
- print_reg_src((reg_t)(cat6->b.src2), full,
- false, false, cat6->b.src2_im, false, false, false);
+ dst.reg = (reg_t)(cat6->d.dst);
}
- if (debug & PRINT_VERBOSE) {
- switch (cat6->opc) {
- case OPC_LDG:
- case OPC_LDP:
- /* load instructions: */
- if (cat6->a.dummy2|cat6->a.dummy3)
- printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3);
- break;
- case OPC_STG:
- case OPC_STP:
- case OPC_STI:
- /* store instructions: */
- if (cat6->b.dummy2|cat6->b.dummy2)
- printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3);
- if (cat6->b.ignore0)
- printf("\t{?? %x}", cat6->b.ignore0);
- break;
- }
+ if (cat6->src_off) {
+ src1.reg = (reg_t)(cat6->a.src1);
+ src1.im = cat6->a.src1_im;
+ src2.reg = (reg_t)(cat6->a.src2);
+ src2.im = cat6->a.src2_im;
+ src1off = cat6->a.off;
+ } else {
+ src1.reg = (reg_t)(cat6->b.src1);
+ src1.im = cat6->b.src1_im;
+ src2.reg = (reg_t)(cat6->b.src2);
+ src2.im = cat6->b.src2_im;
+ }
+
+ if (!nodst) {
+ if (sd)
+ printf("%c[", sd);
+ /* note: dst might actually be a src (ie. address to store to) */
+ print_src(&dst);
+ if (dstoff)
+ printf("%+d", dstoff);
+ if (sd)
+ printf("]");
+ printf(", ");
+ }
+
+ if (ss)
+ printf("%c[", ss);
+
+ /* can have a larger than normal immed, so hack: */
+ if (src1.im) {
+ printf("%u", src1.reg.dummy13);
+ } else {
+ print_src(&src1);
+ }
+
+ if (src1off)
+ printf("%+d", src1off);
+ if (ss)
+ printf("]");
+
+ switch (cat6->opc) {
+ case OPC_RESINFO:
+ case OPC_RESFMT:
+ break;
+ default:
+ printf(", ");
+ print_src(&src2);
+ break;
}
}
@@ -711,19 +794,19 @@ struct opc_info {
OPC(6, OPC_LDLW, ldlw),
OPC(6, OPC_STLW, stlw),
OPC(6, OPC_RESFMT, resfmt),
- OPC(6, OPC_RESINFO, resinf),
- OPC(6, OPC_ATOMIC_ADD_L, atomic.add.l),
- OPC(6, OPC_ATOMIC_SUB_L, atomic.sub.l),
- OPC(6, OPC_ATOMIC_XCHG_L, atomic.xchg.l),
- OPC(6, OPC_ATOMIC_INC_L, atomic.inc.l),
- OPC(6, OPC_ATOMIC_DEC_L, atomic.dec.l),
- OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
- OPC(6, OPC_ATOMIC_MIN_L, atomic.min.l),
- OPC(6, OPC_ATOMIC_MAX_L, atomic.max.l),
- OPC(6, OPC_ATOMIC_AND_L, atomic.and.l),
- OPC(6, OPC_ATOMIC_OR_L, atomic.or.l),
- OPC(6, OPC_ATOMIC_XOR_L, atomic.xor.l),
- OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.4d),
+ OPC(6, OPC_RESINFO, resinfo),
+ OPC(6, OPC_ATOMIC_ADD, atomic.add),
+ OPC(6, OPC_ATOMIC_SUB, atomic.sub),
+ OPC(6, OPC_ATOMIC_XCHG, atomic.xchg),
+ OPC(6, OPC_ATOMIC_INC, atomic.inc),
+ OPC(6, OPC_ATOMIC_DEC, atomic.dec),
+ OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
+ OPC(6, OPC_ATOMIC_MIN, atomic.min),
+ OPC(6, OPC_ATOMIC_MAX, atomic.max),
+ OPC(6, OPC_ATOMIC_AND, atomic.and),
+ OPC(6, OPC_ATOMIC_OR, atomic.or),
+ OPC(6, OPC_ATOMIC_XOR, atomic.xor),
+ OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.3d),
OPC(6, OPC_STGB_4D_4, stgb.4d.4),
OPC(6, OPC_STIB, stib),
OPC(6, OPC_LDC_4, ldc.4),
diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
index efb07ea479e..c3fb68d511c 100644
--- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
@@ -173,17 +173,17 @@ typedef enum {
OPC_STLW = 11,
OPC_RESFMT = 14,
OPC_RESINFO = 15,
- OPC_ATOMIC_ADD_L = 16,
- OPC_ATOMIC_SUB_L = 17,
- OPC_ATOMIC_XCHG_L = 18,
- OPC_ATOMIC_INC_L = 19,
- OPC_ATOMIC_DEC_L = 20,
- OPC_ATOMIC_CMPXCHG_L = 21,
- OPC_ATOMIC_MIN_L = 22,
- OPC_ATOMIC_MAX_L = 23,
- OPC_ATOMIC_AND_L = 24,
- OPC_ATOMIC_OR_L = 25,
- OPC_ATOMIC_XOR_L = 26,
+ OPC_ATOMIC_ADD = 16,
+ OPC_ATOMIC_SUB = 17,
+ OPC_ATOMIC_XCHG = 18,
+ OPC_ATOMIC_INC = 19,
+ OPC_ATOMIC_DEC = 20,
+ OPC_ATOMIC_CMPXCHG = 21,
+ OPC_ATOMIC_MIN = 22,
+ OPC_ATOMIC_MAX = 23,
+ OPC_ATOMIC_AND = 24,
+ OPC_ATOMIC_OR = 25,
+ OPC_ATOMIC_XOR = 26,
OPC_LDGB_TYPED_4D = 27,
OPC_STGB_4D_4 = 28,
OPC_STIB = 29,
@@ -575,7 +575,7 @@ typedef struct PACKED {
uint32_t opc_cat : 3;
} instr_cat5_t;
-/* [src1 + off], src2: */
+/* dword0 encoding for src_off: [src1 + off], src2: */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe1 : 1;
@@ -586,37 +586,50 @@ typedef struct PACKED {
uint32_t src2 : 8;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
- uint32_t type : 3;
- uint32_t dummy3 : 2;
- uint32_t opc : 5;
- uint32_t jmp_tgt : 1;
- uint32_t sync : 1;
- uint32_t opc_cat : 3;
+ uint32_t dword1;
} instr_cat6a_t;
-/* [src1], src2: */
+/* dword0 encoding for !src_off: [src1], src2 */
typedef struct PACKED {
/* dword0: */
uint32_t mustbe0 : 1;
- uint32_t src1 : 8;
- uint32_t ignore0 : 13;
+ uint32_t src1 : 13;
+ uint32_t ignore0 : 8;
uint32_t src1_im : 1;
uint32_t src2_im : 1;
uint32_t src2 : 8;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
- uint32_t type : 3;
- uint32_t dummy3 : 2;
- uint32_t opc : 5;
- uint32_t jmp_tgt : 1;
- uint32_t sync : 1;
- uint32_t opc_cat : 3;
+ uint32_t dword1;
} instr_cat6b_t;
+/* dword1 encoding for dst_off: the view of a cat6 instruction used when
+ * the dst_off bit is set, giving a destination register plus a signed
+ * 8-bit offset.  The dword1 bitfields add up to 8 + 1 + 8 + 15 = 32 bits.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ /* note: there is some weird stuff going on where sometimes
+ * cat6->a.off is involved.. but that seems like a bug in
+ * the blob, since it is used even if !cat6->src_off
+ * It would make sense for there to be some more bits to
+ * bring us to 11 bits worth of offset, but not sure..
+ */
+ int32_t off : 8;
+ /* declared after 8 bits, like dst_off in instr_cat6_t */
+ uint32_t mustbe1 : 1;
+ uint32_t dst : 8;
+ uint32_t pad1 : 15;
+} instr_cat6c_t;
+
+/* dword1 encoding for !dst_off: the view of a cat6 instruction used when
+ * the dst_off bit is clear, giving a plain destination register with no
+ * offset.  The dword1 bitfields add up to 8 + 1 + 23 = 32 bits.
+ */
+typedef struct PACKED {
+ /* dword0: */
+ uint32_t dword0;
+
+ uint32_t dst : 8;
+ /* declared after 8 bits, like dst_off in instr_cat6_t */
+ uint32_t mustbe0 : 1;
+ uint32_t pad0 : 23;
+} instr_cat6d_t;
+
/* I think some of the other cat6 instructions use additional
* sub-encodings..
*/
@@ -624,16 +637,20 @@ typedef struct PACKED {
typedef union PACKED {
instr_cat6a_t a;
instr_cat6b_t b;
+ instr_cat6c_t c;
+ instr_cat6d_t d;
struct PACKED {
/* dword0: */
- uint32_t has_off : 1;
+ uint32_t src_off : 1;
uint32_t pad1 : 31;
/* dword1: */
- uint32_t dst : 8;
- uint32_t dummy2 : 9;
+ uint32_t pad2 : 8;
+ uint32_t dst_off : 1;
+ uint32_t pad3 : 8;
uint32_t type : 3;
- uint32_t dummy3 : 2;
+ uint32_t g : 1; /* or in some cases it means dst immed */
+ uint32_t pad4 : 1;
uint32_t opc : 5;
uint32_t jmp_tgt : 1;
uint32_t sync : 1;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c
index a166b67d7cf..b24825cff85 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -499,32 +499,51 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
static int emit_cat6(struct ir3_instruction *instr, void *ptr,
struct ir3_info *info)
{
- struct ir3_register *dst = instr->regs[0];
- struct ir3_register *src1 = instr->regs[1];
- struct ir3_register *src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
+ struct ir3_register *dst, *src1, *src2;
instr_cat6_t *cat6 = ptr;
- iassert(instr->regs_count >= 2);
+ /* the "dst" for a store instruction is (from the perspective
+ * of data flow in the shader, ie. register use/def, etc) in
+ * fact a register that is read by the instruction, rather
+ * than written:
+ */
+ if (is_store(instr)) {
+ iassert(instr->regs_count >= 3);
- if (instr->cat6.offset || instr->opc == OPC_LDG) {
+ dst = instr->regs[1];
+ src1 = instr->regs[2];
+ src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL;
+ } else {
+ iassert(instr->regs_count >= 2);
+
+ dst = instr->regs[0];
+ src1 = instr->regs[1];
+ src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
+ }
+
+
+ /* TODO we need a more comprehensive list about which instructions
+ * can be encoded which way. Or possibly use IR3_INSTR_0 flag to
+ * indicate to use the src_off encoding even if offset is zero
+ * (but then what to do about dst_off?)
+ */
+ if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) {
instr_cat6a_t *cat6a = ptr;
- cat6->has_off = true;
+ cat6->src_off = true;
- cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
if (src2) {
cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
}
- cat6a->off = instr->cat6.offset;
+ cat6a->off = instr->cat6.src_offset;
} else {
instr_cat6b_t *cat6b = ptr;
- cat6->has_off = false;
+ cat6->src_off = false;
- cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
if (src2) {
@@ -533,10 +552,22 @@ static int emit_cat6(struct ir3_instruction *instr, void *ptr,
}
}
+ if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) {
+ instr_cat6c_t *cat6c = ptr;
+ cat6->dst_off = true;
+ cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+ cat6c->off = instr->cat6.dst_offset;
+ } else {
+ instr_cat6d_t *cat6d = ptr;
+ cat6->dst_off = false;
+ cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+ }
+
cat6->type = instr->cat6.type;
cat6->opc = instr->opc;
cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP);
cat6->sync = !!(instr->flags & IR3_INSTR_SY);
+ cat6->g = !!(instr->flags & IR3_INSTR_G);
cat6->opc_cat = 6;
return 0;
@@ -669,7 +700,6 @@ struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
return ir3_instr_create2(block, category, opc, 4);
}
-/* only used by old compiler: */
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
{
struct ir3_instruction *new_instr = instr_create(instr->block,
@@ -707,6 +737,17 @@ struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
return reg;
}
+void
+ir3_instr_set_address(struct ir3_instruction *instr,
+ struct ir3_instruction *addr)
+{
+ if (instr->address != addr) {
+ struct ir3 *ir = instr->block->shader;
+ instr->address = addr;
+ array_insert(ir->indirects, instr);
+ }
+}
+
void
ir3_block_clear_mark(struct ir3_block *block)
{
@@ -723,15 +764,16 @@ ir3_clear_mark(struct ir3 *ir)
}
/* note: this will destroy instr->depth, don't do it until after sched! */
-void
+unsigned
ir3_count_instructions(struct ir3 *ir)
{
- unsigned ip = 0;
+ unsigned cnt = 0;
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- instr->ip = ip++;
+ instr->ip = cnt++;
}
block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip;
block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip;
}
+ return cnt;
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index 9c35a763d58..12f2ebe18db 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -172,6 +172,7 @@ struct ir3_instruction {
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
+ IR3_INSTR_G = 0x400,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
@@ -209,7 +210,8 @@ struct ir3_instruction {
} cat5;
struct {
type_t type;
- int offset;
+ int src_offset;
+ int dst_offset;
int iim_val;
} cat6;
/* for meta-instructions, just used to hold extra data
@@ -285,6 +287,8 @@ struct ir3_instruction {
/* an instruction can reference at most one address register amongst
* it's src/dst registers. Beyond that, you need to insert mov's.
+ *
+ * NOTE: do not write this directly, use ir3_instr_set_address()
*/
struct ir3_instruction *address;
@@ -365,6 +369,12 @@ struct ir3 {
unsigned predicates_count, predicates_sz;
struct ir3_instruction **predicates;
+ /* Track instructions which do not write a register but other-
+ * wise must not be discarded (such as kill, stg, etc)
+ */
+ unsigned keeps_count, keeps_sz;
+ struct ir3_instruction **keeps;
+
/* List of blocks: */
struct list_head block_list;
@@ -420,6 +430,9 @@ const char *ir3_instr_name(struct ir3_instruction *instr);
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
int num, int flags);
+void ir3_instr_set_address(struct ir3_instruction *instr,
+ struct ir3_instruction *addr);
+
static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
{
if (instr->flags & IR3_INSTR_MARK)
@@ -431,7 +444,7 @@ static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
void ir3_block_clear_mark(struct ir3_block *block);
void ir3_clear_mark(struct ir3 *shader);
-void ir3_count_instructions(struct ir3 *ir);
+unsigned ir3_count_instructions(struct ir3 *ir);
static inline int ir3_instr_regno(struct ir3_instruction *instr,
struct ir3_register *reg)
@@ -547,6 +560,26 @@ is_store(struct ir3_instruction *instr)
return false;
}
+static inline bool is_load(struct ir3_instruction *instr)
+{
+ if (is_mem(instr)) {
+ switch (instr->opc) {
+ case OPC_LDG:
+ case OPC_LDL:
+ case OPC_LDP:
+ case OPC_L2G:
+ case OPC_LDLW:
+ case OPC_LDC_4:
+ case OPC_LDLV:
+ /* probably some others too.. */
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
static inline bool is_input(struct ir3_instruction *instr)
{
/* in some cases, ldlv is used to fetch varying without
@@ -1036,6 +1069,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
/* cat6 instructions: */
INSTR2(6, LDLV)
INSTR2(6, LDG)
+INSTR3(6, STG)
/* ************************************************************************* */
/* split this out or find some helper to use.. like main/bitset.h.. */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index ad9d2719d59..ede29f445dc 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -43,127 +43,15 @@
#include "instr-a3xx.h"
#include "ir3.h"
-static void dump_reg(const char *name, uint32_t r)
-{
- if (r != regid(63,0))
- debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
-}
-
-static void dump_semantic(struct ir3_shader_variant *so,
- unsigned sem, const char *name)
-{
- uint32_t regid;
- regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
- dump_reg(name, regid);
-}
-
static void dump_info(struct ir3_shader_variant *so, const char *str)
{
uint32_t *bin;
- const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";
-
- // for debug, dump some before/after info:
+ const char *type = ir3_shader_stage(so->shader);
// TODO make gpu_id configurable on cmdline
bin = ir3_shader_assemble(so, 320);
- if (fd_mesa_debug & FD_DBG_DISASM) {
- struct ir3 *ir = so->ir;
- struct ir3_register *reg;
- uint8_t regid;
- unsigned i;
-
- debug_printf("; %s: %s\n", type, str);
-
- for (i = 0; i < ir->ninputs; i++) {
- if (!ir->inputs[i]) {
- debug_printf("; in%d unused\n", i);
- continue;
- }
- reg = ir->inputs[i]->regs[0];
- regid = reg->num;
- debug_printf("@in(%sr%d.%c)\tin%d\n",
- (reg->flags & IR3_REG_HALF) ? "h" : "",
- (regid >> 2), "xyzw"[regid & 0x3], i);
- }
-
- for (i = 0; i < ir->noutputs; i++) {
- if (!ir->outputs[i]) {
- debug_printf("; out%d unused\n", i);
- continue;
- }
- /* kill shows up as a virtual output.. skip it! */
- if (is_kill(ir->outputs[i]))
- continue;
- reg = ir->outputs[i]->regs[0];
- regid = reg->num;
- debug_printf("@out(%sr%d.%c)\tout%d\n",
- (reg->flags & IR3_REG_HALF) ? "h" : "",
- (regid >> 2), "xyzw"[regid & 0x3], i);
- }
-
- for (i = 0; i < so->immediates_count; i++) {
- debug_printf("@const(c%d.x)\t", so->first_immediate + i);
- debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
- so->immediates[i].val[0],
- so->immediates[i].val[1],
- so->immediates[i].val[2],
- so->immediates[i].val[3]);
- }
-
- disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
-
- debug_printf("; %s: outputs:", type);
- for (i = 0; i < so->outputs_count; i++) {
- uint8_t regid = so->outputs[i].regid;
- ir3_semantic sem = so->outputs[i].semantic;
- debug_printf(" r%d.%c (%u:%u)",
- (regid >> 2), "xyzw"[regid & 0x3],
- sem2name(sem), sem2idx(sem));
- }
- debug_printf("\n");
- debug_printf("; %s: inputs:", type);
- for (i = 0; i < so->inputs_count; i++) {
- uint8_t regid = so->inputs[i].regid;
- ir3_semantic sem = so->inputs[i].semantic;
- debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
- (regid >> 2), "xyzw"[regid & 0x3],
- sem2name(sem), sem2idx(sem),
- so->inputs[i].compmask,
- so->inputs[i].inloc,
- so->inputs[i].bary);
- }
- debug_printf("\n");
- }
-
- /* print generic shader info: */
- debug_printf("; %s: %u instructions, %d half, %d full\n", type,
- so->info.instrs_count,
- so->info.max_half_reg + 1,
- so->info.max_reg + 1);
-
- /* print shader type specific info: */
- switch (so->type) {
- case SHADER_VERTEX:
- dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
- dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
- break;
- case SHADER_FRAGMENT:
- dump_reg("pos (bary)", so->pos_regid);
- dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
- dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
- /* these two are hard-coded since we don't know how to
- * program them to anything but all 0's...
- */
- if (so->frag_coord)
- debug_printf("; fragcoord: r0.x\n");
- if (so->frag_face)
- debug_printf("; fragface: hr0.x\n");
- break;
- case SHADER_COMPUTE:
- break;
- }
+ debug_printf("; %s: %s\n", type, str);
+ ir3_shader_disasm(so, bin);
free(bin);
-
- debug_printf("\n");
}
@@ -205,8 +93,7 @@ static void print_usage(void)
printf(" --saturate-s MASK - bitmask of samplers to saturate S coord\n");
printf(" --saturate-t MASK - bitmask of samplers to saturate T coord\n");
printf(" --saturate-r MASK - bitmask of samplers to saturate R coord\n");
- printf(" --nocp - disable copy propagation\n");
- printf(" --nir - use NIR compiler\n");
+ printf(" --stream-out - enable stream-out (aka transform feedback)\n");
printf(" --help - show this message\n");
}
@@ -218,6 +105,7 @@ int main(int argc, char **argv)
struct tgsi_parse_context parse;
struct ir3_compiler *compiler;
struct ir3_shader_variant v;
+ struct ir3_shader s;
struct ir3_shader_key key = {};
const char *info;
void *ptr;
@@ -225,6 +113,9 @@ int main(int argc, char **argv)
fd_mesa_debug |= FD_DBG_DISASM;
+ memset(&s, 0, sizeof(s));
+ memset(&v, 0, sizeof(v));
+
/* cmdline args which impact shader variant get spit out in a
* comment on the first line.. a quick/dirty way to preserve
* that info so when ir3test recompiles the shader with a new
@@ -281,6 +172,24 @@ int main(int argc, char **argv)
continue;
}
+ if (!strcmp(argv[n], "--stream-out")) {
+ struct pipe_stream_output_info *so = &s.stream_output;
+ debug_printf(" %s", argv[n]);
+ /* TODO more dynamic config based on number of outputs, etc
+ * rather than just hard-code for first output:
+ */
+ so->num_outputs = 1;
+ so->stride[0] = 4;
+ so->output[0].register_index = 0;
+ so->output[0].start_component = 0;
+ so->output[0].num_components = 4;
+ so->output[0].output_buffer = 0;
+ so->output[0].dst_offset = 2;
+ so->output[0].stream = 0;
+ n++;
+ continue;
+ }
+
if (!strcmp(argv[n], "--help")) {
print_usage();
return 0;
@@ -292,9 +201,6 @@ int main(int argc, char **argv)
filename = argv[n];
- memset(&v, 0, sizeof(v));
- v.key = key;
-
ret = read_file(filename, &ptr, &size);
if (ret) {
print_usage();
@@ -307,16 +213,21 @@ int main(int argc, char **argv)
if (!tgsi_text_translate(ptr, toks, Elements(toks)))
errx(1, "could not parse `%s'", filename);
+ s.tokens = toks;
+
+ v.key = key;
+ v.shader = &s;
+
tgsi_parse_init(&parse, toks);
switch (parse.FullHeader.Processor.Processor) {
case TGSI_PROCESSOR_FRAGMENT:
- v.type = SHADER_FRAGMENT;
+ s.type = v.type = SHADER_FRAGMENT;
break;
case TGSI_PROCESSOR_VERTEX:
- v.type = SHADER_VERTEX;
+ s.type = v.type = SHADER_VERTEX;
break;
case TGSI_PROCESSOR_COMPUTE:
- v.type = SHADER_COMPUTE;
+ s.type = v.type = SHADER_COMPUTE;
break;
}
@@ -324,7 +235,7 @@ int main(int argc, char **argv)
compiler = ir3_compiler_create(320);
info = "NIR compiler";
- ret = ir3_compile_shader_nir(compiler, &v, toks, key);
+ ret = ir3_compile_shader_nir(compiler, &v);
if (ret) {
fprintf(stderr, "compiler failed!\n");
return ret;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
index 86b1161d9cb..697afeba61a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.h
@@ -36,14 +36,13 @@ struct ir3_ra_reg_set;
struct ir3_compiler {
uint32_t gpu_id;
struct ir3_ra_reg_set *set;
+ uint32_t shader_count;
};
struct ir3_compiler * ir3_compiler_create(uint32_t gpu_id);
void ir3_compiler_destroy(struct ir3_compiler *compiler);
int ir3_compile_shader_nir(struct ir3_compiler *compiler,
- struct ir3_shader_variant *so,
- const struct tgsi_token *tokens,
- struct ir3_shader_key key);
+ struct ir3_shader_variant *so);
#endif /* IR3_COMPILER_H_ */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 48b1d8f3606..0ab33455ed1 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -117,10 +117,6 @@ struct ir3_compile {
/* for looking up which system value is which */
unsigned sysval_semantics[8];
- /* list of kill instructions: */
- struct ir3_instruction *kill[16];
- unsigned int kill_count;
-
/* set if we encounter something we can't handle yet, so we
* can bail cleanly and fallback to TGSI compiler f/e
*/
@@ -153,6 +149,7 @@ static struct nir_shader *to_nir(const struct tgsi_token *tokens)
nir_opt_global_to_local(s);
nir_convert_to_ssa(s);
nir_lower_idiv(s);
+ nir_lower_load_const_to_scalar(s);
do {
progress = false;
@@ -261,13 +258,29 @@ compile_init(struct ir3_compiler *compiler,
so->first_driver_param = so->first_immediate = ctx->s->num_uniforms;
- /* one (vec4) slot for vertex id base: */
- if (so->type == SHADER_VERTEX)
- so->first_immediate++;
+ /* Layout of constant registers:
+ *
+ * num_uniform * vec4 - user consts
+ * 4 * vec4 - UBO addresses
+ * if (vertex shader) {
+ * 1 * vec4 - driver params (IR3_DP_*)
+ * 1 * vec4 - stream-out addresses
+ * }
+ *
+ * TODO this could be made more dynamic, to at least skip sections
+ * that we don't need..
+ */
/* reserve 4 (vec4) slots for ubo base addresses: */
so->first_immediate += 4;
+ if (so->type == SHADER_VERTEX) {
+ /* one (vec4) slot for driver params (see ir3_driver_param): */
+ so->first_immediate++;
+ /* one (vec4) slot for stream-output base addresses: */
+ so->first_immediate++;
+ }
+
return ctx;
}
@@ -637,9 +650,8 @@ create_uniform_indirect(struct ir3_compile *ctx, unsigned n,
mov->cat1.dst_type = TYPE_U32;
ir3_reg_create(mov, 0, 0);
ir3_reg_create(mov, n, IR3_REG_CONST | IR3_REG_RELATIV);
- mov->address = address;
- array_insert(ctx->ir->indirects, mov);
+ ir3_instr_set_address(mov, address);
return mov;
}
@@ -677,9 +689,8 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
src->instr = collect;
src->size = arrsz;
src->offset = n;
- mov->address = address;
- array_insert(ctx->ir->indirects, mov);
+ ir3_instr_set_address(mov, address);
return mov;
}
@@ -700,25 +711,21 @@ create_indirect_store(struct ir3_compile *ctx, unsigned arrsz, unsigned n,
dst->size = arrsz;
dst->offset = n;
ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
- mov->address = address;
mov->fanin = collect;
- array_insert(ctx->ir->indirects, mov);
+ ir3_instr_set_address(mov, address);
return mov;
}
static struct ir3_instruction *
-create_input(struct ir3_block *block, struct ir3_instruction *instr,
- unsigned n)
+create_input(struct ir3_block *block, unsigned n)
{
struct ir3_instruction *in;
in = ir3_instr_create(block, -1, OPC_META_INPUT);
in->inout.block = block;
ir3_reg_create(in, n, 0);
- if (instr)
- ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr;
return in;
}
@@ -750,7 +757,7 @@ create_frag_coord(struct ir3_compile *ctx, unsigned comp)
compile_assert(ctx, !ctx->frag_coord[comp]);
- ctx->frag_coord[comp] = create_input(ctx->block, NULL, 0);
+ ctx->frag_coord[comp] = create_input(ctx->block, 0);
switch (comp) {
case 0: /* .x */
@@ -789,7 +796,7 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp)
case 0: /* .x */
compile_assert(ctx, !ctx->frag_face);
- ctx->frag_face = create_input(block, NULL, 0);
+ ctx->frag_face = create_input(block, 0);
ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
/* for faceness, we always get -1 or 0 (int).. but TGSI expects
@@ -817,6 +824,14 @@ create_frag_face(struct ir3_compile *ctx, unsigned comp)
}
}
+static struct ir3_instruction *
+create_driver_param(struct ir3_compile *ctx, enum ir3_driver_param dp)
+{
+ /* first four vec4 sysval's reserved for UBOs: */
+ unsigned r = regid(ctx->so->first_driver_param + 4, dp);
+ return create_uniform(ctx, r);
+}
+
/* helper for instructions that produce multiple consecutive scalar
* outputs which need to have a split/fanout meta instruction inserted
*/
@@ -1218,7 +1233,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction *load =
ir3_LDG(b, addr, 0, create_immed(b, 1), 0);
load->cat6.type = TYPE_U32;
- load->cat6.offset = off + i * 4; /* byte offset */
+ load->cat6.src_offset = off + i * 4; /* byte offset */
dst[i] = load;
}
}
@@ -1307,7 +1322,7 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
* store_output_indirect? or move this into
* create_indirect_store()?
*/
- for (int j = i; j < arr->length; j += 4) {
+ for (int j = i; j < arr->length; j += intr->num_components) {
struct ir3_instruction *split;
split = ir3_instr_create(ctx->block, -1, OPC_META_FO);
@@ -1318,6 +1333,13 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
arr->arr[j] = split;
}
}
+ /* fixup fanout/split neighbors: */
+ for (int i = 0; i < arr->length; i++) {
+ arr->arr[i]->cp.right = (i < (arr->length - 1)) ?
+ arr->arr[i+1] : NULL;
+ arr->arr[i]->cp.left = (i > 0) ?
+ arr->arr[i-1] : NULL;
+ }
break;
}
default:
@@ -1372,6 +1394,11 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
dst[i] = create_uniform_indirect(ctx, n,
get_addr(ctx, src[0]));
}
+ /* NOTE: if relative addressing is used, we set constlen in
+ * the compiler (to worst-case value) since we don't know in
+ * the assembler what the max addr reg value can be:
+ */
+ ctx->so->constlen = ctx->s->num_uniforms;
break;
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ubo_indirect:
@@ -1409,9 +1436,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_base_vertex:
if (!ctx->basevertex) {
- /* first four vec4 sysval's reserved for UBOs: */
- unsigned r = regid(ctx->so->first_driver_param + 4, 0);
- ctx->basevertex = create_uniform(ctx, r);
+ ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
add_sysval_input(ctx, TGSI_SEMANTIC_BASEVERTEX,
ctx->basevertex);
}
@@ -1419,7 +1444,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_vertex_id_zero_base:
if (!ctx->vertex_id) {
- ctx->vertex_id = create_input(ctx->block, NULL, 0);
+ ctx->vertex_id = create_input(ctx->block, 0);
add_sysval_input(ctx, TGSI_SEMANTIC_VERTEXID_NOBASE,
ctx->vertex_id);
}
@@ -1427,7 +1452,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
break;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
- ctx->instance_id = create_input(ctx->block, NULL, 0);
+ ctx->instance_id = create_input(ctx->block, 0);
add_sysval_input(ctx, TGSI_SEMANTIC_INSTANCEID,
ctx->instance_id);
}
@@ -1456,7 +1481,7 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
kill = ir3_KILL(b, cond, 0);
array_insert(ctx->ir->predicates, kill);
- ctx->kill[ctx->kill_count++] = kill;
+ array_insert(ctx->ir->keeps, kill);
ctx->so->has_kill = true;
break;
@@ -1950,6 +1975,115 @@ emit_cf_list(struct ir3_compile *ctx, struct exec_list *list)
}
}
+/* emit stream-out code. At this point, the current block is the original
+ * (nir) end block, and nir ensures that all flow control paths terminate
+ * into the end block. We re-purpose the original end block to generate
+ * the 'if (vtxcnt < maxvtxcnt)' condition, then append the conditional
+ * block holding stream-out write instructions, followed by the new end
+ * block:
+ *
+ * blockOrigEnd {
+ * p0.x = (vtxcnt < maxvtxcnt)
+ * // succs: blockStreamOut, blockNewEnd
+ * }
+ * blockStreamOut {
+ * ... stream-out instructions ...
+ * // succs: blockNewEnd
+ * }
+ * blockNewEnd {
+ * }
+ */
+static void
+emit_stream_out(struct ir3_compile *ctx)
+{
+ struct ir3_shader_variant *v = ctx->so;
+ struct ir3 *ir = ctx->ir;
+ struct pipe_stream_output_info *strmout =
+ &ctx->so->shader->stream_output;
+ struct ir3_block *orig_end_block, *stream_out_block, *new_end_block;
+ struct ir3_instruction *vtxcnt, *maxvtxcnt, *cond;
+ struct ir3_instruction *bases[PIPE_MAX_SO_BUFFERS];
+
+ /* create vtxcnt input in input block at top of shader,
+ * so that it is seen as live over the entire duration
+ * of the shader:
+ */
+ vtxcnt = create_input(ctx->in_block, 0);
+ add_sysval_input(ctx, IR3_SEMANTIC_VTXCNT, vtxcnt);
+
+ maxvtxcnt = create_driver_param(ctx, IR3_DP_VTXCNT_MAX);
+
+ /* at this point, we are at the original 'end' block,
+ * re-purpose this block to stream-out condition, then
+ * append stream-out block and new-end block
+ */
+ orig_end_block = ctx->block;
+
+ stream_out_block = ir3_block_create(ir);
+ list_addtail(&stream_out_block->node, &ir->block_list);
+
+ new_end_block = ir3_block_create(ir);
+ list_addtail(&new_end_block->node, &ir->block_list);
+
+ orig_end_block->successors[0] = stream_out_block;
+ orig_end_block->successors[1] = new_end_block;
+ stream_out_block->successors[0] = new_end_block;
+
+ /* setup 'if (vtxcnt < maxvtxcnt)' condition: */
+ cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0);
+ cond->regs[0]->num = regid(REG_P0, 0);
+ cond->cat2.condition = IR3_COND_LT;
+
+ /* condition goes on previous block to the conditional,
+ * since it is used to pick which of the two successor
+ * paths to take:
+ */
+ orig_end_block->condition = cond;
+
+ /* switch to stream_out_block to generate the stream-out
+ * instructions:
+ */
+ ctx->block = stream_out_block;
+
+ /* Calculate base addresses based on vtxcnt. Instructions
+ * generated for bases not used in following loop will be
+ * stripped out in the backend.
+ */
+ for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+ unsigned stride = strmout->stride[i];
+ struct ir3_instruction *base, *off;
+
+ base = create_uniform(ctx, regid(v->first_driver_param + 5, i));
+
+ /* 24-bit should be enough: */
+ off = ir3_MUL_U(ctx->block, vtxcnt, 0,
+ create_immed(ctx->block, stride * 4), 0);
+
+ bases[i] = ir3_ADD_S(ctx->block, off, 0, base, 0);
+ }
+
+ /* Generate the per-output store instructions: */
+ for (unsigned i = 0; i < strmout->num_outputs; i++) {
+ for (unsigned j = 0; j < strmout->output[i].num_components; j++) {
+ unsigned c = j + strmout->output[i].start_component;
+ struct ir3_instruction *base, *out, *stg;
+
+ base = bases[strmout->output[i].output_buffer];
+ out = ctx->ir->outputs[regid(strmout->output[i].register_index, c)];
+
+ stg = ir3_STG(ctx->block, base, 0, out, 0,
+ create_immed(ctx->block, 1), 0);
+ stg->cat6.type = TYPE_U32;
+ stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4;
+
+ array_insert(ctx->ir->keeps, stg);
+ }
+ }
+
+ /* and finally switch to the new_end_block: */
+ ctx->block = new_end_block;
+}
+
static void
emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
{
@@ -1960,6 +2094,24 @@ emit_function(struct ir3_compile *ctx, nir_function_impl *impl)
* into which we emit the 'end' instruction.
*/
compile_assert(ctx, list_empty(&ctx->block->instr_list));
+
+ /* If stream-out (aka transform-feedback) enabled, emit the
+ * stream-out instructions, followed by a new empty block (into
+ * which the 'end' instruction lands).
+ *
+ * NOTE: it is done in this order, rather than inserting before
+ * we emit end_block, because NIR guarantees that all blocks
+ * flow into end_block, and that end_block has no successors.
+ * So by re-purposing end_block as the first block of stream-
+ * out, we guarantee that all exit paths flow into the stream-
+ * out instructions.
+ */
+ if ((ctx->so->shader->stream_output.num_outputs > 0) &&
+ !ctx->so->key.binning_pass) {
+ debug_assert(ctx->so->type == SHADER_VERTEX);
+ emit_stream_out(ctx);
+ }
+
ir3_END(ctx->block);
}
@@ -1974,7 +2126,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
unsigned semantic_index = in->data.index;
unsigned n = in->data.driver_location;
- DBG("; in: %u:%u, len=%ux%u, loc=%u\n",
+ DBG("; in: %u:%u, len=%ux%u, loc=%u",
semantic_name, semantic_index, array_len,
ncomp, n);
@@ -2045,7 +2197,7 @@ setup_input(struct ir3_compile *ctx, nir_variable *in)
so->inputs[n].inloc + i - 8, use_ldlv);
}
} else {
- instr = create_input(ctx->block, NULL, idx);
+ instr = create_input(ctx->block, idx);
}
ctx->ir->inputs[idx] = instr;
@@ -2069,7 +2221,7 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
unsigned n = out->data.driver_location;
unsigned comp = 0;
- DBG("; out: %u:%u, len=%ux%u, loc=%u\n",
+ DBG("; out: %u:%u, len=%ux%u, loc=%u",
semantic_name, semantic_index, array_len,
ncomp, n);
@@ -2098,6 +2250,10 @@ setup_output(struct ir3_compile *ctx, nir_variable *out)
so->writes_pos = true;
break;
case TGSI_SEMANTIC_COLOR:
+ if (semantic_index == -1) {
+ semantic_index = 0;
+ so->color0_mrt = 1;
+ }
break;
default:
compile_error(ctx, "unknown FS semantic name: %s\n",
@@ -2136,13 +2292,9 @@ emit_instructions(struct ir3_compile *ctx)
ninputs = exec_list_length(&ctx->s->inputs) * 4;
noutputs = exec_list_length(&ctx->s->outputs) * 4;
- /* we need to allocate big enough outputs array so that
- * we can stuff the kill's at the end. Likewise for vtx
- * shaders, we need to leave room for sysvals:
+ /* for vtx shaders, we need to leave room for sysvals:
*/
- if (ctx->so->type == SHADER_FRAGMENT) {
- noutputs += ARRAY_SIZE(ctx->kill);
- } else if (ctx->so->type == SHADER_VERTEX) {
+ if (ctx->so->type == SHADER_VERTEX) {
ninputs += 8;
}
@@ -2153,9 +2305,7 @@ emit_instructions(struct ir3_compile *ctx)
ctx->in_block = ctx->block;
list_addtail(&ctx->block->node, &ctx->ir->block_list);
- if (ctx->so->type == SHADER_FRAGMENT) {
- ctx->ir->noutputs -= ARRAY_SIZE(ctx->kill);
- } else if (ctx->so->type == SHADER_VERTEX) {
+ if (ctx->so->type == SHADER_VERTEX) {
ctx->ir->ninputs -= 8;
}
@@ -2254,13 +2404,13 @@ fixup_frag_inputs(struct ir3_compile *ctx)
so->pos_regid = regid;
/* r0.x */
- instr = create_input(ctx->in_block, NULL, ir->ninputs);
+ instr = create_input(ctx->in_block, ir->ninputs);
instr->regs[0]->num = regid++;
inputs[ir->ninputs++] = instr;
ctx->frag_pos->regs[1]->instr = instr;
/* r0.y */
- instr = create_input(ctx->in_block, NULL, ir->ninputs);
+ instr = create_input(ctx->in_block, ir->ninputs);
instr->regs[0]->num = regid++;
inputs[ir->ninputs++] = instr;
ctx->frag_pos->regs[2]->instr = instr;
@@ -2270,9 +2420,7 @@ fixup_frag_inputs(struct ir3_compile *ctx)
int
ir3_compile_shader_nir(struct ir3_compiler *compiler,
- struct ir3_shader_variant *so,
- const struct tgsi_token *tokens,
- struct ir3_shader_key key)
+ struct ir3_shader_variant *so)
{
struct ir3_compile *ctx;
struct ir3 *ir;
@@ -2282,7 +2430,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
assert(!so->ir);
- ctx = compile_init(compiler, so, tokens);
+ ctx = compile_init(compiler, so, so->shader->tokens);
if (!ctx) {
DBG("INIT failed!");
ret = -1;
@@ -2307,7 +2455,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
fixup_frag_inputs(ctx);
/* at this point, for binning pass, throw away unneeded outputs: */
- if (key.binning_pass) {
+ if (so->key.binning_pass) {
for (i = 0, j = 0; i < so->outputs_count; i++) {
unsigned name = sem2name(so->outputs[i].semantic);
unsigned idx = sem2idx(so->outputs[i].semantic);
@@ -2332,7 +2480,7 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
/* if we want half-precision outputs, mark the output registers
* as half:
*/
- if (key.half_precision) {
+ if (so->key.half_precision) {
for (i = 0; i < ir->noutputs; i++) {
struct ir3_instruction *out = ir->outputs[i];
if (!out)
@@ -2353,15 +2501,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
}
}
- /* at this point, we want the kill's in the outputs array too,
- * so that they get scheduled (since they have no dst).. we've
- * already ensured that the array is big enough in push_block():
- */
- if (so->type == SHADER_FRAGMENT) {
- for (i = 0; i < ctx->kill_count; i++)
- ir->outputs[ir->noutputs++] = ctx->kill[i];
- }
-
if (fd_mesa_debug & FD_DBG_OPTMSGS) {
printf("BEFORE CP:\n");
ir3_print(ir);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
index 8c7c80f7aae..be4e4e81109 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c
@@ -291,7 +291,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
instr->regs[n+1] = src_reg;
if (src_reg->flags & IR3_REG_RELATIV)
- instr->address = reg->instr->address;
+ ir3_instr_set_address(instr, reg->instr->address);
return;
}
@@ -300,7 +300,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
!conflicts(instr->address, reg->instr->address)) {
src_reg->flags = new_flags;
instr->regs[n+1] = src_reg;
- instr->address = reg->instr->address;
+ ir3_instr_set_address(instr, reg->instr->address);
return;
}
@@ -389,7 +389,7 @@ instr_cp(struct ir3_instruction *instr, unsigned *flags)
}
if (instr->address)
- instr->address = instr_cp(instr->address, NULL);
+ ir3_instr_set_address(instr, instr_cp(instr->address, NULL));
return instr;
}
@@ -408,6 +408,10 @@ ir3_cp(struct ir3 *ir)
}
}
+ for (unsigned i = 0; i < ir->keeps_count; i++) {
+ ir->keeps[i] = instr_cp(ir->keeps[i], NULL);
+ }
+
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
if (block->condition)
block->condition = instr_cp(block->condition, NULL);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
index 3a108243479..97df0c2ac99 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -156,6 +156,9 @@ ir3_depth(struct ir3 *ir)
if (ir->outputs[i])
ir3_instr_depth(ir->outputs[i]);
+ for (i = 0; i < ir->keeps_count; i++)
+ ir3_instr_depth(ir->keeps[i]);
+
/* We also need to account for if-condition: */
list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
if (block->condition)
@@ -167,6 +170,15 @@ ir3_depth(struct ir3 *ir)
remove_unused_by_block(block);
}
+ /* note that we can end up with unused indirects, but we should
+ * not end up with unused predicates.
+ */
+ for (i = 0; i < ir->indirects_count; i++) {
+ struct ir3_instruction *instr = ir->indirects[i];
+ if (instr->depth == DEPTH_UNUSED)
+ ir->indirects[i] = NULL;
+ }
+
/* cleanup unused inputs: */
for (i = 0; i < ir->ninputs; i++) {
struct ir3_instruction *in = ir->inputs[i];
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c
index 70d9b08e019..ca28aefd502 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -236,6 +236,11 @@ find_neighbors(struct ir3 *ir)
instr_find_neighbors(instr);
}
}
+
+ for (i = 0; i < ir->keeps_count; i++) {
+ struct ir3_instruction *instr = ir->keeps[i];
+ instr_find_neighbors(instr);
+ }
}
void
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
index f4a4223ae17..e94293f6d6b 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
@@ -182,14 +182,14 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
*/
ctx->has_samp = true;
regmask_set(&needs_sy, n->regs[0]);
- } else if (is_mem(n)) {
+ } else if (is_load(n)) {
regmask_set(&needs_sy, n->regs[0]);
}
/* both tex/sfu appear to not always immediately consume
* their src register(s):
*/
- if (is_tex(n) || is_sfu(n) || is_mem(n)) {
+ if (is_tex(n) || is_sfu(n) || is_load(n)) {
foreach_src(reg, n) {
if (reg_gpr(reg))
regmask_set(&needs_ss_war, reg);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c
index f377982dd5e..07e03d26908 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_print.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c
@@ -175,6 +175,20 @@ print_instr(struct ir3_instruction *instr, int lvl)
printf("]");
}
+ if (instr->cp.left) {
+ printf(", left=_");
+ printf("[");
+ print_instr_name(instr->cp.left);
+ printf("]");
+ }
+
+ if (instr->cp.right) {
+ printf(", right=_");
+ printf("[");
+ print_instr_name(instr->cp.right);
+ printf("]");
+ }
+
if (is_meta(instr)) {
if (instr->opc == OPC_META_FO) {
printf(", off=%d", instr->fo.off);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
index e5aba859fab..eaf3b3c35e8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c
@@ -241,6 +241,21 @@ ir3_ra_alloc_reg_set(void *memctx)
return set;
}
+/* additional block-data (per-block) */
+struct ir3_ra_block_data {
+ BITSET_WORD *def; /* variables defined before used in block */
+ BITSET_WORD *use; /* variables used before defined in block */
+ BITSET_WORD *livein; /* which defs reach entry point of block */
+ BITSET_WORD *liveout; /* which defs reach exit point of block */
+};
+
+/* additional instruction-data (per-instruction) */
+struct ir3_ra_instr_data {
+ /* cached instruction 'definer' info: */
+ struct ir3_instruction *defn;
+ int off, sz, cls;
+};
+
/* register-assign context, per-shader */
struct ir3_ra_ctx {
struct ir3 *ir;
@@ -254,14 +269,7 @@ struct ir3_ra_ctx {
unsigned class_base[total_class_count];
unsigned instr_cnt;
unsigned *def, *use; /* def/use table */
-};
-
-/* additional block-data (per-block) */
-struct ir3_ra_block_data {
- BITSET_WORD *def; /* variables defined before used in block */
- BITSET_WORD *use; /* variables used before defined in block */
- BITSET_WORD *livein; /* which defs reach entry point of block */
- BITSET_WORD *liveout; /* which defs reach exit point of block */
+ struct ir3_ra_instr_data *instrd;
};
static bool
@@ -291,8 +299,6 @@ is_temp(struct ir3_register *reg)
{
if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
return false;
- if (reg->flags & IR3_REG_RELATIV) // TODO
- return false;
if ((reg->num == regid(REG_A0, 0)) ||
(reg->num == regid(REG_P0, 0)))
return false;
@@ -309,28 +315,45 @@ writes_gpr(struct ir3_instruction *instr)
}
static struct ir3_instruction *
-get_definer(struct ir3_instruction *instr, int *sz, int *off)
+get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr,
+ int *sz, int *off)
{
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_instruction *d = NULL;
+
+ if (instr->fanin)
+ return get_definer(ctx, instr->fanin, sz, off);
+
+ if (id->defn) {
+ *sz = id->sz;
+ *off = id->off;
+ return id->defn;
+ }
+
if (is_meta(instr) && (instr->opc == OPC_META_FI)) {
/* What about the case where collect is subset of array, we
* need to find the distance between where actual array starts
* and fanin.. that probably doesn't happen currently.
*/
struct ir3_register *src;
+ int dsz, doff;
/* note: don't use foreach_ssa_src as this gets called once
* while assigning regs (which clears SSA flag)
*/
- foreach_src(src, instr) {
+ foreach_src_n(src, n, instr) {
+ struct ir3_instruction *dd;
if (!src->instr)
continue;
- if ((!d) || (src->instr->ip < d->ip))
- d = src->instr;
- }
- *sz = instr->regs_count - 1;
- *off = 0;
+ dd = get_definer(ctx, src->instr, &dsz, &doff);
+
+ if ((!d) || (dd->ip < d->ip)) {
+ d = dd;
+ *sz = dsz;
+ *off = doff - n;
+ }
+ }
} else if (instr->cp.right || instr->cp.left) {
/* covers also the meta:fo case, which ends up w/ single
@@ -386,7 +409,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
struct ir3_instruction *dd;
int dsz, doff;
- dd = get_definer(phi, &dsz, &doff);
+ dd = get_definer(ctx, phi, &dsz, &doff);
*sz = MAX2(*sz, dsz);
*off = doff;
@@ -401,6 +424,7 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
* the phi, so we don't need to chase definers
*/
struct ir3_register *src;
+ struct ir3_instruction *dd = d;
/* note: don't use foreach_ssa_src as this gets called once
* while assigning regs (which clears SSA flag)
@@ -408,16 +432,18 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
foreach_src(src, d) {
if (!src->instr)
continue;
- if (src->instr->ip < d->ip)
- d = src->instr;
+ if (src->instr->ip < dd->ip)
+ dd = src->instr;
}
+
+ d = dd;
}
if (is_meta(d) && (d->opc == OPC_META_FO)) {
struct ir3_instruction *dd;
int dsz, doff;
- dd = get_definer(d->regs[1]->instr, &dsz, &doff);
+ dd = get_definer(ctx, d->regs[1]->instr, &dsz, &doff);
/* by definition, should come before: */
debug_assert(dd->ip < d->ip);
@@ -429,9 +455,30 @@ get_definer(struct ir3_instruction *instr, int *sz, int *off)
d = dd;
}
+ id->defn = d;
+ id->sz = *sz;
+ id->off = *off;
+
return d;
}
+static void
+ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
+{
+ list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+ if (instr->regs_count == 0)
+ continue;
+ /* couple special cases: */
+ if (writes_addr(instr) || writes_pred(instr)) {
+ id->cls = -1;
+ continue;
+ }
+ id->defn = get_definer(ctx, instr, &id->sz, &id->off);
+ id->cls = size_to_class(id->sz, is_half(id->defn));
+ }
+}
+
/* give each instruction a name (and ip), and count up the # of names
* of each class
*/
@@ -439,8 +486,11 @@ static void
ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+#ifdef DEBUG
+ instr->name = ~0;
+#endif
ctx->instr_cnt++;
@@ -450,9 +500,7 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
if (!writes_gpr(instr))
continue;
- defn = get_definer(instr, &sz, &off);
-
- if (defn != instr)
+ if (id->defn != instr)
continue;
/* arrays which don't fit in one of the pre-defined class
@@ -460,9 +508,8 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*
* TODO but we still need to allocate names for them, don't we??
*/
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- instr->name = ctx->class_alloc_count[cls]++;
+ if (id->cls >= 0) {
+ instr->name = ctx->class_alloc_count[id->cls]++;
ctx->alloc_count++;
}
}
@@ -471,8 +518,16 @@ ra_block_name_instructions(struct ir3_ra_ctx *ctx, struct ir3_block *block)
static void
ra_init(struct ir3_ra_ctx *ctx)
{
+ unsigned n;
+
ir3_clear_mark(ctx->ir);
- ir3_count_instructions(ctx->ir);
+ n = ir3_count_instructions(ctx->ir);
+
+ ctx->instrd = rzalloc_array(NULL, struct ir3_ra_instr_data, n);
+
+ list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
+ ra_block_find_definers(ctx, block);
+ }
list_for_each_entry (struct ir3_block, block, &ctx->ir->block_list, node) {
ra_block_name_instructions(ctx, block);
@@ -488,6 +543,7 @@ ra_init(struct ir3_ra_ctx *ctx)
}
ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
+ ralloc_steal(ctx->g, ctx->instrd);
ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
}
@@ -555,39 +611,36 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
*/
if (writes_gpr(instr)) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- if (defn == instr) {
+ if (id->defn == instr) {
/* arrays which don't fit in one of the pre-defined class
* sizes are pre-colored:
*/
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
- ctx->def[name] = defn->ip;
- ctx->use[name] = defn->ip;
+ ctx->def[name] = id->defn->ip;
+ ctx->use[name] = id->defn->ip;
/* since we are in SSA at this point: */
debug_assert(!BITSET_TEST(bd->use, name));
BITSET_SET(bd->def, name);
- if (is_half(defn)) {
+ if (is_half(id->defn)) {
ra_set_node_class(ctx->g, name,
- ctx->set->half_classes[cls - class_count]);
+ ctx->set->half_classes[id->cls - class_count]);
} else {
ra_set_node_class(ctx->g, name,
- ctx->set->classes[cls]);
+ ctx->set->classes[id->cls]);
}
/* extend the live range for phi srcs, which may come
* from the bottom of the loop
*/
- if (defn->regs[0]->flags & IR3_REG_PHI_SRC) {
- struct ir3_instruction *phi = defn->regs[0]->instr;
+ if (id->defn->regs[0]->flags & IR3_REG_PHI_SRC) {
+ struct ir3_instruction *phi = id->defn->regs[0]->instr;
foreach_ssa_src(src, phi) {
/* if src is after phi, then we need to extend
* the liverange to the end of src's block:
@@ -606,13 +659,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
foreach_ssa_src(src, instr) {
if (writes_gpr(src)) {
- struct ir3_instruction *srcdefn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[src->ip];
- srcdefn = get_definer(src, &sz, &off);
- cls = size_to_class(sz, is_half(srcdefn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, srcdefn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = MAX2(ctx->use[name], instr->ip);
if (!BITSET_TEST(bd->def, name))
BITSET_SET(bd->use, name);
@@ -704,13 +754,10 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
/* need to fix things up to keep outputs live: */
for (unsigned i = 0; i < ir->noutputs; i++) {
struct ir3_instruction *instr = ir->outputs[i];
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
ctx->use[name] = ctx->instr_cnt;
}
}
@@ -780,15 +827,12 @@ static void
reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
struct ir3_instruction *instr)
{
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- cls = size_to_class(sz, is_half(defn));
- if (cls >= 0) {
- unsigned name = ra_name(ctx, cls, defn);
+ if (id->cls >= 0) {
+ unsigned name = ra_name(ctx, id->cls, id->defn);
unsigned r = ra_get_node_reg(ctx->g, name);
- unsigned num = ctx->set->ra_reg_to_gpr[r] + off;
+ unsigned num = ctx->set->ra_reg_to_gpr[r] + id->off;
if (reg->flags & IR3_REG_RELATIV)
num += reg->offset;
@@ -796,7 +840,7 @@ reg_assign(struct ir3_ra_ctx *ctx, struct ir3_register *reg,
reg->num = num;
reg->flags &= ~(IR3_REG_SSA | IR3_REG_PHI_SRC);
- if (is_half(defn))
+ if (is_half(id->defn))
reg->flags |= IR3_REG_HALF;
}
}
@@ -851,19 +895,16 @@ ra_alloc(struct ir3_ra_ctx *ctx)
for (j = 0; i < ir->ninputs; i++) {
struct ir3_instruction *instr = ir->inputs[i];
if (instr) {
- struct ir3_instruction *defn;
- int cls, sz, off;
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
- defn = get_definer(instr, &sz, &off);
- if (defn == instr) {
+ if (id->defn == instr) {
unsigned name, reg;
- cls = size_to_class(sz, is_half(defn));
- name = ra_name(ctx, cls, defn);
- reg = ctx->set->gpr_to_ra_reg[cls][j];
+ name = ra_name(ctx, id->cls, id->defn);
+ reg = ctx->set->gpr_to_ra_reg[id->cls][j];
ra_set_node_reg(ctx->g, name, reg);
- j += sz;
+ j += id->sz;
}
}
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
index 49a4426d163..2ee325518f7 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
@@ -80,12 +80,12 @@ schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
list_delinit(&instr->node);
if (writes_addr(instr)) {
- assert(ctx->addr == NULL);
+ debug_assert(ctx->addr == NULL);
ctx->addr = instr;
}
if (writes_pred(instr)) {
- assert(ctx->pred == NULL);
+ debug_assert(ctx->pred == NULL);
ctx->pred = instr;
}
@@ -180,13 +180,13 @@ check_conflict(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
* free:
*/
if (writes_addr(instr) && ctx->addr) {
- assert(ctx->addr != instr);
+ debug_assert(ctx->addr != instr);
notes->addr_conflict = true;
return true;
}
if (writes_pred(instr) && ctx->pred) {
- assert(ctx->pred != instr);
+ debug_assert(ctx->pred != instr);
notes->pred_conflict = true;
return true;
}
@@ -261,6 +261,20 @@ instr_eligibility(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
return 0;
}
+/* could an instruction be scheduled if specified ssa src was scheduled? */
+static bool
+could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
+{
+ struct ir3_instruction *other_src;
+ foreach_ssa_src(other_src, instr) {
+ /* if dependency not scheduled, we aren't ready yet: */
+ if ((src != other_src) && !is_scheduled(other_src)) {
+ return false;
+ }
+ }
+ return true;
+}
+
/* move eligible instructions to the priority list: */
static unsigned
add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
@@ -272,6 +286,31 @@ add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
int e = instr_eligibility(ctx, notes, instr);
if (e < 0)
continue;
+
+ /* For instructions that write address register we need to
+ * make sure there is at least one instruction that uses the
+ * addr value which is otherwise ready.
+ *
+ * TODO if any instructions use pred register and have other
+ * src args, we would need to do the same for writes_pred()..
+ */
+ if (unlikely(writes_addr(instr))) {
+ struct ir3 *ir = instr->block->shader;
+ bool ready = false;
+ for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
+ struct ir3_instruction *indirect = ir->indirects[i];
+ if (!indirect)
+ continue;
+ if (indirect->address != instr)
+ continue;
+ ready = could_sched(indirect, instr);
+ }
+
+ /* nothing could be scheduled, so keep looking: */
+ if (!ready)
+ continue;
+ }
+
min_delay = MIN2(min_delay, e);
if (e == 0) {
/* remove from unscheduled list and into priority queue: */
@@ -287,20 +326,25 @@ add_eligible_instrs(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
* instructions which depend on the current address register
* to a clone of the instruction which wrote the address reg.
*/
-static void
+static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx)
{
- struct ir3 *ir = ctx->addr->block->shader;
+ struct ir3 *ir;
struct ir3_instruction *new_addr = NULL;
unsigned i;
debug_assert(ctx->addr);
+ ir = ctx->addr->block->shader;
+
for (i = 0; i < ir->indirects_count; i++) {
struct ir3_instruction *indirect = ir->indirects[i];
+ if (!indirect)
+ continue;
+
/* skip instructions already scheduled: */
- if (indirect->flags & IR3_INSTR_MARK)
+ if (is_scheduled(indirect))
continue;
/* remap remaining instructions using current addr
@@ -312,32 +356,36 @@ split_addr(struct ir3_sched_ctx *ctx)
/* original addr is scheduled, but new one isn't: */
new_addr->flags &= ~IR3_INSTR_MARK;
}
- indirect->address = new_addr;
+ ir3_instr_set_address(indirect, new_addr);
}
}
/* all remaining indirects remapped to new addr: */
ctx->addr = NULL;
+
+ return new_addr;
}
/* "spill" the predicate register by remapping any unscheduled
* instructions which depend on the current predicate register
* to a clone of the instruction which wrote the address reg.
*/
-static void
+static struct ir3_instruction *
split_pred(struct ir3_sched_ctx *ctx)
{
- struct ir3 *ir = ctx->pred->block->shader;
+ struct ir3 *ir;
struct ir3_instruction *new_pred = NULL;
unsigned i;
debug_assert(ctx->pred);
+ ir = ctx->pred->block->shader;
+
for (i = 0; i < ir->predicates_count; i++) {
struct ir3_instruction *predicated = ir->predicates[i];
/* skip instructions already scheduled: */
- if (predicated->flags & IR3_INSTR_MARK)
+ if (is_scheduled(predicated))
continue;
/* remap remaining instructions using current pred
@@ -358,6 +406,8 @@ split_pred(struct ir3_sched_ctx *ctx)
/* all remaining predicated remapped to new pred: */
ctx->pred = NULL;
+
+ return new_pred;
}
static void
@@ -407,20 +457,32 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
schedule(ctx, instr);
} else if (delay == ~0) {
+ struct ir3_instruction *new_instr = NULL;
+
/* nothing available to schedule.. if we are blocked on
* address/predicate register conflict, then break the
* deadlock by cloning the instruction that wrote that
* reg:
*/
if (notes.addr_conflict) {
- split_addr(ctx);
+ new_instr = split_addr(ctx);
} else if (notes.pred_conflict) {
- split_pred(ctx);
+ new_instr = split_pred(ctx);
} else {
debug_assert(0);
ctx->error = true;
return;
}
+
+ if (new_instr) {
+ list_del(&new_instr->node);
+ list_addtail(&new_instr->node, &unscheduled_list);
+ /* the original instr that wrote addr/pred may have
+ * originated from a different block:
+ */
+ new_instr->block = block;
+ }
+
} else {
/* and if we run out of instructions that can be scheduled,
* then it is time for nop's:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index b5b038100cc..312174c0c6d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -46,7 +46,8 @@ delete_variant(struct ir3_shader_variant *v)
{
if (v->ir)
ir3_destroy(v->ir);
- fd_bo_del(v->bo);
+ if (v->bo)
+ fd_bo_del(v->bo);
free(v);
}
@@ -139,6 +140,32 @@ assemble_variant(struct ir3_shader_variant *v)
memcpy(fd_bo_map(v->bo), bin, sz);
+ if (fd_mesa_debug & FD_DBG_DISASM) {
+ struct ir3_shader_key key = v->key;
+ DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
+ key.binning_pass, key.color_two_side, key.half_precision);
+ ir3_shader_disasm(v, bin);
+ }
+
+ if (fd_mesa_debug & FD_DBG_SHADERDB) {
+ /* print generic shader info: */
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u instructions, %u dwords\n",
+ ir3_shader_stage(v->shader),
+ v->shader->id, v->id,
+ v->info.instrs_count,
+ v->info.sizedwords);
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u half, %u full\n",
+ ir3_shader_stage(v->shader),
+ v->shader->id, v->id,
+ v->info.max_half_reg + 1,
+ v->info.max_reg + 1);
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %u const, %u constlen\n",
+ ir3_shader_stage(v->shader),
+ v->shader->id, v->id,
+ v->info.max_const + 1,
+ v->constlen);
+ }
+
free(bin);
/* no need to keep the ir around beyond this point: */
@@ -150,12 +177,12 @@ static struct ir3_shader_variant *
create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
{
struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
- const struct tgsi_token *tokens = shader->tokens;
int ret;
if (!v)
return NULL;
+ v->id = ++shader->variant_count;
v->shader = shader;
v->key = key;
v->type = shader->type;
@@ -163,10 +190,10 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
if (fd_mesa_debug & FD_DBG_DISASM) {
DBG("dump tgsi: type=%d, k={bp=%u,cts=%u,hp=%u}", shader->type,
key.binning_pass, key.color_two_side, key.half_precision);
- tgsi_dump(tokens, 0);
+ tgsi_dump(shader->tokens, 0);
}
- ret = ir3_compile_shader_nir(shader->compiler, v, tokens, key);
+ ret = ir3_compile_shader_nir(shader->compiler, v);
if (ret) {
debug_error("compile failed!");
goto fail;
@@ -178,12 +205,6 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
goto fail;
}
- if (fd_mesa_debug & FD_DBG_DISASM) {
- DBG("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type,
- key.binning_pass, key.color_two_side, key.half_precision);
- disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
- }
-
return v;
fail:
@@ -228,8 +249,10 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
/* compile new variant if it doesn't exist already: */
v = create_variant(shader, key);
- v->next = shader->variants;
- shader->variants = v;
+ if (v) {
+ v->next = shader->variants;
+ shader->variants = v;
+ }
return v;
}
@@ -249,13 +272,372 @@ ir3_shader_destroy(struct ir3_shader *shader)
}
struct ir3_shader *
-ir3_shader_create(struct pipe_context *pctx, const struct tgsi_token *tokens,
+ir3_shader_create(struct pipe_context *pctx,
+ const struct pipe_shader_state *cso,
enum shader_t type)
{
struct ir3_shader *shader = CALLOC_STRUCT(ir3_shader);
shader->compiler = fd_context(pctx)->screen->compiler;
+ shader->id = ++shader->compiler->shader_count;
shader->pctx = pctx;
shader->type = type;
- shader->tokens = tgsi_dup_tokens(tokens);
+ shader->tokens = tgsi_dup_tokens(cso->tokens);
+ shader->stream_output = cso->stream_output;
+ if (fd_mesa_debug & FD_DBG_SHADERDB) {
+ /* if shader-db run, create a standard variant immediately
+ * (as otherwise nothing will trigger the shader to be
+ * actually compiled)
+ */
+ static struct ir3_shader_key key = {};
+ ir3_shader_variant(shader, key);
+ }
return shader;
}
+
+static void dump_reg(const char *name, uint32_t r)
+{
+ if (r != regid(63,0))
+ debug_printf("; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]);
+}
+
+static void dump_semantic(struct ir3_shader_variant *so,
+ unsigned sem, const char *name)
+{
+ uint32_t regid;
+ regid = ir3_find_output_regid(so, ir3_semantic_name(sem, 0));
+ dump_reg(name, regid);
+}
+
+void
+ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
+{
+ struct ir3 *ir = so->ir;
+ struct ir3_register *reg;
+ const char *type = ir3_shader_stage(so->shader);
+ uint8_t regid;
+ unsigned i;
+
+ for (i = 0; i < ir->ninputs; i++) {
+ if (!ir->inputs[i]) {
+ debug_printf("; in%d unused\n", i);
+ continue;
+ }
+ reg = ir->inputs[i]->regs[0];
+ regid = reg->num;
+ debug_printf("@in(%sr%d.%c)\tin%d\n",
+ (reg->flags & IR3_REG_HALF) ? "h" : "",
+ (regid >> 2), "xyzw"[regid & 0x3], i);
+ }
+
+ for (i = 0; i < ir->noutputs; i++) {
+ if (!ir->outputs[i]) {
+ debug_printf("; out%d unused\n", i);
+ continue;
+ }
+ /* kill shows up as a virtual output.. skip it! */
+ if (is_kill(ir->outputs[i]))
+ continue;
+ reg = ir->outputs[i]->regs[0];
+ regid = reg->num;
+ debug_printf("@out(%sr%d.%c)\tout%d\n",
+ (reg->flags & IR3_REG_HALF) ? "h" : "",
+ (regid >> 2), "xyzw"[regid & 0x3], i);
+ }
+
+ for (i = 0; i < so->immediates_count; i++) {
+ debug_printf("@const(c%d.x)\t", so->first_immediate + i);
+ debug_printf("0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
+ so->immediates[i].val[0],
+ so->immediates[i].val[1],
+ so->immediates[i].val[2],
+ so->immediates[i].val[3]);
+ }
+
+ disasm_a3xx(bin, so->info.sizedwords, 0, so->type);
+
+ debug_printf("; %s: outputs:", type);
+ for (i = 0; i < so->outputs_count; i++) {
+ uint8_t regid = so->outputs[i].regid;
+ ir3_semantic sem = so->outputs[i].semantic;
+ debug_printf(" r%d.%c (%u:%u)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ sem2name(sem), sem2idx(sem));
+ }
+ debug_printf("\n");
+ debug_printf("; %s: inputs:", type);
+ for (i = 0; i < so->inputs_count; i++) {
+ uint8_t regid = so->inputs[i].regid;
+ ir3_semantic sem = so->inputs[i].semantic;
+ debug_printf(" r%d.%c (%u:%u,cm=%x,il=%u,b=%u)",
+ (regid >> 2), "xyzw"[regid & 0x3],
+ sem2name(sem), sem2idx(sem),
+ so->inputs[i].compmask,
+ so->inputs[i].inloc,
+ so->inputs[i].bary);
+ }
+ debug_printf("\n");
+
+ /* print generic shader info: */
+ debug_printf("; %s prog %d/%d: %u instructions, %d half, %d full\n",
+ type, so->shader->id, so->id,
+ so->info.instrs_count,
+ so->info.max_half_reg + 1,
+ so->info.max_reg + 1);
+
+ debug_printf("; %d const, %u constlen\n",
+ so->info.max_const + 1,
+ so->constlen);
+
+ /* print shader type specific info: */
+ switch (so->type) {
+ case SHADER_VERTEX:
+ dump_semantic(so, TGSI_SEMANTIC_POSITION, "pos");
+ dump_semantic(so, TGSI_SEMANTIC_PSIZE, "psize");
+ break;
+ case SHADER_FRAGMENT:
+ dump_reg("pos (bary)", so->pos_regid);
+ dump_semantic(so, TGSI_SEMANTIC_POSITION, "posz");
+ dump_semantic(so, TGSI_SEMANTIC_COLOR, "color");
+ /* these two are hard-coded since we don't know how to
+ * program them to anything but all 0's...
+ */
+ if (so->frag_coord)
+ debug_printf("; fragcoord: r0.x\n");
+ if (so->frag_face)
+ debug_printf("; fragface: hr0.x\n");
+ break;
+ case SHADER_COMPUTE:
+ break;
+ }
+
+ debug_printf("\n");
+}
+
+/* This has to reach into the fd_context a bit more than the rest of
+ * ir3, but it needs to be aligned with the compiler, so both agree
+ * on which const regs hold what. The logic is identical between
+ * a3xx/a4xx; the only differences are small details in the actual
+ * CP_LOAD_STATE packets (which are handled inside the generation
+ * specific ctx->emit_const(_bo)() functions)
+ */
+
+#include "freedreno_resource.h"
+
+static void
+emit_user_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ struct fd_constbuf_stateobj *constbuf)
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ const unsigned index = 0; /* user consts are index 0 */
+ /* TODO save/restore dirty_mask for binning pass instead: */
+ uint32_t dirty_mask = constbuf->enabled_mask;
+
+ if (dirty_mask & (1 << index)) {
+ struct pipe_constant_buffer *cb = &constbuf->cb[index];
+ unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+ /* in particular, with binning shader we may end up with
+ * unused consts, ie. we could end up w/ constlen that is
+ * smaller than first_driver_param. In that case truncate
+ * the user consts early to avoid HLSQ lockup caused by
+ * writing too many consts
+ */
+ uint32_t max_const = MIN2(v->first_driver_param, v->constlen);
+
+ // I expect that size should be a multiple of vec4's:
+ assert(size == align(size, 4));
+
+ /* and even if the start of the const buffer is before
+ * max_const, the end may not be:
+ */
+ size = MIN2(size, 4 * max_const);
+
+ if (size > 0) {
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, v->type, 0,
+ cb->buffer_offset, size,
+ cb->user_buffer, cb->buffer);
+ constbuf->dirty_mask &= ~(1 << index);
+ }
+ }
+}
+
+static void
+emit_ubos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ struct fd_constbuf_stateobj *constbuf)
+{
+ uint32_t offset = v->first_driver_param; /* UBOs after user consts */
+ if (v->constlen > offset) {
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ uint32_t params = MIN2(4, v->constlen - offset) * 4;
+ uint32_t offsets[params];
+ struct fd_bo *bos[params];
+
+ for (uint32_t i = 0; i < params; i++) {
+ const uint32_t index = i + 1; /* UBOs start at index 1 */
+ struct pipe_constant_buffer *cb = &constbuf->cb[index];
+ assert(!cb->user_buffer);
+
+ if ((constbuf->enabled_mask & (1 << index)) && cb->buffer) {
+ offsets[i] = cb->buffer_offset;
+ bos[i] = fd_resource(cb->buffer)->bo;
+ } else {
+ offsets[i] = 0;
+ bos[i] = NULL;
+ }
+ }
+
+ fd_wfi(ctx, ring);
+ ctx->emit_const_bo(ring, v->type, false, offset * 4, params, bos, offsets);
+ }
+}
+
+static void
+emit_immediates(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ int size = v->immediates_count;
+ uint32_t base = v->first_immediate;
+
+ /* truncate size to avoid writing constants that shader
+ * does not use:
+ */
+ size = MIN2(size + base, v->constlen) - base;
+
+ /* convert out of vec4: */
+ base *= 4;
+ size *= 4;
+
+ if (size > 0) {
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, v->type, base,
+ 0, size, v->immediates[0].val, NULL);
+ }
+}
+
+/* emit stream-out buffers: */
+static void
+emit_tfbos(struct ir3_shader_variant *v, struct fd_ringbuffer *ring)
+{
+ uint32_t offset = v->first_driver_param + 5; /* streamout addresses after driver-params*/
+ if (v->constlen > offset) {
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+ struct pipe_stream_output_info *info = &v->shader->stream_output;
+ uint32_t params = 4;
+ uint32_t offsets[params];
+ struct fd_bo *bos[params];
+
+ for (uint32_t i = 0; i < params; i++) {
+ struct pipe_stream_output_target *target = so->targets[i];
+
+ if (target) {
+ offsets[i] = (so->offsets[i] * info->stride[i] * 4) +
+ target->buffer_offset;
+ bos[i] = fd_resource(target->buffer)->bo;
+ } else {
+ offsets[i] = 0;
+ bos[i] = NULL;
+ }
+ }
+
+ fd_wfi(ctx, ring);
+ ctx->emit_const_bo(ring, v->type, true, offset * 4, params, bos, offsets);
+ }
+}
+
+static uint32_t
+max_tf_vtx(struct ir3_shader_variant *v)
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+ struct fd_streamout_stateobj *so = &ctx->streamout;
+ struct pipe_stream_output_info *info = &v->shader->stream_output;
+ uint32_t maxvtxcnt = 0x7fffffff;
+
+ if (v->key.binning_pass)
+ return 0;
+ if (v->shader->stream_output.num_outputs == 0)
+ return 0;
+ if (so->num_targets == 0)
+ return 0;
+
+ /* offset to write to is:
+ *
+ * total_vtxcnt = vtxcnt + offsets[i]
+ * offset = total_vtxcnt * stride[i]
+ *
+ * offset = vtxcnt * stride[i] ; calculated in shader
+ * + offsets[i] * stride[i] ; calculated at emit_tfbos()
+ *
+ * assuming for each vtx, each target buffer will have data written
+ * up to 'offset + stride[i]', that leaves maxvtxcnt as:
+ *
+ * buffer_size = (maxvtxcnt * stride[i]) + stride[i]
+ * maxvtxcnt = (buffer_size - stride[i]) / stride[i]
+ *
+ * but shader is actually doing a less-than (rather than less-than-
+ * equal) check, so we can drop the -stride[i].
+ *
+ * TODO is assumption about `offset + stride[i]` legit?
+ */
+ for (unsigned i = 0; i < so->num_targets; i++) {
+ struct pipe_stream_output_target *target = so->targets[i];
+ unsigned stride = info->stride[i] * 4; /* convert dwords->bytes */
+ if (target) {
+ uint32_t max = target->buffer_size / stride;
+ maxvtxcnt = MIN2(maxvtxcnt, max);
+ }
+ }
+
+ return maxvtxcnt;
+}
+
+void
+ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ const struct pipe_draw_info *info, uint32_t dirty)
+{
+ struct fd_context *ctx = fd_context(v->shader->pctx);
+
+ if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+ struct fd_constbuf_stateobj *constbuf;
+ bool shader_dirty;
+
+ if (v->type == SHADER_VERTEX) {
+ constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+ shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_VP);
+ } else if (v->type == SHADER_FRAGMENT) {
+ constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+ shader_dirty = !!(ctx->prog.dirty & FD_SHADER_DIRTY_FP);
+ } else {
+ unreachable("bad shader type");
+ return;
+ }
+
+ emit_user_consts(v, ring, constbuf);
+ emit_ubos(v, ring, constbuf);
+ if (shader_dirty)
+ emit_immediates(v, ring);
+ }
+
+ /* emit driver params every time: */
+ /* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
+ if (info && (v->type == SHADER_VERTEX)) {
+ uint32_t offset = v->first_driver_param + 4; /* driver params after UBOs */
+ if (v->constlen >= offset) {
+ uint32_t vertex_params[4] = {
+ [IR3_DP_VTXID_BASE] = info->indexed ?
+ info->index_bias : info->start,
+ [IR3_DP_VTXCNT_MAX] = max_tf_vtx(v),
+ };
+
+ fd_wfi(ctx, ring);
+ ctx->emit_const(ring, SHADER_VERTEX, offset * 4, 0,
+ ARRAY_SIZE(vertex_params), vertex_params, NULL);
+
+ /* if needed, emit stream-out buffer addresses: */
+ if (vertex_params[IR3_DP_VTXCNT_MAX] > 0) {
+ emit_tfbos(v, ring);
+ }
+ }
+ }
+}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 9f1b0769180..1bbbdbd224d 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -29,9 +29,22 @@
#ifndef IR3_SHADER_H_
#define IR3_SHADER_H_
+#include "pipe/p_state.h"
+
#include "ir3.h"
#include "disasm.h"
+/* driver param indices: position of each driver-supplied value within the
+ * vertex_params[] array that ir3_emit_consts() uploads for vertex shaders.
+ */
+enum ir3_driver_param {
+ IR3_DP_VTXID_BASE = 0, /* index_bias for indexed draws, otherwise start */
+ IR3_DP_VTXCNT_MAX = 1, /* result of max_tf_vtx(); >0 also triggers emit_tfbos() */
+};
+
+/* internal semantic used for passing vtxcnt to vertex shader to
+ * implement transform feedback:
+ */
+#define IR3_SEMANTIC_VTXCNT (TGSI_SEMANTIC_COUNT + 0)
+
typedef uint16_t ir3_semantic; /* semantic name + index */
static inline ir3_semantic
ir3_semantic_name(uint8_t name, uint16_t index)
@@ -100,6 +113,9 @@ ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b)
struct ir3_shader_variant {
struct fd_bo *bo;
+ /* variant id (for debug) */
+ uint32_t id;
+
struct ir3_shader_key key;
struct ir3_info info;
@@ -192,26 +208,44 @@ struct ir3_shader_variant {
struct ir3_shader {
enum shader_t type;
+ /* shader id (for debug): */
+ uint32_t id;
+ uint32_t variant_count;
+
struct ir3_compiler *compiler;
struct pipe_context *pctx;
const struct tgsi_token *tokens;
+ struct pipe_stream_output_info stream_output;
struct ir3_shader_variant *variants;
-
- /* so far, only used for blit_prog shader.. values for
- * VPC_VARYING_PS_REPL[i].MODE
- */
- uint32_t vpsrepl[8];
};
void * ir3_shader_assemble(struct ir3_shader_variant *v, uint32_t gpu_id);
struct ir3_shader * ir3_shader_create(struct pipe_context *pctx,
- const struct tgsi_token *tokens, enum shader_t type);
+ const struct pipe_shader_state *cso, enum shader_t type);
void ir3_shader_destroy(struct ir3_shader *shader);
struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
struct ir3_shader_key key);
+void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
+
+struct fd_ringbuffer;
+void ir3_emit_consts(struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
+ const struct pipe_draw_info *info, uint32_t dirty);
+
+/* Short, fixed label for the shader's stage ("VERT", "FRAG" or "CL").
+ * Any other shader->type is a programming error and hits unreachable().
+ */
+static inline const char *
+ir3_shader_stage(struct ir3_shader *shader)
+{
+	const char *label = NULL;
+
+	switch (shader->type) {
+	case SHADER_VERTEX:
+		label = "VERT";
+		break;
+	case SHADER_FRAGMENT:
+		label = "FRAG";
+		break;
+	case SHADER_COMPUTE:
+		label = "CL";
+		break;
+	default:
+		unreachable("invalid type");
+		break;
+	}
+
+	return label;
+}
/*
* Helper/util:
diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h
index dcf63543219..6466fa594f9 100644
--- a/src/gallium/drivers/i915/i915_batchbuffer.h
+++ b/src/gallium/drivers/i915/i915_batchbuffer.h
@@ -33,20 +33,20 @@
struct i915_context;
-static INLINE size_t
+static inline size_t
i915_winsys_batchbuffer_space(struct i915_winsys_batchbuffer *batch)
{
return batch->size - (batch->ptr - batch->map);
}
-static INLINE boolean
+static inline boolean
i915_winsys_batchbuffer_check(struct i915_winsys_batchbuffer *batch,
size_t dwords)
{
return dwords * 4 <= i915_winsys_batchbuffer_space(batch);
}
-static INLINE void
+static inline void
i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch,
unsigned dword)
{
@@ -54,7 +54,7 @@ i915_winsys_batchbuffer_dword_unchecked(struct i915_winsys_batchbuffer *batch,
batch->ptr += 4;
}
-static INLINE void
+static inline void
i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch,
float f)
{
@@ -64,7 +64,7 @@ i915_winsys_batchbuffer_float(struct i915_winsys_batchbuffer *batch,
i915_winsys_batchbuffer_dword_unchecked(batch, uif.ui);
}
-static INLINE void
+static inline void
i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch,
unsigned dword)
{
@@ -72,7 +72,7 @@ i915_winsys_batchbuffer_dword(struct i915_winsys_batchbuffer *batch,
i915_winsys_batchbuffer_dword_unchecked(batch, dword);
}
-static INLINE void
+static inline void
i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
void *data,
size_t size)
@@ -83,7 +83,7 @@ i915_winsys_batchbuffer_write(struct i915_winsys_batchbuffer *batch,
batch->ptr += size;
}
-static INLINE boolean
+static inline boolean
i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer **buffers,
int num_of_buffers)
@@ -91,7 +91,7 @@ i915_winsys_validate_buffers(struct i915_winsys_batchbuffer *batch,
return batch->iws->validate_buffers(batch, buffers, num_of_buffers);
}
-static INLINE int
+static inline int
i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch,
struct i915_winsys_buffer *buffer,
enum i915_winsys_buffer_usage usage,
diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h
index 40abf3c577f..c8c7d64f5cb 100644
--- a/src/gallium/drivers/i915/i915_context.h
+++ b/src/gallium/drivers/i915/i915_context.h
@@ -339,7 +339,7 @@ struct i915_context {
#define I915_DST_VARS 4
#define I915_DST_RECT 8
-static INLINE
+static inline
void i915_set_flush_dirty(struct i915_context *i915, unsigned flush)
{
i915->hardware_dirty |= I915_HW_FLUSH;
@@ -408,7 +408,7 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen,
* Inline conversion functions. These are better-typed than the
* macros used previously:
*/
-static INLINE struct i915_context *
+static inline struct i915_context *
i915_context( struct pipe_context *pipe )
{
return (struct i915_context *)pipe;
diff --git a/src/gallium/drivers/i915/i915_debug.h b/src/gallium/drivers/i915/i915_debug.h
index 079882c811f..0f12a592ae8 100644
--- a/src/gallium/drivers/i915/i915_debug.h
+++ b/src/gallium/drivers/i915/i915_debug.h
@@ -48,13 +48,13 @@ struct i915_winsys_batchbuffer;
extern unsigned i915_debug;
#ifdef DEBUG
-static INLINE boolean
+static inline boolean
I915_DBG_ON(unsigned flags)
{
return i915_debug & flags;
}
-static INLINE void
+static inline void
I915_DBG(unsigned flags, const char *fmt, ...)
{
if (I915_DBG_ON(flags)) {
@@ -67,7 +67,7 @@ I915_DBG(unsigned flags, const char *fmt, ...)
}
#else
#define I915_DBG_ON(flags) (0)
-static INLINE void I915_DBG(unsigned flags, const char *fmt, ...) {}
+static inline void I915_DBG(unsigned flags, const char *fmt, ...) {}
#endif
void i915_debug_init(struct i915_screen *i915);
diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h
index a4dbcb4d271..adc42542fea 100644
--- a/src/gallium/drivers/i915/i915_fpc.h
+++ b/src/gallium/drivers/i915/i915_fpc.h
@@ -136,7 +136,7 @@ struct i915_fp_compile {
/* One neat thing about the UREG representation:
*/
-static INLINE int
+static inline int
swizzle(int reg, uint x, uint y, uint z, uint w)
{
assert(x <= SRC_ONE);
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 38a33888166..456be9d92ca 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -111,7 +111,7 @@ static const float cos_constants[4] = { 1.0,
/**
* component-wise negation of ureg
*/
-static INLINE int
+static inline int
negate(int reg, int x, int y, int z, int w)
{
/* Another neat thing about the UREG representation */
diff --git a/src/gallium/drivers/i915/i915_prim_emit.c b/src/gallium/drivers/i915/i915_prim_emit.c
index 248e21e02da..ea84efd1d17 100644
--- a/src/gallium/drivers/i915/i915_prim_emit.c
+++ b/src/gallium/drivers/i915/i915_prim_emit.c
@@ -53,7 +53,7 @@ struct setup_stage {
/**
* Basically a cast wrapper.
*/
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
+static inline struct setup_stage *setup_stage( struct draw_stage *stage )
{
return (struct setup_stage *)stage;
}
@@ -65,7 +65,7 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
* have a couple of slots at the beginning (1-dword header, 4-dword
* clip pos) that we ignore here.
*/
-static INLINE void
+static inline void
emit_hw_vertex( struct i915_context *i915,
const struct vertex_header *vertex)
{
@@ -124,7 +124,7 @@ emit_hw_vertex( struct i915_context *i915,
-static INLINE void
+static inline void
emit_prim( struct draw_stage *stage,
struct prim_header *prim,
unsigned hwprim,
diff --git a/src/gallium/drivers/i915/i915_prim_vbuf.c b/src/gallium/drivers/i915/i915_prim_vbuf.c
index d134dbb1620..8f61f151e0c 100644
--- a/src/gallium/drivers/i915/i915_prim_vbuf.c
+++ b/src/gallium/drivers/i915/i915_prim_vbuf.c
@@ -96,7 +96,7 @@ struct i915_vbuf_render {
/**
* Basically a cast wrapper.
*/
-static INLINE struct i915_vbuf_render *
+static inline struct i915_vbuf_render *
i915_vbuf_render(struct vbuf_render *render)
{
assert(render);
diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h
index ef99cfb5d3c..77fe8b70f79 100644
--- a/src/gallium/drivers/i915/i915_resource.h
+++ b/src/gallium/drivers/i915/i915_resource.h
@@ -94,14 +94,14 @@ void i915_init_resource_functions(struct i915_context *i915);
extern struct u_resource_vtbl i915_buffer_vtbl;
extern struct u_resource_vtbl i915_texture_vtbl;
-static INLINE struct i915_texture *i915_texture(struct pipe_resource *resource)
+static inline struct i915_texture *i915_texture(struct pipe_resource *resource)
{
struct i915_texture *tex = (struct i915_texture *)resource;
assert(tex->b.vtbl == &i915_texture_vtbl);
return tex;
}
-static INLINE struct i915_buffer *i915_buffer(struct pipe_resource *resource)
+static inline struct i915_buffer *i915_buffer(struct pipe_resource *resource)
{
struct i915_buffer *tex = (struct i915_buffer *)resource;
assert(tex->b.vtbl == &i915_buffer_vtbl);
diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c
index 8ef73d6f2c2..9a3279ccb75 100644
--- a/src/gallium/drivers/i915/i915_resource_texture.c
+++ b/src/gallium/drivers/i915/i915_resource_texture.c
@@ -89,25 +89,25 @@ static const int bottom_offsets[6] = {
[PIPE_TEX_FACE_NEG_Z] = 16 + 5 * 8,
};
-static INLINE unsigned
+static inline unsigned
align_nblocksx(enum pipe_format format, unsigned width, unsigned align_to)
{
return align(util_format_get_nblocksx(format, width), align_to);
}
-static INLINE unsigned
+static inline unsigned
align_nblocksy(enum pipe_format format, unsigned width, unsigned align_to)
{
return align(util_format_get_nblocksy(format, width), align_to);
}
-static INLINE unsigned
+static inline unsigned
get_pot_stride(enum pipe_format format, unsigned width)
{
return util_next_power_of_two(util_format_get_stride(format, width));
}
-static INLINE const char*
+static inline const char*
get_tiling_string(enum i915_winsys_buffer_tile tile)
{
switch(tile) {
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 0590da07b9a..19a94a8e019 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -243,6 +243,10 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
@@ -462,15 +466,6 @@ i915_fence_reference(struct pipe_screen *screen,
is->iws->fence_reference(is->iws, ptr, fence);
}
-static boolean
-i915_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct i915_screen *is = i915_screen(screen);
-
- return is->iws->fence_signalled(is->iws, fence) == 1;
-}
-
static boolean
i915_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
@@ -478,6 +473,9 @@ i915_fence_finish(struct pipe_screen *screen,
{
struct i915_screen *is = i915_screen(screen);
+ if (!timeout)
+ return is->iws->fence_signalled(is->iws, fence) == 1;
+
return is->iws->fence_finish(is->iws, fence) == 1;
}
@@ -565,7 +563,6 @@ i915_screen_create(struct i915_winsys *iws)
is->base.context_create = i915_create_context;
is->base.fence_reference = i915_fence_reference;
- is->base.fence_signalled = i915_fence_signalled;
is->base.fence_finish = i915_fence_finish;
i915_init_screen_resource_functions(is);
diff --git a/src/gallium/drivers/i915/i915_screen.h b/src/gallium/drivers/i915/i915_screen.h
index 99d3ffd3af9..3be941a1561 100644
--- a/src/gallium/drivers/i915/i915_screen.h
+++ b/src/gallium/drivers/i915/i915_screen.h
@@ -59,7 +59,7 @@ struct i915_screen
*/
-static INLINE struct i915_screen *
+static inline struct i915_screen *
i915_screen(struct pipe_screen *pscreen)
{
return (struct i915_screen *) pscreen;
diff --git a/src/gallium/drivers/i915/i915_state_dynamic.c b/src/gallium/drivers/i915/i915_state_dynamic.c
index 4050cd4ac44..1c29e8ae671 100644
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -46,7 +46,7 @@
* (active) state every time a 4kb boundary is crossed.
*/
-static INLINE void set_dynamic(struct i915_context *i915,
+static inline void set_dynamic(struct i915_context *i915,
unsigned offset,
const unsigned state)
{
@@ -60,7 +60,7 @@ static INLINE void set_dynamic(struct i915_context *i915,
-static INLINE void set_dynamic_array(struct i915_context *i915,
+static inline void set_dynamic_array(struct i915_context *i915,
unsigned offset,
const unsigned *src,
unsigned dwords)
diff --git a/src/gallium/drivers/i915/i915_state_immediate.c b/src/gallium/drivers/i915/i915_state_immediate.c
index d244a349fce..c4a6cae1beb 100644
--- a/src/gallium/drivers/i915/i915_state_immediate.c
+++ b/src/gallium/drivers/i915/i915_state_immediate.c
@@ -39,7 +39,7 @@
/* Convinience function to check immediate state.
*/
-static INLINE void set_immediate(struct i915_context *i915,
+static inline void set_immediate(struct i915_context *i915,
unsigned offset,
const unsigned state)
{
diff --git a/src/gallium/drivers/i915/i915_state_inlines.h b/src/gallium/drivers/i915/i915_state_inlines.h
index d4c5ab69555..015ea32933b 100644
--- a/src/gallium/drivers/i915/i915_state_inlines.h
+++ b/src/gallium/drivers/i915/i915_state_inlines.h
@@ -34,7 +34,7 @@
#include "i915_reg.h"
-static INLINE unsigned
+static inline unsigned
i915_translate_compare_func(unsigned func)
{
switch (func) {
@@ -59,7 +59,7 @@ i915_translate_compare_func(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
i915_translate_shadow_compare_func(unsigned func)
{
switch (func) {
@@ -84,7 +84,7 @@ i915_translate_shadow_compare_func(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
i915_translate_stencil_op(unsigned op)
{
switch (op) {
@@ -109,7 +109,7 @@ i915_translate_stencil_op(unsigned op)
}
}
-static INLINE unsigned
+static inline unsigned
i915_translate_blend_factor(unsigned factor)
{
switch (factor) {
@@ -148,7 +148,7 @@ i915_translate_blend_factor(unsigned factor)
}
}
-static INLINE unsigned
+static inline unsigned
i915_translate_blend_func(unsigned mode)
{
switch (mode) {
@@ -168,7 +168,7 @@ i915_translate_blend_func(unsigned mode)
}
-static INLINE unsigned
+static inline unsigned
i915_translate_logic_op(unsigned opcode)
{
switch (opcode) {
@@ -211,7 +211,7 @@ i915_translate_logic_op(unsigned opcode)
-static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr )
+static inline boolean i915_validate_vertices( unsigned hw_prim, unsigned nr )
{
boolean ok;
diff --git a/src/gallium/drivers/ilo/Makefile.am b/src/gallium/drivers/ilo/Makefile.am
index a8785a5e8c4..1f14153748e 100644
--- a/src/gallium/drivers/ilo/Makefile.am
+++ b/src/gallium/drivers/ilo/Makefile.am
@@ -21,8 +21,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources
index e1bbb9a0781..7a7db938f92 100644
--- a/src/gallium/drivers/ilo/Makefile.sources
+++ b/src/gallium/drivers/ilo/Makefile.sources
@@ -1,5 +1,4 @@
C_SOURCES := \
- core/ilo_buffer.h \
core/ilo_builder.c \
core/ilo_builder.h \
core/ilo_builder_3d.h \
@@ -43,6 +42,7 @@ C_SOURCES := \
core/ilo_state_viewport.h \
core/ilo_state_zs.c \
core/ilo_state_zs.h \
+ core/ilo_vma.h \
core/intel_winsys.h \
ilo_blit.c \
ilo_blit.h \
@@ -65,8 +65,6 @@ C_SOURCES := \
ilo_public.h \
ilo_query.c \
ilo_query.h \
- ilo_resource.c \
- ilo_resource.h \
ilo_render.c \
ilo_render.h \
ilo_render_gen.h \
@@ -76,6 +74,8 @@ C_SOURCES := \
ilo_render_gen8.c \
ilo_render_media.c \
ilo_render_surface.c \
+ ilo_resource.c \
+ ilo_resource.h \
ilo_screen.c \
ilo_screen.h \
ilo_shader.c \
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
index 6d9e3699125..5efe9da2d22 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_bottom.h
@@ -39,6 +39,7 @@
#include "ilo_state_shader.h"
#include "ilo_state_viewport.h"
#include "ilo_state_zs.h"
+#include "ilo_vma.h"
#include "ilo_builder.h"
#include "ilo_builder_3d_top.h"
@@ -674,9 +675,10 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
dw[5] |= builder->mocs << GEN8_DEPTH_DW5_MOCS__SHIFT;
- if (zs->depth_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->depth_bo,
- zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->z_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->z_vma->bo,
+ zs->z_vma->bo_offset + zs->depth[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->depth[0];
@@ -691,9 +693,10 @@ gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder,
else
dw[6] |= builder->mocs << GEN6_DEPTH_DW6_MOCS__SHIFT;
- if (zs->depth_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->depth_bo,
- zs->depth[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->z_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->z_vma->bo,
+ zs->z_vma->bo_offset + zs->depth[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
@@ -724,9 +727,10 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN8_STENCIL_DW1_MOCS__SHIFT;
- if (zs->stencil_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->stencil_bo,
- zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->s_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->s_vma->bo,
+ zs->s_vma->bo_offset + zs->stencil[1],
+ (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->stencil[0];
@@ -734,9 +738,10 @@ gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN6_STENCIL_DW1_MOCS__SHIFT;
- if (zs->stencil_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->stencil_bo,
- zs->stencil[1], (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->s_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->s_vma->bo,
+ zs->s_vma->bo_offset + zs->stencil[1],
+ (zs->s_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
@@ -767,9 +772,10 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN8_HIZ_DW1_MOCS__SHIFT;
- if (zs->hiz_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_bo,
- zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->hiz_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, zs->hiz_vma->bo,
+ zs->hiz_vma->bo_offset + zs->hiz[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
dw[1] = zs->hiz[0];
@@ -777,9 +783,10 @@ gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder,
dw[1] |= builder->mocs << GEN6_HIZ_DW1_MOCS__SHIFT;
- if (zs->hiz_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_bo,
- zs->hiz[1], (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
+ if (zs->hiz_vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, zs->hiz_vma->bo,
+ zs->hiz_vma->bo_offset + zs->hiz[1],
+ (zs->z_readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index 8d30095e6f6..6e94fb25f1f 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -39,6 +39,7 @@
#include "ilo_state_surface.h"
#include "ilo_state_urb.h"
#include "ilo_state_vf.h"
+#include "ilo_vma.h"
#include "ilo_builder.h"
static inline void
@@ -318,8 +319,10 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
dw[3] = 0;
if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
- if (b->need_bo)
- ilo_builder_batch_reloc64(builder, pos + 1, b->bo, b->vb[1], 0);
+ if (b->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 1, b->vma->bo,
+ b->vma->bo_offset + b->vb[1], 0);
+ }
dw[3] |= b->vb[2];
} else {
@@ -331,9 +334,11 @@ gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
dw[3] |= vf->user_instancing[elem][1];
}
- if (b->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 1, b->bo, b->vb[1], 0);
- ilo_builder_batch_reloc(builder, pos + 2, b->bo, b->vb[2], 0);
+ if (b->vma) {
+ ilo_builder_batch_reloc(builder, pos + 1, b->vma->bo,
+ b->vma->bo_offset + b->vb[1], 0);
+ ilo_builder_batch_reloc(builder, pos + 2, b->vma->bo,
+ b->vma->bo_offset + b->vb[2], 0);
}
}
@@ -429,9 +434,11 @@ gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = dw0;
- if (ib->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 1, ib->bo, ib->ib[1], 0);
- ilo_builder_batch_reloc(builder, pos + 2, ib->bo, ib->ib[2], 0);
+ if (ib->vma) {
+ ilo_builder_batch_reloc(builder, pos + 1, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[1], 0);
+ ilo_builder_batch_reloc(builder, pos + 2, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[2], 0);
} else {
dw[1] = 0;
dw[2] = 0;
@@ -456,8 +463,9 @@ gen8_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
dw[1] = ib->ib[0] |
builder->mocs << GEN8_IB_DW1_MOCS__SHIFT;
- if (ib->need_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, ib->bo, ib->ib[1], 0);
+ if (ib->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, ib->vma->bo,
+ ib->vma->bo_offset + ib->ib[1], 0);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -801,11 +809,11 @@ gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
builder->mocs << GEN7_SO_BUF_DW1_MOCS__SHIFT |
sol->strides[buffer] << GEN7_SO_BUF_DW1_PITCH__SHIFT;
- if (sb->need_bo) {
- ilo_builder_batch_reloc(builder, pos + 2, sb->bo,
- sb->so_buf[0], INTEL_RELOC_WRITE);
- ilo_builder_batch_reloc(builder, pos + 3, sb->bo,
- sb->so_buf[1], INTEL_RELOC_WRITE);
+ if (sb->vma) {
+ ilo_builder_batch_reloc(builder, pos + 2, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[0], INTEL_RELOC_WRITE);
+ ilo_builder_batch_reloc(builder, pos + 3, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -832,9 +840,9 @@ gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
buffer << GEN7_SO_BUF_DW1_INDEX__SHIFT |
builder->mocs << GEN8_SO_BUF_DW1_MOCS__SHIFT;
- if (sb->need_bo) {
- ilo_builder_batch_reloc64(builder, pos + 2, sb->bo,
- sb->so_buf[1], INTEL_RELOC_WRITE);
+ if (sb->vma) {
+ ilo_builder_batch_reloc64(builder, pos + 2, sb->vma->bo,
+ sb->vma->bo_offset + sb->so_buf[1], INTEL_RELOC_WRITE);
} else {
dw[2] = 0;
dw[3] = 0;
@@ -842,9 +850,10 @@ gen8_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
dw[4] = sb->so_buf[2];
- if (sb->need_write_offset_bo) {
- ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_bo,
- sizeof(uint32_t) * buffer, INTEL_RELOC_WRITE);
+ if (sb->write_offset_vma) {
+ ilo_builder_batch_reloc64(builder, pos + 5, sb->write_offset_vma->bo,
+ sb->write_offset_vma->bo_offset + sizeof(uint32_t) * buffer,
+ INTEL_RELOC_WRITE);
} else {
dw[5] = 0;
dw[6] = 0;
@@ -1254,14 +1263,15 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
memcpy(dw, surf->surface, state_len << 2);
- if (surf->bo) {
+ if (surf->vma) {
const uint32_t mocs = (surf->scanout) ?
(GEN8_MOCS_MT_PTE | GEN8_MOCS_CT_L3) : builder->mocs;
dw[1] |= mocs << GEN8_SURFACE_DW1_MOCS__SHIFT;
- ilo_builder_surface_reloc64(builder, state_offset, 8, surf->bo,
- surf->surface[8], (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
+ ilo_builder_surface_reloc64(builder, state_offset, 8, surf->vma->bo,
+ surf->vma->bo_offset + surf->surface[8],
+ (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
}
} else {
state_align = 32;
@@ -1271,15 +1281,16 @@ gen6_SURFACE_STATE(struct ilo_builder *builder,
ILO_BUILDER_ITEM_SURFACE, state_align, state_len, &dw);
memcpy(dw, surf->surface, state_len << 2);
- if (surf->bo) {
+ if (surf->vma) {
/*
* For scanouts, we should not enable caching in LLC. Since we only
* enable that on Gen8+, we are fine here.
*/
dw[5] |= builder->mocs << GEN6_SURFACE_DW5_MOCS__SHIFT;
- ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
- surf->surface[1], (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
+ ilo_builder_surface_reloc(builder, state_offset, 1, surf->vma->bo,
+ surf->vma->bo_offset + surf->surface[1],
+ (surf->readonly) ? 0 : INTEL_RELOC_WRITE);
}
}
diff --git a/src/gallium/drivers/ilo/core/ilo_core.h b/src/gallium/drivers/ilo/core/ilo_core.h
index 0a7f7d9d3fe..da7db90a54b 100644
--- a/src/gallium/drivers/ilo/core/ilo_core.h
+++ b/src/gallium/drivers/ilo/core/ilo_core.h
@@ -29,15 +29,9 @@
#define ILO_CORE_H
#include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_format.h"
#include "util/u_debug.h"
-#include "util/list.h"
-#include "util/u_format.h"
-#include "util/u_inlines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_pointer.h"
#endif /* ILO_CORE_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_image.c b/src/gallium/drivers/ilo/core/ilo_image.c
index 0d837d8a9d5..fa547ac5c36 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.c
+++ b/src/gallium/drivers/ilo/core/ilo_image.c
@@ -40,269 +40,356 @@ enum {
IMAGE_TILING_W)
};
-struct ilo_image_params {
- const struct ilo_dev *dev;
- const struct pipe_resource *templ;
- unsigned valid_tilings;
+struct ilo_image_layout {
+ enum ilo_image_walk_type walk;
+ bool interleaved_samples;
- bool compressed;
+ uint8_t valid_tilings;
+ enum gen_surface_tiling tiling;
- unsigned h0, h1;
- unsigned max_x, max_y;
+ enum ilo_image_aux_type aux;
+
+ int align_i;
+ int align_j;
+
+ struct ilo_image_lod *lods;
+ int walk_layer_h0;
+ int walk_layer_h1;
+ int walk_layer_height;
+ int monolithic_width;
+ int monolithic_height;
};
-static void
-img_get_slice_size(const struct ilo_image *img,
- const struct ilo_image_params *params,
- unsigned level, unsigned *width, unsigned *height)
+static enum ilo_image_walk_type
+image_get_gen6_walk(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
{
- const struct pipe_resource *templ = params->templ;
- unsigned w, h;
+ ILO_DEV_ASSERT(dev, 6, 6);
- w = u_minify(img->width0, level);
- h = u_minify(img->height0, level);
-
- /*
- * From the Sandy Bridge PRM, volume 1 part 1, page 114:
- *
- * "The dimensions of the mip maps are first determined by applying the
- * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
- * if necessary, they are padded out to compression block boundaries."
- */
- w = align(w, img->block_width);
- h = align(h, img->block_height);
-
- /*
- * From the Sandy Bridge PRM, volume 1 part 1, page 111:
- *
- * "If the surface is multisampled (4x), these values must be adjusted
- * as follows before proceeding:
- *
- * W_L = ceiling(W_L / 2) * 4
- * H_L = ceiling(H_L / 2) * 4"
- *
- * From the Ivy Bridge PRM, volume 1 part 1, page 108:
- *
- * "If the surface is multisampled and it is a depth or stencil surface
- * or Multisampled Surface StorageFormat in SURFACE_STATE is
- * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
- * proceeding:
- *
- * #samples W_L = H_L =
- * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
- * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
- * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
- * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
- *
- * For interleaved samples (4x), where pixels
- *
- * (x, y ) (x+1, y )
- * (x, y+1) (x+1, y+1)
- *
- * would be is occupied by
- *
- * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
- * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
- * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
- * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
- *
- * Thus the need to
- *
- * w = align(w, 2) * 2;
- * y = align(y, 2) * 2;
- */
- if (img->interleaved_samples) {
- switch (templ->nr_samples) {
- case 0:
- case 1:
- break;
- case 2:
- w = align(w, 2) * 2;
- break;
- case 4:
- w = align(w, 2) * 2;
- h = align(h, 2) * 2;
- break;
- case 8:
- w = align(w, 2) * 4;
- h = align(h, 2) * 2;
- break;
- case 16:
- w = align(w, 2) * 4;
- h = align(h, 2) * 4;
- break;
- default:
- assert(!"unsupported sample count");
- break;
- }
- }
-
- /*
- * From the Ivy Bridge PRM, volume 1 part 1, page 108:
- *
- * "For separate stencil buffer, the width must be mutiplied by 2 and
- * height divided by 2..."
- *
- * To make things easier (for transfer), we will just double the stencil
- * stride in 3DSTATE_STENCIL_BUFFER.
- */
- w = align(w, img->align_i);
- h = align(h, img->align_j);
-
- *width = w;
- *height = h;
-}
-
-static unsigned
-img_get_num_layers(const struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
- unsigned num_layers = templ->array_size;
-
- /* samples of the same index are stored in a layer */
- if (templ->nr_samples > 1 && !img->interleaved_samples)
- num_layers *= templ->nr_samples;
-
- return num_layers;
-}
-
-static void
-img_init_layer_height(struct ilo_image *img,
- struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
- unsigned num_layers;
-
- if (img->walk != ILO_IMAGE_WALK_LAYER)
- return;
-
- num_layers = img_get_num_layers(img, params);
- if (num_layers <= 1)
- return;
+ /* TODO we want LODs to be page-aligned */
+ if (info->type == GEN6_SURFTYPE_3D)
+ return ILO_IMAGE_WALK_3D;
/*
* From the Sandy Bridge PRM, volume 1 part 1, page 115:
*
- * "The following equation is used for surface formats other than
- * compressed textures:
+ * "The separate stencil buffer does not support mip mapping, thus the
+ * storage for LODs other than LOD 0 is not needed. The following
+ * QPitch equation applies only to the separate stencil buffer:
*
- * QPitch = (h0 + h1 + 11j)"
+ * QPitch = h_0"
*
- * "The equation for compressed textures (BC* and FXT1 surface formats)
- * follows:
- *
- * QPitch = (h0 + h1 + 11j) / 4"
- *
- * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
- * value calculated in the equation above, for every other odd Surface
- * Height starting from 1 i.e. 1,5,9,13"
- *
- * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
- *
- * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
- * buffer and stencil buffer have an implied value of ARYSPC_FULL):
- *
- * QPitch = (h0 + h1 + 12j)
- * QPitch = (h0 + h1 + 12j) / 4 (compressed)
- *
- * (There are many typos or missing words here...)"
- *
- * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
- * the base address. The PRM divides QPitch by 4 for compressed formats
- * because the block height for those formats are 4, and it wants QPitch to
- * mean the number of memory rows, as opposed to texel rows, between
- * slices. Since we use texel rows everywhere, we do not need to divide
- * QPitch by 4.
+ * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
+ * when bound.
*/
- img->walk_layer_height = params->h0 + params->h1 +
- ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * img->align_j;
+ if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT)
+ return ILO_IMAGE_WALK_LOD;
- if (ilo_dev_gen(params->dev) == ILO_GEN(6) && templ->nr_samples > 1 &&
- img->height0 % 4 == 1)
- img->walk_layer_height += 4;
-
- params->max_y += img->walk_layer_height * (num_layers - 1);
+ /* compact spacing is not supported otherwise */
+ return ILO_IMAGE_WALK_LAYER;
}
-static void
-img_init_lods(struct ilo_image *img,
- struct ilo_image_params *params)
+static enum ilo_image_walk_type
+image_get_gen7_walk(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
{
- const struct pipe_resource *templ = params->templ;
- unsigned cur_x, cur_y;
- unsigned lv;
+ ILO_DEV_ASSERT(dev, 7, 8);
- cur_x = 0;
- cur_y = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
- unsigned lod_w, lod_h;
+ if (info->type == GEN6_SURFTYPE_3D)
+ return ILO_IMAGE_WALK_3D;
- img_get_slice_size(img, params, lv, &lod_w, &lod_h);
+ /*
+ * From the Ivy Bridge PRM, volume 1 part 1, page 111:
+ *
+ * "note that the depth buffer and stencil buffer have an implied value
+ * of ARYSPC_FULL"
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 66:
+ *
+ * "If Multisampled Surface Storage Format is MSFMT_MSS and Number of
+ * Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
+ * Spacing) must be set to ARYSPC_LOD0."
+ */
+ if (info->sample_count > 1)
+ assert(info->level_count == 1);
+ return (info->bind_zs || info->level_count > 1) ?
+ ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD;
+}
- img->lods[lv].x = cur_x;
- img->lods[lv].y = cur_y;
- img->lods[lv].slice_width = lod_w;
- img->lods[lv].slice_height = lod_h;
+static bool
+image_get_gen6_interleaved_samples(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
- switch (img->walk) {
- case ILO_IMAGE_WALK_LAYER:
- /* MIPLAYOUT_BELOW */
- if (lv == 1)
- cur_x += lod_w;
- else
- cur_y += lod_h;
- break;
- case ILO_IMAGE_WALK_LOD:
- lod_h *= img_get_num_layers(img, params);
- if (lv == 1)
- cur_x += lod_w;
- else
- cur_y += lod_h;
+ /*
+ * Gen6 supports only interleaved samples. It is not explicitly stated,
+ * but on Gen7+, render targets are expected to be UMS/CMS (samples
+ * non-interleaved) and depth/stencil buffers are expected to be IMS
+ * (samples interleaved).
+ *
+ * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
+ */
+ return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs);
+}
- /* every LOD begins at tile boundaries */
- if (templ->last_level > 0) {
- assert(img->format == PIPE_FORMAT_S8_UINT);
- cur_x = align(cur_x, 64);
- cur_y = align(cur_y, 64);
- }
- break;
- case ILO_IMAGE_WALK_3D:
- {
- const unsigned num_slices = u_minify(templ->depth0, lv);
- const unsigned num_slices_per_row = 1 << lv;
- const unsigned num_rows =
- (num_slices + num_slices_per_row - 1) / num_slices_per_row;
+static uint8_t
+image_get_gen6_valid_tilings(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ uint8_t valid_tilings = IMAGE_TILING_ALL;
- lod_w *= num_slices_per_row;
- lod_h *= num_rows;
+ ILO_DEV_ASSERT(dev, 6, 8);
- cur_y += lod_h;
- }
- break;
- }
+ if (info->valid_tilings)
+ valid_tilings &= info->valid_tilings;
- if (params->max_x < img->lods[lv].x + lod_w)
- params->max_x = img->lods[lv].x + lod_w;
- if (params->max_y < img->lods[lv].y + lod_h)
- params->max_y = img->lods[lv].y + lod_h;
- }
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 2, page 32:
+ *
+ * "Display/Overlay Y-Major not supported.
+ * X-Major required for Async Flips"
+ */
+ if (unlikely(info->bind_scanout))
+ valid_tilings &= IMAGE_TILING_X;
- if (img->walk == ILO_IMAGE_WALK_LAYER) {
- params->h0 = img->lods[0].slice_height;
+ /*
+ * From the Sandy Bridge PRM, volume 3 part 2, page 158:
+ *
+ * "The cursor surface address must be 4K byte aligned. The cursor must
+ * be in linear memory, it cannot be tiled."
+ */
+ if (unlikely(info->bind_cursor))
+ valid_tilings &= IMAGE_TILING_NONE;
- if (templ->last_level > 0)
- params->h1 = img->lods[1].slice_height;
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 318:
+ *
+ * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
+ * Depth Buffer is not supported."
+ *
+ * "The Depth Buffer, if tiled, must use Y-Major tiling."
+ *
+ * From the Sandy Bridge PRM, volume 1 part 2, page 22:
+ *
+ * "W-Major Tile Format is used for separate stencil."
+ */
+ if (info->bind_zs) {
+ if (info->format == GEN6_FORMAT_R8_UINT)
+ valid_tilings &= IMAGE_TILING_W;
else
- img_get_slice_size(img, params, 1, &cur_x, ¶ms->h1);
+ valid_tilings &= IMAGE_TILING_Y;
+ }
+
+ if (info->bind_surface_sampler ||
+ info->bind_surface_dp_render ||
+ info->bind_surface_dp_typed) {
+ /*
+ * From the Haswell PRM, volume 2d, page 233:
+ *
+ * "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
+ * (Tiled Surface) must be TRUE."
+ */
+ if (info->sample_count > 1)
+ valid_tilings &= ~IMAGE_TILING_NONE;
+
+ if (ilo_dev_gen(dev) < ILO_GEN(8))
+ valid_tilings &= ~IMAGE_TILING_W;
+ }
+
+ if (info->bind_surface_dp_render) {
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 2, page 32:
+ *
+ * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
+ * either TileX or Linear."
+ *
+ * From the Haswell PRM, volume 5, page 32:
+ *
+ * "NOTE: 128 BPP format color buffer (render target) supports
+ * Linear, TiledX and TiledY."
+ */
+ if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16)
+ valid_tilings &= ~IMAGE_TILING_Y;
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+ *
+ * "This field (Surface Vertical Aligment) must be set to VALIGN_4
+ * for all tiled Y Render Target surfaces."
+ *
+ * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
+ *
+ * R32G32B32_FLOAT is not renderable and we only need an assert() here.
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
+ assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
+ }
+
+ return valid_tilings;
+}
+
+static uint64_t
+image_get_gen6_estimated_size(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ /* padding not considered */
+ const uint64_t slice_size = info->width * info->height *
+ info->block_size / (info->block_width * info->block_height);
+ const uint64_t slice_count =
+ info->depth * info->array_size * info->sample_count;
+ const uint64_t estimated_size = slice_size * slice_count;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (info->level_count == 1)
+ return estimated_size;
+ else
+ return estimated_size * 4 / 3;
+}
+
+static enum gen_surface_tiling
+image_get_gen6_tiling(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ uint8_t valid_tilings)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ switch (valid_tilings) {
+ case IMAGE_TILING_NONE:
+ return GEN6_TILING_NONE;
+ case IMAGE_TILING_X:
+ return GEN6_TILING_X;
+ case IMAGE_TILING_Y:
+ return GEN6_TILING_Y;
+ case IMAGE_TILING_W:
+ return GEN8_TILING_W;
+ default:
+ break;
+ }
+
+ /*
+ * X-tiling has the property that vertically adjacent pixels are usually in
+ * the same page. When the image size is less than a page, the image
+ * height is 1, or when the image is not accessed in blocks, there is no
+ * reason to tile.
+ *
+ * Y-tiling is similar, where vertically adjacent pixels are usually in the
+ * same cacheline.
+ */
+ if (valid_tilings & IMAGE_TILING_NONE) {
+ const uint64_t estimated_size =
+ image_get_gen6_estimated_size(dev, info);
+
+ if (info->height == 1 || !(info->bind_surface_sampler ||
+ info->bind_surface_dp_render ||
+ info->bind_surface_dp_typed))
+ return GEN6_TILING_NONE;
+
+ if (estimated_size <= 64 ||
+ estimated_size > info->prefer_linear_threshold)
+ return GEN6_TILING_NONE;
+
+ if (estimated_size <= 2048)
+ valid_tilings &= ~IMAGE_TILING_X;
+ }
+
+ return (valid_tilings & IMAGE_TILING_Y) ? GEN6_TILING_Y :
+ (valid_tilings & IMAGE_TILING_X) ? GEN6_TILING_X :
+ GEN6_TILING_NONE;
+}
+
+static bool
+image_get_gen6_hiz_enable(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* depth buffer? */
+ if (!info->bind_zs ||
+ info->format == GEN6_FORMAT_R8_UINT ||
+ info->interleaved_stencil)
+ return false;
+
+ /* we want to be able to force 8x4 alignments */
+ if (info->type == GEN6_SURFTYPE_1D)
+ return false;
+
+ if (info->aux_disable)
+ return false;
+
+ if (ilo_debug & ILO_DEBUG_NOHIZ)
+ return false;
+
+ return true;
+}
+
+static bool
+image_get_gen7_mcs_enable(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ enum gen_surface_tiling tiling)
+{
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ if (!info->bind_surface_sampler && !info->bind_surface_dp_render)
+ return false;
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 77:
+ *
+ * "For Render Target and Sampling Engine Surfaces:If the surface is
+ * multisampled (Number of Multisamples any value other than
+ * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
+ *
+ * "This field must be set to 0 for all SINT MSRTs when all RT channels
+ * are not written"
+ */
+ if (info->sample_count > 1) {
+ if (ilo_dev_gen(dev) < ILO_GEN(8))
+ assert(!info->is_integer);
+ return true;
+ }
+
+ if (info->aux_disable)
+ return false;
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 326:
+ *
+ * "When MCS is buffer is used for color clear of non-multisampler
+ * render target, the following restrictions apply.
+ * - Support is limited to tiled render targets.
+ * - Support is for non-mip-mapped and non-array surface types only.
+ * - Clear is supported only on the full RT; i.e., no partial clear or
+ * overlapping clears.
+ * - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
+ * 64bpp and 128bpp.
+ * ..."
+ *
+ * How about SURFTYPE_3D?
+ */
+ if (!info->bind_surface_dp_render ||
+ tiling == GEN6_TILING_NONE ||
+ info->level_count > 1 ||
+ info->array_size > 1)
+ return false;
+
+ switch (info->block_size) {
+ case 4:
+ case 8:
+ case 16:
+ return true;
+ default:
+ return false;
}
}
static void
-img_init_alignments(struct ilo_image *img,
- const struct ilo_image_params *params)
+image_get_gen6_alignments(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ int *align_i, int *align_j)
{
- const struct pipe_resource *templ = params->templ;
+ ILO_DEV_ASSERT(dev, 6, 6);
/*
* From the Sandy Bridge PRM, volume 1 part 1, page 113:
@@ -335,13 +422,33 @@ img_init_alignments(struct ilo_image *img,
*
* align_i align_j
* compressed formats block width block height
- * PIPE_FORMAT_S8_UINT 4 2
+ * GEN6_FORMAT_R8_UINT 4 2
* other depth/stencil formats 4 4
* 4x multisampled 4 4
* bpp 96 4 2
* others 4 2 or 4
*/
+ *align_i = (info->compressed) ? info->block_width : 4;
+ if (info->compressed) {
+ *align_j = info->block_height;
+ } else if (info->bind_zs) {
+ *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4;
+ } else {
+ *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2;
+ }
+}
+
+static void
+image_get_gen7_alignments(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ enum gen_surface_tiling tiling,
+ int *align_i, int *align_j)
+{
+ int i, j;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
/*
* From the Ivy Bridge PRM, volume 1 part 1, page 110:
*
@@ -383,465 +490,301 @@ img_init_alignments(struct ilo_image *img,
*
* align_i align_j
* compressed formats block width block height
- * PIPE_FORMAT_Z16_UNORM 8 4
- * PIPE_FORMAT_S8_UINT 8 8
+ * GEN6_FORMAT_R16_UNORM 8 4
+ * GEN6_FORMAT_R8_UINT 8 8
* other depth/stencil formats 4 4
* 2x or 4x multisampled 4 or 8 4
* tiled Y 4 or 8 4 (if rt)
- * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
+ * GEN6_FORMAT_R32G32B32_FLOAT 4 or 8 2
* others 4 or 8 2 or 4
*/
-
- if (params->compressed) {
- /* this happens to be the case */
- img->align_i = img->block_width;
- img->align_j = img->block_height;
- } else if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7)) {
- switch (img->format) {
- case PIPE_FORMAT_Z16_UNORM:
- img->align_i = 8;
- img->align_j = 4;
- break;
- case PIPE_FORMAT_S8_UINT:
- img->align_i = 8;
- img->align_j = 8;
- break;
- default:
- img->align_i = 4;
- img->align_j = 4;
- break;
- }
- } else {
- switch (img->format) {
- case PIPE_FORMAT_S8_UINT:
- img->align_i = 4;
- img->align_j = 2;
- break;
- default:
- img->align_i = 4;
- img->align_j = 4;
- break;
- }
+ if (info->compressed) {
+ i = info->block_width;
+ j = info->block_height;
+ } else if (info->bind_zs) {
+ switch (info->format) {
+ case GEN6_FORMAT_R16_UNORM:
+ i = 8;
+ j = 4;
+ break;
+ case GEN6_FORMAT_R8_UINT:
+ i = 8;
+ j = 8;
+ break;
+ default:
+ i = 4;
+ j = 4;
+ break;
}
} else {
const bool valign_4 =
- (templ->nr_samples > 1) ||
- (ilo_dev_gen(params->dev) >= ILO_GEN(8)) ||
- (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
- img->tiling == GEN6_TILING_Y &&
- (templ->bind & PIPE_BIND_RENDER_TARGET));
+ (info->sample_count > 1 || ilo_dev_gen(dev) >= ILO_GEN(8) ||
+ (tiling == GEN6_TILING_Y && info->bind_surface_dp_render));
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
- ilo_dev_gen(params->dev) <= ILO_GEN(7.5) && valign_4)
- assert(img->format != PIPE_FORMAT_R32G32B32_FLOAT);
+ if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4)
+ assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
- img->align_i = 4;
- img->align_j = (valign_4) ? 4 : 2;
+ i = 4;
+ j = (valign_4) ? 4 : 2;
}
- /*
- * the fact that align i and j are multiples of block width and height
- * respectively is what makes the size of the bo a multiple of the block
- * size, slices start at block boundaries, and many of the computations
- * work.
- */
- assert(img->align_i % img->block_width == 0);
- assert(img->align_j % img->block_height == 0);
-
- /* make sure align() works */
- assert(util_is_power_of_two(img->align_i) &&
- util_is_power_of_two(img->align_j));
- assert(util_is_power_of_two(img->block_width) &&
- util_is_power_of_two(img->block_height));
-}
-
-static void
-img_init_tiling(struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
- unsigned preferred_tilings = params->valid_tilings;
-
- /* no fencing nor BLT support */
- if (preferred_tilings & ~IMAGE_TILING_W)
- preferred_tilings &= ~IMAGE_TILING_W;
-
- if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
- /*
- * heuristically set a minimum width/height for enabling tiling
- */
- if (img->width0 < 64 && (preferred_tilings & ~IMAGE_TILING_X))
- preferred_tilings &= ~IMAGE_TILING_X;
-
- if ((img->width0 < 32 || img->height0 < 16) &&
- (img->width0 < 16 || img->height0 < 32) &&
- (preferred_tilings & ~IMAGE_TILING_Y))
- preferred_tilings &= ~IMAGE_TILING_Y;
- } else {
- /* force linear if we are not sure where the texture is bound to */
- if (preferred_tilings & IMAGE_TILING_NONE)
- preferred_tilings &= IMAGE_TILING_NONE;
- }
-
- /* prefer tiled over linear */
- if (preferred_tilings & IMAGE_TILING_Y)
- img->tiling = GEN6_TILING_Y;
- else if (preferred_tilings & IMAGE_TILING_X)
- img->tiling = GEN6_TILING_X;
- else if (preferred_tilings & IMAGE_TILING_W)
- img->tiling = GEN8_TILING_W;
- else
- img->tiling = GEN6_TILING_NONE;
-}
-
-static void
-img_init_walk_gen7(struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
-
- /*
- * It is not explicitly states, but render targets are expected to be
- * UMS/CMS (samples non-interleaved) and depth/stencil buffers are expected
- * to be IMS (samples interleaved).
- *
- * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
- */
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
- /*
- * From the Ivy Bridge PRM, volume 1 part 1, page 111:
- *
- * "note that the depth buffer and stencil buffer have an implied
- * value of ARYSPC_FULL"
- */
- img->walk = (templ->target == PIPE_TEXTURE_3D) ?
- ILO_IMAGE_WALK_3D : ILO_IMAGE_WALK_LAYER;
-
- img->interleaved_samples = true;
- } else {
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 66:
- *
- * "If Multisampled Surface Storage Format is MSFMT_MSS and Number
- * of Multisamples is not MULTISAMPLECOUNT_1, this field (Surface
- * Array Spacing) must be set to ARYSPC_LOD0."
- *
- * As multisampled resources are not mipmapped, we never use
- * ARYSPC_FULL for them.
- */
- if (templ->nr_samples > 1)
- assert(templ->last_level == 0);
-
- img->walk =
- (templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
- (templ->last_level > 0) ? ILO_IMAGE_WALK_LAYER :
- ILO_IMAGE_WALK_LOD;
-
- img->interleaved_samples = false;
- }
-}
-
-static void
-img_init_walk_gen6(struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- /*
- * From the Sandy Bridge PRM, volume 1 part 1, page 115:
- *
- * "The separate stencil buffer does not support mip mapping, thus the
- * storage for LODs other than LOD 0 is not needed. The following
- * QPitch equation applies only to the separate stencil buffer:
- *
- * QPitch = h_0"
- *
- * GEN6 does not support compact spacing otherwise.
- */
- img->walk =
- (params->templ->target == PIPE_TEXTURE_3D) ? ILO_IMAGE_WALK_3D :
- (img->format == PIPE_FORMAT_S8_UINT) ? ILO_IMAGE_WALK_LOD :
- ILO_IMAGE_WALK_LAYER;
-
- /* GEN6 supports only interleaved samples */
- img->interleaved_samples = true;
-}
-
-static void
-img_init_walk(struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
- img_init_walk_gen7(img, params);
- else
- img_init_walk_gen6(img, params);
-}
-
-static unsigned
-img_get_valid_tilings(const struct ilo_image *img,
- const struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
- const enum pipe_format format = img->format;
- unsigned valid_tilings = params->valid_tilings;
-
- /*
- * From the Sandy Bridge PRM, volume 1 part 2, page 32:
- *
- * "Display/Overlay Y-Major not supported.
- * X-Major required for Async Flips"
- */
- if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
- valid_tilings &= IMAGE_TILING_X;
-
- /*
- * From the Sandy Bridge PRM, volume 3 part 2, page 158:
- *
- * "The cursor surface address must be 4K byte aligned. The cursor must
- * be in linear memory, it cannot be tiled."
- */
- if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
- valid_tilings &= IMAGE_TILING_NONE;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 318:
- *
- * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
- * Depth Buffer is not supported."
- *
- * "The Depth Buffer, if tiled, must use Y-Major tiling."
- *
- * From the Sandy Bridge PRM, volume 1 part 2, page 22:
- *
- * "W-Major Tile Format is used for separate stencil."
- */
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
- switch (format) {
- case PIPE_FORMAT_S8_UINT:
- valid_tilings &= IMAGE_TILING_W;
- break;
- default:
- valid_tilings &= IMAGE_TILING_Y;
- break;
- }
- }
-
- if (templ->bind & PIPE_BIND_RENDER_TARGET) {
- /*
- * From the Sandy Bridge PRM, volume 1 part 2, page 32:
- *
- * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
- * either TileX or Linear."
- *
- * From the Haswell PRM, volume 5, page 32:
- *
- * "NOTE: 128 BPP format color buffer (render target) supports
- * Linear, TiledX and TiledY."
- */
- if (ilo_dev_gen(params->dev) < ILO_GEN(7.5) && img->block_size == 16)
- valid_tilings &= ~IMAGE_TILING_Y;
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 63:
- *
- * "This field (Surface Vertical Aligment) must be set to VALIGN_4
- * for all tiled Y Render Target surfaces."
- *
- * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
- */
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7) &&
- ilo_dev_gen(params->dev) <= ILO_GEN(7.5) &&
- img->format == PIPE_FORMAT_R32G32B32_FLOAT)
- valid_tilings &= ~IMAGE_TILING_Y;
-
- valid_tilings &= ~IMAGE_TILING_W;
- }
-
- if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
- if (ilo_dev_gen(params->dev) < ILO_GEN(8))
- valid_tilings &= ~IMAGE_TILING_W;
- }
-
- /* no conflicting binding flags */
- assert(valid_tilings);
-
- return valid_tilings;
-}
-
-static void
-img_init_size_and_format(struct ilo_image *img,
- struct ilo_image_params *params)
-{
- const struct pipe_resource *templ = params->templ;
- enum pipe_format format = templ->format;
- bool require_separate_stencil = false;
-
- img->target = templ->target;
- img->width0 = templ->width0;
- img->height0 = templ->height0;
- img->depth0 = templ->depth0;
- img->array_size = templ->array_size;
- img->level_count = templ->last_level + 1;
- img->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
- *
- * "This field (Separate Stencil Buffer Enable) must be set to the same
- * value (enabled or disabled) as Hierarchical Depth Buffer Enable."
- *
- * GEN7+ requires separate stencil buffers.
- */
- if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
- require_separate_stencil = true;
- else
- require_separate_stencil = (img->aux.type == ILO_IMAGE_AUX_HIZ);
- }
-
- switch (format) {
- case PIPE_FORMAT_ETC1_RGB8:
- format = PIPE_FORMAT_R8G8B8X8_UNORM;
- break;
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- if (require_separate_stencil) {
- format = PIPE_FORMAT_Z24X8_UNORM;
- img->separate_stencil = true;
- }
- break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- if (require_separate_stencil) {
- format = PIPE_FORMAT_Z32_FLOAT;
- img->separate_stencil = true;
- }
- break;
- default:
- break;
- }
-
- img->format = format;
- img->block_width = util_format_get_blockwidth(format);
- img->block_height = util_format_get_blockheight(format);
- img->block_size = util_format_get_blocksize(format);
-
- params->valid_tilings = img_get_valid_tilings(img, params);
- params->compressed = util_format_is_compressed(img->format);
+ *align_i = i;
+ *align_j = j;
}
static bool
-img_want_mcs(const struct ilo_image *img,
- const struct ilo_image_params *params)
+image_init_gen6_hardware_layout(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
{
- const struct pipe_resource *templ = params->templ;
- bool want_mcs = false;
+ ILO_DEV_ASSERT(dev, 6, 8);
- /* MCS is for RT on GEN7+ */
- if (ilo_dev_gen(params->dev) < ILO_GEN(7))
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ layout->walk = image_get_gen7_walk(dev, info);
+ else
+ layout->walk = image_get_gen6_walk(dev, info);
+
+ layout->interleaved_samples =
+ image_get_gen6_interleaved_samples(dev, info);
+
+ layout->valid_tilings = image_get_gen6_valid_tilings(dev, info);
+ if (!layout->valid_tilings)
return false;
- if (templ->target != PIPE_TEXTURE_2D ||
- !(templ->bind & PIPE_BIND_RENDER_TARGET))
- return false;
+ layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings);
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 77:
- *
- * "For Render Target and Sampling Engine Surfaces:If the surface is
- * multisampled (Number of Multisamples any value other than
- * MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
- *
- * "This field must be set to 0 for all SINT MSRTs when all RT channels
- * are not written"
- */
- if (templ->nr_samples > 1 && !util_format_is_pure_sint(templ->format)) {
- want_mcs = true;
- } else if (templ->nr_samples <= 1) {
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 326:
- *
- * "When MCS is buffer is used for color clear of non-multisampler
- * render target, the following restrictions apply.
- * - Support is limited to tiled render targets.
- * - Support is for non-mip-mapped and non-array surface types
- * only.
- * - Clear is supported only on the full RT; i.e., no partial clear
- * or overlapping clears.
- * - MCS buffer for non-MSRT is supported only for RT formats
- * 32bpp, 64bpp and 128bpp.
- * ..."
- */
- if (img->tiling != GEN6_TILING_NONE &&
- templ->last_level == 0 && templ->array_size == 1) {
- switch (img->block_size) {
- case 4:
- case 8:
- case 16:
- want_mcs = true;
- break;
- default:
- break;
- }
- }
+ if (image_get_gen6_hiz_enable(dev, info))
+ layout->aux = ILO_IMAGE_AUX_HIZ;
+ else if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
+ image_get_gen7_mcs_enable(dev, info, layout->tiling))
+ layout->aux = ILO_IMAGE_AUX_MCS;
+ else
+ layout->aux = ILO_IMAGE_AUX_NONE;
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ image_get_gen7_alignments(dev, info, layout->tiling,
+ &layout->align_i, &layout->align_j);
+ } else {
+ image_get_gen6_alignments(dev, info,
+ &layout->align_i, &layout->align_j);
}
- return want_mcs;
+ return true;
}
static bool
-img_want_hiz(const struct ilo_image *img,
- const struct ilo_image_params *params)
+image_init_gen6_transfer_layout(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
{
- const struct pipe_resource *templ = params->templ;
- const struct util_format_description *desc =
- util_format_description(templ->format);
+ ILO_DEV_ASSERT(dev, 6, 8);
- if (ilo_debug & ILO_DEBUG_NOHIZ)
- return false;
-
- /* we want 8x4 aligned levels */
- if (templ->target == PIPE_TEXTURE_1D)
- return false;
-
- if (!(templ->bind & PIPE_BIND_DEPTH_STENCIL))
- return false;
-
- if (!util_format_has_depth(desc))
- return false;
-
- /* no point in having HiZ */
- if (templ->usage == PIPE_USAGE_STAGING)
- return false;
-
- /*
- * As can be seen in img_calculate_hiz_size(), HiZ may not be enabled
- * for every level. This is generally fine except on GEN6, where HiZ and
- * separate stencil are enabled and disabled at the same time. When the
- * format is PIPE_FORMAT_Z32_FLOAT_S8X24_UINT, enabling and disabling HiZ
- * can result in incompatible formats.
- */
- if (ilo_dev_gen(params->dev) == ILO_GEN(6) &&
- templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- templ->last_level)
- return false;
+ /* we can define our own layout to save space */
+ layout->walk = ILO_IMAGE_WALK_LOD;
+ layout->interleaved_samples = false;
+ layout->valid_tilings = IMAGE_TILING_NONE;
+ layout->tiling = GEN6_TILING_NONE;
+ layout->aux = ILO_IMAGE_AUX_NONE;
+ layout->align_i = info->block_width;
+ layout->align_j = info->block_height;
return true;
}
static void
-img_init_aux(struct ilo_image *img,
- const struct ilo_image_params *params)
+image_get_gen6_slice_size(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout,
+ uint8_t level,
+ int *width, int *height)
{
- if (img_want_hiz(img, params))
- img->aux.type = ILO_IMAGE_AUX_HIZ;
- else if (img_want_mcs(img, params))
- img->aux.type = ILO_IMAGE_AUX_MCS;
+ int w, h;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ w = u_minify(info->width, level);
+ h = u_minify(info->height, level);
+
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 114:
+ *
+ * "The dimensions of the mip maps are first determined by applying the
+ * sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
+ * if necessary, they are padded out to compression block boundaries."
+ */
+ w = align(w, info->block_width);
+ h = align(h, info->block_height);
+
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 111:
+ *
+ * "If the surface is multisampled (4x), these values must be adjusted
+ * as follows before proceeding:
+ *
+ * W_L = ceiling(W_L / 2) * 4
+ * H_L = ceiling(H_L / 2) * 4"
+ *
+ * From the Ivy Bridge PRM, volume 1 part 1, page 108:
+ *
+ * "If the surface is multisampled and it is a depth or stencil surface
+ * or Multisampled Surface StorageFormat in SURFACE_STATE is
+ * MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
+ * proceeding:
+ *
+ * #samples W_L = H_L =
+ * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
+ * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
+ * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
+ * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
+ *
+ * For interleaved samples (4x), where pixels
+ *
+ * (x, y ) (x+1, y )
+ * (x, y+1) (x+1, y+1)
+ *
+ * would be occupied by
+ *
+ * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
+ * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
+ * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
+ * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
+ *
+ * Thus the need to
+ *
+ * w = align(w, 2) * 2;
+ * h = align(h, 2) * 2;
+ */
+ if (layout->interleaved_samples) {
+ switch (info->sample_count) {
+ case 1:
+ break;
+ case 2:
+ w = align(w, 2) * 2;
+ break;
+ case 4:
+ w = align(w, 2) * 2;
+ h = align(h, 2) * 2;
+ break;
+ case 8:
+ w = align(w, 2) * 4;
+ h = align(h, 2) * 2;
+ break;
+ case 16:
+ w = align(w, 2) * 4;
+ h = align(h, 2) * 4;
+ break;
+ default:
+ assert(!"unsupported sample count");
+ break;
+ }
+ }
+
+ /*
+ * From the Ivy Bridge PRM, volume 1 part 1, page 108:
+ *
+ * "For separate stencil buffer, the width must be mutiplied by 2 and
+ * height divided by 2..."
+ *
+ * To make things easier (for transfer), we will just double the stencil
+ * stride in 3DSTATE_STENCIL_BUFFER.
+ */
+ w = align(w, layout->align_i);
+ h = align(h, layout->align_j);
+
+ *width = w;
+ *height = h;
+}
+
+static int
+image_get_gen6_layer_count(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout)
+{
+ int count = info->array_size;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /* samples of the same index are stored in a layer */
+ if (!layout->interleaved_samples)
+ count *= info->sample_count;
+
+ return count;
}
static void
-img_align(struct ilo_image *img, struct ilo_image_params *params)
+image_get_gen6_walk_layer_heights(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ layout->walk_layer_h0 = layout->lods[0].slice_height;
+
+ if (info->level_count > 1) {
+ layout->walk_layer_h1 = layout->lods[1].slice_height;
+ } else {
+ int dummy;
+ image_get_gen6_slice_size(dev, info, layout, 1,
+ &dummy, &layout->walk_layer_h1);
+ }
+
+ if (image_get_gen6_layer_count(dev, info, layout) == 1) {
+ layout->walk_layer_height = 0;
+ return;
+ }
+
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 1, page 115:
+ *
+ * "The following equation is used for surface formats other than
+ * compressed textures:
+ *
+ * QPitch = (h0 + h1 + 11j)"
+ *
+ * "The equation for compressed textures (BC* and FXT1 surface formats)
+ * follows:
+ *
+ * QPitch = (h0 + h1 + 11j) / 4"
+ *
+ * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
+ * value calculated in the equation above, for every other odd Surface
+ * Height starting from 1 i.e. 1,5,9,13"
+ *
+ * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
+ *
+ * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
+ * buffer and stencil buffer have an implied value of ARYSPC_FULL):
+ *
+ * QPitch = (h0 + h1 + 12j)
+ * QPitch = (h0 + h1 + 12j) / 4 (compressed)
+ *
+ * (There are many typos or missing words here...)"
+ *
+ * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
+ * the base address. The PRM divides QPitch by 4 for compressed formats
+ * because the block height for those formats is 4, and it wants QPitch to
+ * mean the number of memory rows, as opposed to texel rows, between
+ * slices. Since we use texel rows everywhere, we do not need to divide
+ * QPitch by 4.
+ */
+ layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 +
+ ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
+
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 &&
+ info->height % 4 == 1)
+ layout->walk_layer_height += 4;
+}
+
+static void
+image_get_gen6_monolithic_size(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout,
+ int max_x, int max_y)
{
- const struct pipe_resource *templ = params->templ;
int align_w = 1, align_h = 1, pad_h = 0;
+ ILO_DEV_ASSERT(dev, 6, 8);
+
/*
* From the Sandy Bridge PRM, volume 1 part 1, page 118:
*
@@ -864,15 +807,15 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
* padding purposes. The value of 4 for j still applies for mip level
* alignment and QPitch calculation."
*/
- if (templ->bind & PIPE_BIND_SAMPLER_VIEW) {
- align_w = MAX2(align_w, img->align_i);
- align_h = MAX2(align_h, img->align_j);
+ if (info->bind_surface_sampler) {
+ align_w = MAX2(align_w, layout->align_i);
+ align_h = MAX2(align_h, layout->align_j);
- if (templ->target == PIPE_TEXTURE_CUBE)
+ if (info->type == GEN6_SURFTYPE_CUBE)
pad_h += 2;
- if (params->compressed)
- align_h = MAX2(align_h, img->align_j * 2);
+ if (info->compressed)
+ align_h = MAX2(align_h, layout->align_j * 2);
}
/*
@@ -881,149 +824,288 @@ img_align(struct ilo_image *img, struct ilo_image_params *params)
* "If the surface contains an odd number of rows of data, a final row
* below the surface must be allocated."
*/
- if (templ->bind & PIPE_BIND_RENDER_TARGET)
+ if (info->bind_surface_dp_render)
align_h = MAX2(align_h, 2);
/*
* Depth Buffer Clear/Resolve works in 8x4 sample blocks. Pad to allow HiZ
* for unaligned non-mipmapped and non-array images.
*/
- if (img->aux.type == ILO_IMAGE_AUX_HIZ &&
- templ->last_level == 0 &&
- templ->array_size == 1 &&
- templ->depth0 == 1) {
+ if (layout->aux == ILO_IMAGE_AUX_HIZ &&
+ info->level_count == 1 && info->array_size == 1 && info->depth == 1) {
align_w = MAX2(align_w, 8);
align_h = MAX2(align_h, 4);
}
- params->max_x = align(params->max_x, align_w);
- params->max_y = align(params->max_y + pad_h, align_h);
+ layout->monolithic_width = align(max_x, align_w);
+ layout->monolithic_height = align(max_y + pad_h, align_h);
}
-/* note that this may force the texture to be linear */
static void
-img_calculate_bo_size(struct ilo_image *img,
- const struct ilo_image_params *params)
+image_get_gen6_lods(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
{
- assert(params->max_x % img->block_width == 0);
- assert(params->max_y % img->block_height == 0);
- assert(img->walk_layer_height % img->block_height == 0);
+ const int layer_count = image_get_gen6_layer_count(dev, info, layout);
+ int cur_x, cur_y, max_x, max_y;
+ uint8_t lv;
- img->bo_stride =
- (params->max_x / img->block_width) * img->block_size;
- img->bo_height = params->max_y / img->block_height;
+ ILO_DEV_ASSERT(dev, 6, 8);
- while (true) {
- unsigned w = img->bo_stride, h = img->bo_height;
- unsigned align_w, align_h;
+ cur_x = 0;
+ cur_y = 0;
+ max_x = 0;
+ max_y = 0;
+ for (lv = 0; lv < info->level_count; lv++) {
+ int slice_w, slice_h, lod_w, lod_h;
- /*
- * From the Haswell PRM, volume 5, page 163:
- *
- * "For linear surfaces, additional padding of 64 bytes is required
- * at the bottom of the surface. This is in addition to the padding
- * required above."
- */
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7.5) &&
- (params->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
- img->tiling == GEN6_TILING_NONE)
- h += (64 + img->bo_stride - 1) / img->bo_stride;
+ image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h);
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "- For linear render target surfaces, the pitch must be a
- * multiple of the element size for non-YUV surface formats.
- * Pitch must be a multiple of 2 * element size for YUV surface
- * formats.
- * - For other linear surfaces, the pitch can be any multiple of
- * bytes.
- * - For tiled surfaces, the pitch must be a multiple of the tile
- * width."
- *
- * Different requirements may exist when the bo is used in different
- * places, but our alignments here should be good enough that we do not
- * need to check params->templ->bind.
- */
- switch (img->tiling) {
- case GEN6_TILING_X:
- align_w = 512;
- align_h = 8;
+ layout->lods[lv].x = cur_x;
+ layout->lods[lv].y = cur_y;
+ layout->lods[lv].slice_width = slice_w;
+ layout->lods[lv].slice_height = slice_h;
+
+ switch (layout->walk) {
+ case ILO_IMAGE_WALK_LAYER:
+ lod_w = slice_w;
+ lod_h = slice_h;
+
+ /* MIPLAYOUT_BELOW */
+ if (lv == 1)
+ cur_x += lod_w;
+ else
+ cur_y += lod_h;
break;
- case GEN6_TILING_Y:
- align_w = 128;
- align_h = 32;
+ case ILO_IMAGE_WALK_LOD:
+ lod_w = slice_w;
+ lod_h = slice_h * layer_count;
+
+ if (lv == 1)
+ cur_x += lod_w;
+ else
+ cur_y += lod_h;
+
+ /* every LOD begins at tile boundaries */
+ if (info->level_count > 1) {
+ assert(info->format == GEN6_FORMAT_R8_UINT);
+ cur_x = align(cur_x, 64);
+ cur_y = align(cur_y, 64);
+ }
break;
- case GEN8_TILING_W:
- /*
- * From the Sandy Bridge PRM, volume 1 part 2, page 22:
- *
- * "A 4KB tile is subdivided into 8-high by 8-wide array of
- * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
- * bytes."
- */
- align_w = 64;
- align_h = 64;
+ case ILO_IMAGE_WALK_3D:
+ {
+ const int slice_count = u_minify(info->depth, lv);
+ const int slice_count_per_row = 1 << lv;
+ const int row_count =
+ (slice_count + slice_count_per_row - 1) / slice_count_per_row;
+
+ lod_w = slice_w * slice_count_per_row;
+ lod_h = slice_h * row_count;
+ }
+
+ cur_y += lod_h;
break;
default:
- assert(img->tiling == GEN6_TILING_NONE);
- /* some good enough values */
- align_w = 64;
- align_h = 2;
+ assert(!"unknown walk type");
+ lod_w = 0;
+ lod_h = 0;
break;
}
- w = align(w, align_w);
- h = align(h, align_h);
-
- /* make sure the bo is mappable */
- if (img->tiling != GEN6_TILING_NONE) {
- /*
- * Usually only the first 256MB of the GTT is mappable.
- *
- * See also how intel_context::max_gtt_map_object_size is calculated.
- */
- const size_t mappable_gtt_size = 256 * 1024 * 1024;
-
- /*
- * Be conservative. We may be able to switch from VALIGN_4 to
- * VALIGN_2 if the image was Y-tiled, but let's keep it simple.
- */
- if (mappable_gtt_size / w / 4 < h) {
- if (params->valid_tilings & IMAGE_TILING_NONE) {
- img->tiling = GEN6_TILING_NONE;
- /* MCS support for non-MSRTs is limited to tiled RTs */
- if (img->aux.type == ILO_IMAGE_AUX_MCS &&
- params->templ->nr_samples <= 1)
- img->aux.type = ILO_IMAGE_AUX_NONE;
-
- continue;
- } else {
- ilo_warn("cannot force texture to be linear\n");
- }
- }
- }
-
- img->bo_stride = w;
- img->bo_height = h;
- break;
+ if (max_x < layout->lods[lv].x + lod_w)
+ max_x = layout->lods[lv].x + lod_w;
+ if (max_y < layout->lods[lv].y + lod_h)
+ max_y = layout->lods[lv].y + lod_h;
}
+
+ if (layout->walk == ILO_IMAGE_WALK_LAYER) {
+ image_get_gen6_walk_layer_heights(dev, info, layout);
+ if (layer_count > 1)
+ max_y += layout->walk_layer_height * (layer_count - 1);
+ } else {
+ layout->walk_layer_h0 = 0;
+ layout->walk_layer_h1 = 0;
+ layout->walk_layer_height = 0;
+ }
+
+ image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y);
}
-static void
-img_calculate_hiz_size(struct ilo_image *img,
- const struct ilo_image_params *params)
+static bool
+image_bind_gpu(const struct ilo_image_info *info)
{
- const struct pipe_resource *templ = params->templ;
- const unsigned hz_align_j = 8;
+ return (info->bind_surface_sampler ||
+ info->bind_surface_dp_render ||
+ info->bind_surface_dp_typed ||
+ info->bind_zs ||
+ info->bind_scanout ||
+ info->bind_cursor);
+}
+
+static bool
+image_validate_gen6(const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 2 part 1, page 314:
+ *
+ * "The separate stencil buffer is always enabled, thus the field in
+ * 3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
+ * buffer has been removed Surface formats with interleaved depth and
+ * stencil are no longer supported"
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs)
+ assert(!info->interleaved_stencil);
+
+ return true;
+}
+
+static bool
+image_get_gen6_layout(const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ struct ilo_image_layout *layout)
+{
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ if (!image_validate_gen6(dev, info))
+ return false;
+
+ if (image_bind_gpu(info) || info->level_count > 1) {
+ if (!image_init_gen6_hardware_layout(dev, info, layout))
+ return false;
+ } else {
+ if (!image_init_gen6_transfer_layout(dev, info, layout))
+ return false;
+ }
+
+ /*
+ * the fact that align i and j are multiples of block width and height
+ * respectively is what makes the size of the bo a multiple of the block
+ * size, slices start at block boundaries, and many of the computations
+ * work.
+ */
+ assert(layout->align_i % info->block_width == 0);
+ assert(layout->align_j % info->block_height == 0);
+
+ /* make sure align() works */
+ assert(util_is_power_of_two(layout->align_i) &&
+ util_is_power_of_two(layout->align_j));
+ assert(util_is_power_of_two(info->block_width) &&
+ util_is_power_of_two(info->block_height));
+
+ image_get_gen6_lods(dev, info, layout);
+
+ assert(layout->walk_layer_height % info->block_height == 0);
+ assert(layout->monolithic_width % info->block_width == 0);
+ assert(layout->monolithic_height % info->block_height == 0);
+
+ return true;
+}
+
+static bool
+image_set_gen6_bo_size(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout)
+{
+ int stride, height;
+ int align_w, align_h;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ stride = (layout->monolithic_width / info->block_width) * info->block_size;
+ height = layout->monolithic_height / info->block_height;
+
+ /*
+ * From the Haswell PRM, volume 5, page 163:
+ *
+ * "For linear surfaces, additional padding of 64 bytes is required
+ * at the bottom of the surface. This is in addition to the padding
+ * required above."
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler &&
+ layout->tiling == GEN6_TILING_NONE)
+ height += (64 + stride - 1) / stride;
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+ *
+ * "- For linear render target surfaces, the pitch must be a multiple
+ * of the element size for non-YUV surface formats. Pitch must be a
+ * multiple of 2 * element size for YUV surface formats.
+ *
+ * - For other linear surfaces, the pitch can be any multiple of
+ * bytes.
+ * - For tiled surfaces, the pitch must be a multiple of the tile
+ * width."
+ *
+ * Different requirements may exist when the image is used in different
+ * places, but our alignments here should be good enough that we do not
+ * need to check info->bind_x.
+ */
+ switch (layout->tiling) {
+ case GEN6_TILING_X:
+ align_w = 512;
+ align_h = 8;
+ break;
+ case GEN6_TILING_Y:
+ align_w = 128;
+ align_h = 32;
+ break;
+ case GEN8_TILING_W:
+ /*
+ * From the Sandy Bridge PRM, volume 1 part 2, page 22:
+ *
+ * "A 4KB tile is subdivided into 8-high by 8-wide array of
+ * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
+ * bytes."
+ */
+ align_w = 64;
+ align_h = 64;
+ break;
+ default:
+ assert(layout->tiling == GEN6_TILING_NONE);
+ /* some good enough values */
+ align_w = 64;
+ align_h = 2;
+ break;
+ }
+
+ if (info->force_bo_stride) {
+ if (info->force_bo_stride % align_w || info->force_bo_stride < stride)
+ return false;
+
+ img->bo_stride = info->force_bo_stride;
+ } else {
+ img->bo_stride = align(stride, align_w);
+ }
+
+ img->bo_height = align(height, align_h);
+
+ return true;
+}
+
+static bool
+image_set_gen6_hiz(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout)
+{
+ const int hz_align_j = 8;
enum ilo_image_walk_type hz_walk;
- unsigned hz_width, hz_height, lv;
- unsigned hz_clear_w, hz_clear_h;
+ int hz_width, hz_height;
+ int hz_clear_w, hz_clear_h;
+ uint8_t lv;
- assert(img->aux.type == ILO_IMAGE_AUX_HIZ);
+ ILO_DEV_ASSERT(dev, 6, 8);
- assert(img->walk == ILO_IMAGE_WALK_LAYER ||
- img->walk == ILO_IMAGE_WALK_3D);
+ assert(layout->aux == ILO_IMAGE_AUX_HIZ);
+
+ assert(layout->walk == ILO_IMAGE_WALK_LAYER ||
+ layout->walk == ILO_IMAGE_WALK_3D);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 312:
@@ -1036,8 +1118,8 @@ img_calculate_hiz_size(struct ilo_image *img,
*
* We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
*/
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
- hz_walk = img->walk;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
+ hz_walk = layout->walk;
else
hz_walk = ILO_IMAGE_WALK_LOD;
@@ -1051,16 +1133,16 @@ img_calculate_hiz_size(struct ilo_image *img,
switch (hz_walk) {
case ILO_IMAGE_WALK_LAYER:
{
- const unsigned h0 = align(params->h0, hz_align_j);
- const unsigned h1 = align(params->h1, hz_align_j);
- const unsigned htail =
- ((ilo_dev_gen(params->dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
- const unsigned hz_qpitch = h0 + h1 + htail;
+ const int h0 = align(layout->walk_layer_h0, hz_align_j);
+ const int h1 = align(layout->walk_layer_h1, hz_align_j);
+ const int htail =
+ ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
+ const int hz_qpitch = h0 + h1 + htail;
- hz_width = align(img->lods[0].slice_width, 16);
+ hz_width = align(layout->lods[0].slice_width, 16);
- hz_height = hz_qpitch * templ->array_size / 2;
- if (ilo_dev_gen(params->dev) >= ILO_GEN(7))
+ hz_height = hz_qpitch * info->array_size / 2;
+ if (ilo_dev_gen(dev) >= ILO_GEN(7))
hz_height = align(hz_height, 8);
img->aux.walk_layer_height = hz_qpitch;
@@ -1068,27 +1150,27 @@ img_calculate_hiz_size(struct ilo_image *img,
break;
case ILO_IMAGE_WALK_LOD:
{
- unsigned lod_tx[PIPE_MAX_TEXTURE_LEVELS];
- unsigned lod_ty[PIPE_MAX_TEXTURE_LEVELS];
- unsigned cur_tx, cur_ty;
+ int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
+ int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
+ int cur_tx, cur_ty;
/* figure out the tile offsets of LODs */
hz_width = 0;
hz_height = 0;
cur_tx = 0;
cur_ty = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
- unsigned tw, th;
+ for (lv = 0; lv < info->level_count; lv++) {
+ int tw, th;
lod_tx[lv] = cur_tx;
lod_ty[lv] = cur_ty;
- tw = align(img->lods[lv].slice_width, 16);
- th = align(img->lods[lv].slice_height, hz_align_j) *
- templ->array_size / 2;
+ tw = align(layout->lods[lv].slice_width, 16);
+ th = align(layout->lods[lv].slice_height, hz_align_j) *
+ info->array_size / 2;
/* convert to Y-tiles */
- tw = align(tw, 128) / 128;
- th = align(th, 32) / 32;
+ tw = (tw + 127) / 128;
+ th = (th + 31) / 32;
if (hz_width < cur_tx + tw)
hz_width = cur_tx + tw;
@@ -1102,22 +1184,23 @@ img_calculate_hiz_size(struct ilo_image *img,
}
/* convert tile offsets to memory offsets */
- for (lv = 0; lv <= templ->last_level; lv++) {
+ for (lv = 0; lv < info->level_count; lv++) {
img->aux.walk_lod_offsets[lv] =
(lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
}
+
hz_width *= 128;
hz_height *= 32;
}
break;
case ILO_IMAGE_WALK_3D:
- hz_width = align(img->lods[0].slice_width, 16);
+ hz_width = align(layout->lods[0].slice_width, 16);
hz_height = 0;
- for (lv = 0; lv <= templ->last_level; lv++) {
- const unsigned h = align(img->lods[lv].slice_height, hz_align_j);
+ for (lv = 0; lv < info->level_count; lv++) {
+ const int h = align(layout->lods[lv].slice_height, hz_align_j);
/* according to the formula, slices are packed together vertically */
- hz_height += h * u_minify(templ->depth0, lv);
+ hz_height += h * u_minify(info->depth, lv);
}
hz_height /= 2;
break;
@@ -1136,8 +1219,7 @@ img_calculate_hiz_size(struct ilo_image *img,
*/
hz_clear_w = 8;
hz_clear_h = 4;
- switch (templ->nr_samples) {
- case 0:
+ switch (info->sample_count) {
case 1:
default:
break;
@@ -1158,33 +1240,38 @@ img_calculate_hiz_size(struct ilo_image *img,
break;
}
- for (lv = 0; lv <= templ->last_level; lv++) {
- if (u_minify(img->width0, lv) % hz_clear_w ||
- u_minify(img->height0, lv) % hz_clear_h)
+ for (lv = 0; lv < info->level_count; lv++) {
+ if (u_minify(info->width, lv) % hz_clear_w ||
+ u_minify(info->height, lv) % hz_clear_h)
break;
img->aux.enables |= 1 << lv;
}
- /* we padded to allow this in img_align() */
- if (templ->last_level == 0 && templ->array_size == 1 && templ->depth0 == 1)
+ /* we padded to allow this in image_get_gen6_monolithic_size() */
+ if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
img->aux.enables |= 0x1;
/* align to Y-tile */
img->aux.bo_stride = align(hz_width, 128);
img->aux.bo_height = align(hz_height, 32);
+
+ return true;
}
-static void
-img_calculate_mcs_size(struct ilo_image *img,
- const struct ilo_image_params *params)
+static bool
+image_set_gen7_mcs(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info,
+ const struct ilo_image_layout *layout)
{
- const struct pipe_resource *templ = params->templ;
int mcs_width, mcs_height, mcs_cpp;
int downscale_x, downscale_y;
- assert(img->aux.type == ILO_IMAGE_AUX_MCS);
+ ILO_DEV_ASSERT(dev, 7, 8);
- if (templ->nr_samples > 1) {
+ assert(layout->aux == ILO_IMAGE_AUX_MCS);
+
+ if (info->sample_count > 1) {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
* rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA. The
@@ -1198,7 +1285,7 @@ img_calculate_mcs_size(struct ilo_image *img,
* RT. Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
* pixel block in the RT.
*/
- switch (templ->nr_samples) {
+ switch (info->sample_count) {
case 2:
case 4:
downscale_x = 8;
@@ -1217,7 +1304,7 @@ img_calculate_mcs_size(struct ilo_image *img,
break;
default:
assert(!"unsupported sample count");
- return;
+ return false;
break;
}
@@ -1226,8 +1313,8 @@ img_calculate_mcs_size(struct ilo_image *img,
* clear rectangle cannot be masked. The scale-down clear rectangle
* thus must be aligned to 2x2, and we need to pad.
*/
- mcs_width = align(img->width0, downscale_x * 2);
- mcs_height = align(img->height0, downscale_y * 2);
+ mcs_width = align(info->width, downscale_x * 2);
+ mcs_height = align(info->height, downscale_y * 2);
} else {
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 327:
@@ -1262,18 +1349,18 @@ img_calculate_mcs_size(struct ilo_image *img,
* anything except for the size of the allocated MCS. Let's see if we
* hit out-of-bound access.
*/
- switch (img->tiling) {
+ switch (layout->tiling) {
case GEN6_TILING_X:
- downscale_x = 64 / img->block_size;
+ downscale_x = 64 / info->block_size;
downscale_y = 2;
break;
case GEN6_TILING_Y:
- downscale_x = 32 / img->block_size;
+ downscale_x = 32 / info->block_size;
downscale_y = 4;
break;
default:
assert(!"unsupported tiling mode");
- return;
+ return false;
break;
}
@@ -1290,181 +1377,75 @@ img_calculate_mcs_size(struct ilo_image *img,
* The scaled-down clear rectangle must be aligned to 4x4 instead of
* 2x2, and we need to pad.
*/
- mcs_width = align(img->width0, downscale_x * 4) / downscale_x;
- mcs_height = align(img->height0, downscale_y * 4) / downscale_y;
+ mcs_width = align(info->width, downscale_x * 4) / downscale_x;
+ mcs_height = align(info->height, downscale_y * 4) / downscale_y;
mcs_cpp = 16; /* an OWord */
}
- img->aux.enables = (1 << (templ->last_level + 1)) - 1;
+ img->aux.enables = (1 << info->level_count) - 1;
/* align to Y-tile */
img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
img->aux.bo_height = align(mcs_height, 32);
+
+ return true;
}
-static void
-img_init(struct ilo_image *img,
- struct ilo_image_params *params)
+bool
+ilo_image_init(struct ilo_image *img,
+ const struct ilo_dev *dev,
+ const struct ilo_image_info *info)
{
- /* there are hard dependencies between every function here */
+ struct ilo_image_layout layout;
- img_init_aux(img, params);
- img_init_size_and_format(img, params);
- img_init_walk(img, params);
- img_init_tiling(img, params);
- img_init_alignments(img, params);
- img_init_lods(img, params);
- img_init_layer_height(img, params);
+ assert(ilo_is_zeroed(img, sizeof(*img)));
- img_align(img, params);
- img_calculate_bo_size(img, params);
+ memset(&layout, 0, sizeof(layout));
+ layout.lods = img->lods;
- img->scanout = (params->templ->bind & PIPE_BIND_SCANOUT);
+ if (!image_get_gen6_layout(dev, info, &layout))
+ return false;
- switch (img->aux.type) {
+ img->type = info->type;
+
+ img->format = info->format;
+ img->block_width = info->block_width;
+ img->block_height = info->block_height;
+ img->block_size = info->block_size;
+
+ img->width0 = info->width;
+ img->height0 = info->height;
+ img->depth0 = info->depth;
+ img->array_size = info->array_size;
+ img->level_count = info->level_count;
+ img->sample_count = info->sample_count;
+
+ img->walk = layout.walk;
+ img->interleaved_samples = layout.interleaved_samples;
+
+ img->tiling = layout.tiling;
+
+ img->aux.type = layout.aux;
+
+ img->align_i = layout.align_i;
+ img->align_j = layout.align_j;
+
+ img->walk_layer_height = layout.walk_layer_height;
+
+ if (!image_set_gen6_bo_size(img, dev, info, &layout))
+ return false;
+
+ img->scanout = info->bind_scanout;
+
+ switch (layout.aux) {
case ILO_IMAGE_AUX_HIZ:
- img_calculate_hiz_size(img, params);
+ image_set_gen6_hiz(img, dev, info, &layout);
break;
case ILO_IMAGE_AUX_MCS:
- img_calculate_mcs_size(img, params);
+ image_set_gen7_mcs(img, dev, info, &layout);
break;
default:
break;
}
-}
-
-/**
- * The texutre is for transfer only. We can define our own layout to save
- * space.
- */
-static void
-img_init_for_transfer(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ)
-{
- const unsigned num_layers = (templ->target == PIPE_TEXTURE_3D) ?
- templ->depth0 : templ->array_size;
- unsigned layer_width, layer_height;
-
- assert(templ->last_level == 0);
- assert(templ->nr_samples <= 1);
-
- img->aux.type = ILO_IMAGE_AUX_NONE;
-
- img->target = templ->target;
- img->width0 = templ->width0;
- img->height0 = templ->height0;
- img->depth0 = templ->depth0;
- img->array_size = templ->array_size;
- img->level_count = 1;
- img->sample_count = 1;
-
- img->format = templ->format;
- img->block_width = util_format_get_blockwidth(templ->format);
- img->block_height = util_format_get_blockheight(templ->format);
- img->block_size = util_format_get_blocksize(templ->format);
-
- img->walk = ILO_IMAGE_WALK_LOD;
-
- img->tiling = GEN6_TILING_NONE;
-
- img->align_i = img->block_width;
- img->align_j = img->block_height;
-
- assert(util_is_power_of_two(img->block_width) &&
- util_is_power_of_two(img->block_height));
-
- /* use packed layout */
- layer_width = align(templ->width0, img->align_i);
- layer_height = align(templ->height0, img->align_j);
-
- img->lods[0].slice_width = layer_width;
- img->lods[0].slice_height = layer_height;
-
- img->bo_stride = (layer_width / img->block_width) * img->block_size;
- img->bo_stride = align(img->bo_stride, 64);
-
- img->bo_height = (layer_height / img->block_height) * num_layers;
-}
-
-/**
- * Initialize the image. Callers should zero-initialize \p img first.
- */
-void ilo_image_init(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ)
-{
- struct ilo_image_params params;
- bool transfer_only;
-
- assert(ilo_is_zeroed(img, sizeof(*img)));
-
- /* use transfer layout when the texture is never bound to GPU */
- transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
- PIPE_BIND_TRANSFER_READ));
- if (transfer_only && templ->last_level == 0 && templ->nr_samples <= 1) {
- img_init_for_transfer(img, dev, templ);
- return;
- }
-
- memset(¶ms, 0, sizeof(params));
- params.dev = dev;
- params.templ = templ;
- params.valid_tilings = IMAGE_TILING_ALL;
-
- img_init(img, ¶ms);
-}
-
-bool
-ilo_image_init_for_imported(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ,
- enum gen_surface_tiling tiling,
- unsigned bo_stride)
-{
- struct ilo_image_params params;
-
- assert(ilo_is_zeroed(img, sizeof(*img)));
-
- if ((tiling == GEN6_TILING_X && bo_stride % 512) ||
- (tiling == GEN6_TILING_Y && bo_stride % 128) ||
- (tiling == GEN8_TILING_W && bo_stride % 64))
- return false;
-
- memset(¶ms, 0, sizeof(params));
- params.dev = dev;
- params.templ = templ;
- params.valid_tilings = 1 << tiling;
-
- img_init(img, ¶ms);
-
- assert(img->tiling == tiling);
- if (img->bo_stride > bo_stride)
- return false;
-
- img->bo_stride = bo_stride;
-
- /* assume imported RTs are also scanouts */
- if (!img->scanout)
- img->scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
-
- return true;
-}
-
-bool
-ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev)
-{
- /* HiZ is required for separate stencil on Gen6 */
- if (ilo_dev_gen(dev) == ILO_GEN(6) &&
- img->aux.type == ILO_IMAGE_AUX_HIZ &&
- img->separate_stencil)
- return false;
-
- /* MCS is required for multisample images */
- if (img->aux.type == ILO_IMAGE_AUX_MCS &&
- img->sample_count > 1)
- return false;
-
- img->aux.enables = 0x0;
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_image.h b/src/gallium/drivers/ilo/core/ilo_image.h
index af15e856028..646ed6f5727 100644
--- a/src/gallium/drivers/ilo/core/ilo_image.h
+++ b/src/gallium/drivers/ilo/core/ilo_image.h
@@ -29,11 +29,17 @@
#define ILO_IMAGE_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
+/*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 75:
+ *
+ * "(MIP Count / LOD) representing [1,15] MIP levels"
+ */
+#define ILO_IMAGE_MAX_LEVEL_COUNT 15
+
enum ilo_image_aux_type {
ILO_IMAGE_AUX_NONE,
ILO_IMAGE_AUX_HIZ,
@@ -68,6 +74,49 @@ enum ilo_image_walk_type {
ILO_IMAGE_WALK_3D,
};
+struct ilo_image_info {
+ enum gen_surface_type type;
+
+ enum gen_surface_format format;
+ bool interleaved_stencil;
+ bool is_integer;
+ /* width, height and size of pixel blocks */
+ bool compressed;
+ unsigned block_width;
+ unsigned block_height;
+ unsigned block_size;
+
+ /* image size */
+ uint16_t width;
+ uint16_t height;
+ uint16_t depth;
+ uint16_t array_size;
+ uint8_t level_count;
+ uint8_t sample_count;
+
+ /* disable optional aux */
+ bool aux_disable;
+
+ /* tilings to consider, if any bit is set */
+ uint8_t valid_tilings;
+
+ /*
+ * prefer GEN6_TILING_NONE when the (estimated) image size exceeds the
+ * threshold
+ */
+ uint32_t prefer_linear_threshold;
+
+ /* force a stride when non-zero */
+ uint32_t force_bo_stride;
+
+ bool bind_surface_sampler;
+ bool bind_surface_dp_render;
+ bool bind_surface_dp_typed;
+ bool bind_zs;
+ bool bind_scanout;
+ bool bind_cursor;
+};
+
/*
* When the walk type is ILO_IMAGE_WALK_LAYER, there is only a slice in each
* LOD and this is used to describe LODs in the first array layer. Otherwise,
@@ -88,7 +137,10 @@ struct ilo_image_lod {
* Texture layout.
*/
struct ilo_image {
- enum pipe_texture_target target;
+ enum gen_surface_type type;
+
+ enum gen_surface_format format;
+ bool interleaved_stencil;
/* size, format, etc for programming hardware states */
unsigned width0;
@@ -97,8 +149,6 @@ struct ilo_image {
unsigned array_size;
unsigned level_count;
unsigned sample_count;
- enum pipe_format format;
- bool separate_stencil;
/*
* width, height, and size of pixel blocks for conversion between pixel
@@ -117,7 +167,7 @@ struct ilo_image {
unsigned align_i;
unsigned align_j;
- struct ilo_image_lod lods[PIPE_MAX_TEXTURE_LEVELS];
+ struct ilo_image_lod lods[ILO_IMAGE_MAX_LEVEL_COUNT];
/* physical layer height for ILO_IMAGE_WALK_LAYER */
unsigned walk_layer_height;
@@ -136,36 +186,18 @@ struct ilo_image {
unsigned enables;
/* LOD offsets for ILO_IMAGE_WALK_LOD */
- unsigned walk_lod_offsets[PIPE_MAX_TEXTURE_LEVELS];
+ unsigned walk_lod_offsets[ILO_IMAGE_MAX_LEVEL_COUNT];
unsigned walk_layer_height;
unsigned bo_stride;
unsigned bo_height;
-
- /* managed by users */
- struct intel_bo *bo;
} aux;
-
- /* managed by users */
- struct intel_bo *bo;
};
-struct pipe_resource;
-
-void
+bool
ilo_image_init(struct ilo_image *img,
const struct ilo_dev *dev,
- const struct pipe_resource *templ);
-
-bool
-ilo_image_init_for_imported(struct ilo_image *img,
- const struct ilo_dev *dev,
- const struct pipe_resource *templ,
- enum gen_surface_tiling tiling,
- unsigned bo_stride);
-
-bool
-ilo_image_disable_aux(struct ilo_image *img, const struct ilo_dev *dev);
+ const struct ilo_image_info *info);
static inline bool
ilo_image_can_enable_aux(const struct ilo_image *img, unsigned level)
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c b/src/gallium/drivers/ilo/core/ilo_state_sol.c
index 38c0b719ab3..6ef2c91a592 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sol.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c
@@ -26,7 +26,7 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
+#include "ilo_vma.h"
#include "ilo_state_sol.h"
static bool
@@ -270,9 +270,6 @@ sol_buffer_validate_gen7(const struct ilo_dev *dev,
{
ILO_DEV_ASSERT(dev, 7, 8);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 208:
*
@@ -281,9 +278,17 @@ sol_buffer_validate_gen7(const struct ilo_dev *dev,
*/
assert(info->offset % 4 == 0);
+ if (info->vma) {
+ assert(info->vma->vm_alignment % 4 == 0);
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
+ }
+
/* Gen8+ only */
- if (info->write_offset_load || info->write_offset_save)
- assert(ilo_dev_gen(dev) >= ILO_GEN(8));
+ if (info->write_offset_load || info->write_offset_save) {
+ assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma);
+ assert(info->write_offset_offset + sizeof(uint32_t) <=
+ info->write_offset_vma->vm_size);
+ }
/*
* From the Broadwell PRM, volume 2b, page 206:
@@ -304,25 +309,15 @@ static uint32_t
sol_buffer_get_gen6_size(const struct ilo_dev *dev,
const struct ilo_state_sol_buffer_info *info)
{
- uint32_t size;
-
ILO_DEV_ASSERT(dev, 6, 8);
- if (!info->buf)
- return 0;
-
- size = (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
-
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 208:
*
* "(Surface End Address) This field specifies the ending DWord
* address..."
*/
- size &= ~3;
-
- return size;
+ return (info->vma) ? info->size & ~3 : 0;
}
static bool
@@ -359,7 +354,7 @@ sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
dw1 = 0;
- if (info->buf)
+ if (info->vma)
dw1 |= GEN8_SO_BUF_DW1_ENABLE;
if (info->write_offset_load)
dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE;
@@ -429,6 +424,15 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
return ilo_state_sol_init(sol, dev, &info);
}
+uint32_t
+ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment)
+{
+ /* DWord aligned without padding */
+ *alignment = 4;
+ return size;
+}
+
bool
ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
const struct ilo_dev *dev,
@@ -443,9 +447,8 @@ ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
else
ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info);
- sb->need_bo = (info->size > 0);
- sb->need_write_offset_bo = (info->write_offset_save ||
- (info->write_offset_load && !info->write_offset_imm_enable));
+ sb->vma = info->vma;
+ sb->write_offset_vma = info->write_offset_vma;
assert(ret);
diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h
index 2513fcb4979..92c5f94725b 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_sol.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h
@@ -107,17 +107,17 @@ struct ilo_state_sol {
uint8_t decl_count;
};
-struct ilo_buffer;
+struct ilo_vma;
struct ilo_state_sol_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
- /*
- * Gen8+ only. When enabled, require a write offset bo of at least
- * (sizeof(uint32_t) * ILO_STATE_SOL_MAX_BUFFER_COUNT) bytes
- */
+ /* Gen8+ only; at least sizeof(uint32_t) bytes */
+ const struct ilo_vma *write_offset_vma;
+ uint32_t write_offset_offset;
+
bool write_offset_load;
bool write_offset_save;
@@ -126,14 +126,10 @@ struct ilo_state_sol_buffer_info {
};
struct ilo_state_sol_buffer {
- uint32_t so_buf[4];
+ uint32_t so_buf[5];
- bool need_bo;
- bool need_write_offset_bo;
-
- /* managed by users */
- struct intel_bo *bo;
- struct intel_bo *write_offset_bo;
+ const struct ilo_vma *vma;
+ const struct ilo_vma *write_offset_vma;
};
static inline size_t
@@ -154,6 +150,10 @@ ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
const struct ilo_dev *dev,
bool render_disable);
+uint32_t
+ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.c b/src/gallium/drivers/ilo/core/ilo_state_surface.c
index 5be9f8f6270..40fe15f316f 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.c
@@ -26,8 +26,8 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
#include "ilo_image.h"
+#include "ilo_vma.h"
#include "ilo_state_surface.h"
static bool
@@ -94,17 +94,129 @@ surface_set_gen7_null_SURFACE_STATE(struct ilo_state_surface *surf,
return true;
}
+/**
+ * Return the alignment that the offset of a buffer surface must satisfy for
+ * the access mode in \p info.
+ *
+ * Sampler access has no requirement (1); render-target and typed data-port
+ * access use the element size (info->format_size); untyped and SVB access
+ * use 4; data-port block/scattered access uses 16 or 4 depending on
+ * info->format_size. The constraints on info->struct_size that go with each
+ * access mode are asserted along the way. An unknown access mode asserts
+ * and yields 1.
+ */
+static uint32_t
+surface_get_gen6_buffer_offset_alignment(const struct ilo_dev *dev,
+ const struct ilo_state_surface_buffer_info *info)
+{
+ uint32_t alignment;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+ *
+ * "The Base Address for linear render target surfaces and surfaces
+ * accessed with the typed surface read/write data port messages must
+ * be element-size aligned, for non-YUV surface formats, or a multiple
+ * of 2 element-sizes for YUV surface formats. Other linear surfaces
+ * have no alignment requirements (byte alignment is sufficient)."
+ *
+ * "Certain message types used to access surfaces have more stringent
+ * alignment requirements. Please refer to the specific message
+ * documentation for additional restrictions."
+ */
+ switch (info->access) {
+ case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+ /* no alignment requirements */
+ alignment = 1;
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+ case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+ /* element-size aligned */
+ alignment = info->format_size;
+
+ assert(info->struct_size % alignment == 0);
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+ /*
+ * Nothing is said about Untyped* messages, but I think they require the
+ * base address to be DWord aligned.
+ */
+ alignment = 4;
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+ *
+ * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the
+ * pitch must be a multiple of 4 bytes."
+ */
+ if (info->struct_size > 1)
+ assert(info->struct_size % alignment == 0);
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237:
+ *
+ * "the surface base address must be OWord aligned"
+ *
+ * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord
+ * Dual Block Read/Write.
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249:
+ *
+ * "The surface base address must be DWord aligned"
+ *
+ * for DWord Scattered Read/Write and Byte Scattered Read/Write.
+ */
+ alignment = (info->format_size > 4) ? 16 : 4;
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, 237, and
+ * 246:
+ *
+ * "the surface pitch is ignored, the surface is treated as a
+ * 1-dimensional surface. An element size (pitch) of 16 bytes is
+ * used to determine the size of the buffer for out-of-bounds
+ * checking if using the surface state model."
+ *
+ * for OWord Block Read/Write, Unaligned OWord Block Read, OWord
+ * Dual Block Read/Write, and DWord Scattered Read/Write.
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 248:
+ *
+ * "The surface pitch is ignored, the surface is treated as a
+ * 1-dimensional surface. An element size (pitch) of 4 bytes is
+ * used to determine the size of the buffer for out-of-bounds
+ * checking if using the surface state model."
+ *
+ * for Byte Scattered Read/Write.
+ *
+ * It is programmable on Gen7.5+.
+ */
+ if (ilo_dev_gen(dev) < ILO_GEN(7.5)) {
+ /*
+ * NOTE(review): presumably format_size == 1 identifies Byte Scattered
+ * access, the only case quoted above with a 4-byte pitch -- confirm.
+ */
+ const int fixed = (info->format_size > 1) ? 16 : 4;
+ assert(info->struct_size == fixed);
+ }
+ break;
+ case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 259:
+ *
+ * "Both the surface base address and surface pitch must be DWord
+ * aligned."
+ */
+ alignment = 4;
+
+ assert(info->struct_size % alignment == 0);
+ break;
+ default:
+ assert(!"unknown access");
+ /* fall back to byte alignment so the return value is always set */
+ alignment = 1;
+ break;
+ }
+
+ return alignment;
+}
+
static bool
surface_validate_gen6_buffer(const struct ilo_dev *dev,
const struct ilo_state_surface_buffer_info *info)
{
+ uint32_t alignment;
+
ILO_DEV_ASSERT(dev, 6, 8);
- /* SVB writes are Gen6-only */
- if (ilo_dev_gen(dev) >= ILO_GEN(7))
- assert(info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB);
-
- if (info->offset + info->size > info->buf->bo_size) {
+ if (info->offset + info->size > info->vma->vm_size) {
ilo_warn("invalid buffer range\n");
return false;
}
@@ -120,87 +232,34 @@ surface_validate_gen6_buffer(const struct ilo_dev *dev,
return false;
}
+ alignment = surface_get_gen6_buffer_offset_alignment(dev, info);
+ if (info->offset % alignment || info->vma->vm_alignment % alignment) {
+ ilo_warn("bad buffer offset\n");
+ return false;
+ }
+
+ /* no STRBUF on Gen6 */
+ if (info->format == GEN6_FORMAT_RAW && info->struct_size > 1)
+ assert(ilo_dev_gen(dev) >= ILO_GEN(7));
+
+ /* SVB writes are Gen6 only */
+ if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB)
+ assert(ilo_dev_gen(dev) == ILO_GEN(6));
+
/*
- * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+ * From the Ivy Bridge PRM, volume 4 part 1, page 83:
*
- * "The Base Address for linear render target surfaces and surfaces
- * accessed with the typed surface read/write data port messages must
- * be element-size aligned, for non-YUV surface formats, or a multiple
- * of 2 element-sizes for YUV surface formats. Other linear surfaces
- * have no alignment requirements (byte alignment is sufficient)."
+ * "NOTE: "RAW" is supported only with buffers and structured buffers
+ * accessed via the untyped surface read/write and untyped atomic
+ * operation messages, which do not have a column in the table."
*
- * "Certain message types used to access surfaces have more stringent
- * alignment requirements. Please refer to the specific message
- * documentation for additional restrictions."
+ * From the Ivy Bridge PRM, volume 4 part 1, page 252:
*
- * From the Ivy Bridge PRM, volume 4 part 1, page 233, 235, and 237:
- *
- * "the surface base address must be OWord aligned"
- *
- * for OWord Block Read/Write, Unaligned OWord Block Read, and OWord Dual
- * Block Read/Write.
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 246 and 249:
- *
- * "The surface base address must be DWord aligned"
- *
- * for DWord Scattered Read/Write and Byte Scattered Read/Write.
- *
- * We have to rely on users to correctly set info->struct_size here. DWord
- * Scattered Read/Write has conflicting pitch and alignment, but we do not
- * use them yet so we are fine.
- *
- * It is unclear if sampling engine surfaces require aligned offsets.
+ * "For untyped messages, the Surface Format must be RAW and the
+ * Surface Type must be SURFTYPE_BUFFER or SURFTYPE_STRBUF."
*/
- if (info->access != ILO_STATE_SURFACE_ACCESS_DP_SVB) {
- assert(info->struct_size % info->format_size == 0);
-
- if (info->offset % info->struct_size) {
- ilo_warn("bad buffer offset\n");
- return false;
- }
- }
-
- if (info->format == GEN6_FORMAT_RAW) {
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 97:
- *
- * ""RAW" is supported only with buffers and structured buffers
- * accessed via the untyped surface read/write and untyped atomic
- * operation messages, which do not have a column in the table."
- *
- * We do not have a specific access mode for untyped messages.
- */
- assert(info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED);
-
- /*
- * Nothing is said about Untyped* messages, but I guess they require the
- * base address to be DWord aligned.
- */
- if (info->offset % 4) {
- ilo_warn("bad RAW buffer offset\n");
- return false;
- }
-
- if (info->struct_size > 1) {
- /* no STRBUF on Gen6 */
- if (ilo_dev_gen(dev) == ILO_GEN(6)) {
- ilo_warn("no STRBUF support\n");
- return false;
- }
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the
- * pitch must be a multiple of 4 bytes."
- */
- if (info->struct_size % 4) {
- ilo_warn("bad STRBUF pitch\n");
- return false;
- }
- }
- }
+ assert((info->access == ILO_STATE_SURFACE_ACCESS_DP_UNTYPED) ==
+ (info->format == GEN6_FORMAT_RAW));
return true;
}
@@ -215,8 +274,7 @@ surface_get_gen6_buffer_struct_count(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
c = info->size / info->struct_size;
- if (info->access == ILO_STATE_SURFACE_ACCESS_DP_SVB &&
- info->format_size < info->size - info->struct_size * c)
+ if (info->format_size < info->size - info->struct_size * c)
c++;
/*
@@ -367,29 +425,6 @@ surface_set_gen7_buffer_SURFACE_STATE(struct ilo_state_surface *surf,
return true;
}
-static enum gen_surface_type
-get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- switch (img->target) {
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_NULL;
- }
-}
-
static bool
surface_validate_gen6_image(const struct ilo_dev *dev,
const struct ilo_state_surface_image_info *info)
@@ -408,6 +443,17 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
break;
}
+ assert(info->img && info->vma);
+
+ if (info->img->tiling != GEN6_TILING_NONE)
+ assert(info->vma->vm_alignment % 4096 == 0);
+
+ if (info->aux_vma) {
+ assert(ilo_image_can_enable_aux(info->img, info->level_base));
+ /* always tiled */
+ assert(info->aux_vma->vm_alignment % 4096 == 0);
+ }
+
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 78:
*
@@ -418,17 +464,19 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
assert(info->img->bo_stride && info->img->bo_stride <= 512 * 1024 &&
info->img->width0 <= info->img->bo_stride);
- if (info->is_cube_map) {
- assert(get_gen6_surface_type(dev, info->img) == GEN6_SURFTYPE_2D);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 78:
- *
- * "For cube maps, Width must be set equal to the Height."
- */
- assert(info->img->width0 == info->img->height0);
+ if (info->type != info->img->type) {
+ assert(info->type == GEN6_SURFTYPE_2D &&
+ info->img->type == GEN6_SURFTYPE_CUBE);
}
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 78:
+ *
+ * "For cube maps, Width must be set equal to the Height."
+ */
+ if (info->type == GEN6_SURFTYPE_CUBE)
+ assert(info->img->width0 == info->img->height0);
+
/*
* From the Sandy Bridge PRM, volume 4 part 1, page 72:
*
@@ -463,20 +511,21 @@ surface_validate_gen6_image(const struct ilo_dev *dev,
}
static void
-get_gen6_max_extent(const struct ilo_dev *dev,
- const struct ilo_image *img,
- uint16_t *max_w, uint16_t *max_h)
+surface_get_gen6_image_max_extent(const struct ilo_dev *dev,
+ const struct ilo_state_surface_image_info *info,
+ uint16_t *max_w, uint16_t *max_h)
{
const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
ILO_DEV_ASSERT(dev, 6, 8);
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
*max_w = max_size;
*max_h = 1;
break;
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
*max_w = max_size;
*max_h = max_size;
break;
@@ -504,7 +553,7 @@ surface_get_gen6_image_extent(const struct ilo_dev *dev,
w = info->img->width0;
h = info->img->height0;
- get_gen6_max_extent(dev, info->img, &max_w, &max_h);
+ surface_get_gen6_image_max_extent(dev, info, &max_w, &max_h);
assert(w && h && w <= max_w && h <= max_h);
*width = w - 1;
@@ -555,16 +604,17 @@ surface_get_gen6_image_slices(const struct ilo_dev *dev,
* layers to (86 * 6), about 512.
*/
- switch (get_gen6_surface_type(dev, info->img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7.5)) ? 2048 : 512;
assert(info->img->array_size <= max_slice);
max_slice = info->img->array_size;
d = info->slice_count;
- if (info->is_cube_map) {
+ if (info->type == GEN6_SURFTYPE_CUBE) {
if (info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
if (!d || d % 6) {
ilo_warn("invalid cube slice count\n");
@@ -877,7 +927,6 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
uint8_t min_lod, mip_count;
enum gen_sample_count sample_count;
uint32_t alignments;
- enum gen_surface_type type;
uint32_t dw0, dw2, dw3, dw4, dw5;
ILO_DEV_ASSERT(dev, 6, 6);
@@ -897,10 +946,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
if (info->img->sample_count > 1)
assert(info->img->interleaved_samples);
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- get_gen6_surface_type(dev, info->img);
-
- dw0 = type << GEN6_SURFACE_DW0_TYPE__SHIFT |
+ dw0 = info->type << GEN6_SURFACE_DW0_TYPE__SHIFT |
info->format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
@@ -927,7 +973,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
* "When TEXCOORDMODE_CLAMP is used when accessing a cube map, this
* field must be programmed to 111111b (all faces enabled)."
*/
- if (info->is_cube_map &&
+ if (info->type == GEN6_SURFTYPE_CUBE &&
info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER) {
dw0 |= GEN6_SURFACE_DW0_CUBE_MAP_CORNER_MODE_AVERAGE |
GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
@@ -956,7 +1002,7 @@ surface_set_gen6_image_SURFACE_STATE(struct ilo_state_surface *surf,
surf->surface[4] = dw4;
surf->surface[5] = dw5;
- surf->type = type;
+ surf->type = info->type;
surf->min_lod = min_lod;
surf->mip_count = mip_count;
@@ -972,7 +1018,6 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
uint8_t min_lod, mip_count;
uint32_t alignments;
enum gen_sample_count sample_count;
- enum gen_surface_type type;
uint32_t dw0, dw1, dw2, dw3, dw4, dw5, dw7;
ILO_DEV_ASSERT(dev, 7, 8);
@@ -986,10 +1031,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
!surface_get_gen6_image_alignments(dev, info, &alignments))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- get_gen6_surface_type(dev, info->img);
-
- dw0 = type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+ dw0 = info->type << GEN7_SURFACE_DW0_TYPE__SHIFT |
info->format << GEN7_SURFACE_DW0_FORMAT__SHIFT |
alignments;
@@ -1023,7 +1065,7 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
* field must be programmed to 111111b (all faces enabled). This field
* is ignored unless the Surface Type is SURFTYPE_CUBE."
*/
- if (info->is_cube_map &&
+ if (info->type == GEN6_SURFTYPE_CUBE &&
info->access == ILO_STATE_SURFACE_ACCESS_SAMPLER)
dw0 |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
@@ -1087,13 +1129,61 @@ surface_set_gen7_image_SURFACE_STATE(struct ilo_state_surface *surf,
surf->surface[12] = 0;
}
- surf->type = type;
+ surf->type = info->type;
surf->min_lod = min_lod;
surf->mip_count = mip_count;
return true;
}
+/**
+ * Return \p size padded as required for the given surface access mode, and
+ * store the matching alignment in \p alignment.
+ */
+uint32_t
+ilo_state_surface_buffer_size(const struct ilo_dev *dev,
+                              enum ilo_state_surface_access access,
+                              uint32_t size, uint32_t *alignment)
+{
+   /* start from "byte aligned, no padding" and tighten per access mode */
+   uint32_t required_alignment = 1;
+   uint32_t padded_size = size;
+
+   switch (access) {
+   case ILO_STATE_SURFACE_ACCESS_SAMPLER:
+      /*
+       * From the Sandy Bridge PRM, volume 1 part 1, page 118:
+       *
+       *     "For buffers, which have no inherent "height," padding
+       *      requirements are different. A buffer must be padded to the next
+       *      multiple of 256 array elements, with an additional 16 bytes
+       *      added beyond that to account for the L1 cache line."
+       *
+       * With tightly packed GEN6_FORMAT_R32G32B32A32_FLOAT elements that is
+       * a multiple of 4096 (= 16 * 256) plus 16.
+       */
+      padded_size = align(size, 4096) + 16;
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_RENDER:
+   case ILO_STATE_SURFACE_ACCESS_DP_TYPED:
+      /* element-size aligned; 16 covers the worst case */
+      required_alignment = 16;
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_DATA:
+      /* OWord aligned, and padded to whole OWords */
+      required_alignment = 16;
+      padded_size = align(size, 16);
+      break;
+   case ILO_STATE_SURFACE_ACCESS_DP_UNTYPED:
+   case ILO_STATE_SURFACE_ACCESS_DP_SVB:
+      /* DWord aligned (always for SVB; presumed for untyped messages) */
+      required_alignment = 4;
+      break;
+   default:
+      assert(!"unknown access");
+      break;
+   }
+
+   *alignment = required_alignment;
+
+   return padded_size;
+}
+
bool
ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
const struct ilo_dev *dev)
@@ -1107,6 +1197,7 @@ ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_null_SURFACE_STATE(surf, dev);
+ surf->vma = NULL;
surf->type = GEN6_SURFTYPE_NULL;
surf->readonly = true;
@@ -1129,6 +1220,7 @@ ilo_state_surface_init_for_buffer(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_buffer_SURFACE_STATE(surf, dev, info);
+ surf->vma = info->vma;
surf->readonly = info->readonly;
assert(ret);
@@ -1150,6 +1242,9 @@ ilo_state_surface_init_for_image(struct ilo_state_surface *surf,
else
ret &= surface_set_gen6_image_SURFACE_STATE(surf, dev, info);
+ surf->vma = info->vma;
+ surf->aux_vma = info->aux_vma;
+
surf->is_integer = info->is_integer;
surf->readonly = info->readonly;
surf->scanout = info->img->scanout;
diff --git a/src/gallium/drivers/ilo/core/ilo_state_surface.h b/src/gallium/drivers/ilo/core/ilo_state_surface.h
index 9c025428d50..e78c7c97db1 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_surface.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_surface.h
@@ -29,14 +29,10 @@
#define ILO_STATE_SURFACE_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
-struct ilo_buffer;
-struct ilo_image;
-
enum ilo_state_surface_access {
ILO_STATE_SURFACE_ACCESS_SAMPLER, /* sampling engine surfaces */
ILO_STATE_SURFACE_ACCESS_DP_RENDER, /* render target surfaces */
@@ -46,42 +42,51 @@ enum ilo_state_surface_access {
ILO_STATE_SURFACE_ACCESS_DP_SVB,
};
+struct ilo_vma;
+struct ilo_image;
+
struct ilo_state_surface_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
+ uint32_t offset;
+ uint32_t size;
enum ilo_state_surface_access access;
+ /* format_size may be less than, equal to, or greater than struct_size */
enum gen_surface_format format;
uint8_t format_size;
bool readonly;
uint16_t struct_size;
-
- uint32_t offset;
- uint32_t size;
};
struct ilo_state_surface_image_info {
const struct ilo_image *img;
+ uint8_t level_base;
+ uint8_t level_count;
+ uint16_t slice_base;
+ uint16_t slice_count;
+
+ const struct ilo_vma *vma;
+ const struct ilo_vma *aux_vma;
enum ilo_state_surface_access access;
+ enum gen_surface_type type;
+
enum gen_surface_format format;
bool is_integer;
bool readonly;
- bool is_cube_map;
bool is_array;
-
- uint8_t level_base;
- uint8_t level_count;
- uint16_t slice_base;
- uint16_t slice_count;
};
struct ilo_state_surface {
uint32_t surface[13];
+ const struct ilo_vma *vma;
+ const struct ilo_vma *aux_vma;
+
enum gen_surface_type type;
uint8_t min_lod;
uint8_t mip_count;
@@ -89,9 +94,6 @@ struct ilo_state_surface {
bool readonly;
bool scanout;
-
- /* managed by users */
- struct intel_bo *bo;
};
bool
@@ -99,6 +101,11 @@ ilo_state_surface_valid_format(const struct ilo_dev *dev,
enum ilo_state_surface_access access,
enum gen_surface_format format);
+uint32_t
+ilo_state_surface_buffer_size(const struct ilo_dev *dev,
+ enum ilo_state_surface_access access,
+ uint32_t size, uint32_t *alignment);
+
bool
ilo_state_surface_init_for_null(struct ilo_state_surface *surf,
const struct ilo_dev *dev);
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.c b/src/gallium/drivers/ilo/core/ilo_state_vf.c
index ddc75428ed7..9faf835fef2 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.c
@@ -26,7 +26,7 @@
*/
#include "ilo_debug.h"
-#include "ilo_buffer.h"
+#include "ilo_vma.h"
#include "ilo_state_vf.h"
static bool
@@ -479,8 +479,8 @@ vertex_buffer_validate_gen6(const struct ilo_dev *dev,
{
ILO_DEV_ASSERT(dev, 6, 8);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
+ if (info->vma)
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 86:
@@ -500,6 +500,9 @@ vertex_buffer_validate_gen6(const struct ilo_dev *dev,
* aligned address, and BufferPitch must be a multiple of 64-bits."
*/
if (info->cv_has_double) {
+ if (info->vma)
+ assert(info->vma->vm_alignment % 8 == 0);
+
assert(info->stride % 8 == 0);
assert((info->offset + info->cv_double_vertex_offset_mod_8) % 8 == 0);
}
@@ -512,12 +515,7 @@ vertex_buffer_get_gen6_size(const struct ilo_dev *dev,
const struct ilo_state_vertex_buffer_info *info)
{
ILO_DEV_ASSERT(dev, 6, 8);
-
- if (!info->buf)
- return 0;
-
- return (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
+ return (info->vma) ? info->size : 0;
}
static bool
@@ -537,7 +535,7 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb,
if (ilo_dev_gen(dev) >= ILO_GEN(7))
dw0 |= GEN7_VB_DW0_ADDR_MODIFIED;
- if (!info->buf)
+ if (!info->vma)
dw0 |= GEN6_VB_DW0_IS_NULL;
STATIC_ASSERT(ARRAY_SIZE(vb->vb) >= 3);
@@ -551,7 +549,7 @@ vertex_buffer_set_gen8_vertex_buffer_state(struct ilo_state_vertex_buffer *vb,
vb->vb[2] = (size) ? info->offset + size - 1 : 0;
}
- vb->need_bo = (info->buf != NULL);
+ vb->vma = info->vma;
return true;
}
@@ -586,8 +584,10 @@ index_buffer_validate_gen6(const struct ilo_dev *dev,
*/
assert(info->offset % format_size == 0);
- if (info->buf)
- assert(info->offset < info->buf->bo_size && info->size);
+ if (info->vma) {
+ assert(info->vma->vm_alignment % format_size == 0);
+ assert(info->size && info->offset + info->size <= info->vma->vm_size);
+ }
return true;
}
@@ -600,12 +600,10 @@ index_buffer_get_gen6_size(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
- if (!info->buf)
+ if (!info->vma)
return 0;
- size = (info->offset + info->size <= info->buf->bo_size) ? info->size :
- info->buf->bo_size - info->offset;
-
+ size = info->size;
if (ilo_dev_gen(dev) < ILO_GEN(8)) {
const uint32_t format_size = get_index_format_size(info->format);
size -= (size % format_size);
@@ -638,7 +636,7 @@ index_buffer_set_gen8_3DSTATE_INDEX_BUFFER(struct ilo_state_index_buffer *ib,
ib->ib[2] = (size) ? info->offset + size - 1 : 0;
}
- ib->need_bo = (info->buf != NULL);
+ ib->vma = info->vma;
return true;
}
@@ -949,6 +947,15 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
}
}
+/**
+ * Report the size and alignment to use for a vertex buffer.
+ *
+ * \p size is returned unchanged (no padding); \p alignment receives 8.
+ */
+uint32_t
+ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size,
+                             uint32_t *alignment)
+{
+   /* 64-bit alignment, so that double-precision data needs no padding */
+   const uint32_t qword_size = 8;
+
+   *alignment = qword_size;
+
+   return size;
+}
+
/**
* No need to initialize first.
*/
@@ -966,6 +973,15 @@ ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
return ret;
}
+/**
+ * Report the size and alignment to use for an index buffer.
+ *
+ * \p size is returned unchanged (no padding); \p alignment receives the
+ * size of a GEN6_INDEX_DWORD index.
+ */
+uint32_t
+ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size,
+                            uint32_t *alignment)
+{
+   /* the DWord index format is the worst case for alignment */
+   const uint32_t widest_index = get_index_format_size(GEN6_INDEX_DWORD);
+
+   *alignment = widest_index;
+
+   return size;
+}
+
/**
* No need to initialize first.
*/
diff --git a/src/gallium/drivers/ilo/core/ilo_state_vf.h b/src/gallium/drivers/ilo/core/ilo_state_vf.h
index f15c63a248a..16b128bf63c 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_vf.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_vf.h
@@ -126,10 +126,10 @@ struct ilo_state_vf_delta {
uint32_t dirty;
};
-struct ilo_buffer;
+struct ilo_vma;
struct ilo_state_vertex_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
@@ -143,14 +143,11 @@ struct ilo_state_vertex_buffer_info {
struct ilo_state_vertex_buffer {
uint32_t vb[3];
- bool need_bo;
-
- /* managed by users */
- struct intel_bo *bo;
+ const struct ilo_vma *vma;
};
struct ilo_state_index_buffer_info {
- const struct ilo_buffer *buf;
+ const struct ilo_vma *vma;
uint32_t offset;
uint32_t size;
@@ -160,10 +157,7 @@ struct ilo_state_index_buffer_info {
struct ilo_state_index_buffer {
uint32_t ib[3];
- bool need_bo;
-
- /* managed by users */
- struct intel_bo *bo;
+ const struct ilo_vma *vma;
};
static inline size_t
@@ -215,11 +209,19 @@ ilo_state_vf_get_delta(const struct ilo_state_vf *vf,
const struct ilo_state_vf *old,
struct ilo_state_vf_delta *delta);
+uint32_t
+ilo_state_vertex_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_vertex_buffer_set_info(struct ilo_state_vertex_buffer *vb,
const struct ilo_dev *dev,
const struct ilo_state_vertex_buffer_info *info);
+uint32_t
+ilo_state_index_buffer_size(const struct ilo_dev *dev, uint32_t size,
+ uint32_t *alignment);
+
bool
ilo_state_index_buffer_set_info(struct ilo_state_index_buffer *ib,
const struct ilo_dev *dev,
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.c b/src/gallium/drivers/ilo/core/ilo_state_zs.c
index 901fedb5599..827632764b2 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.c
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.c
@@ -25,10 +25,9 @@
* Chia-I Wu
*/
-#include "intel_winsys.h"
-
#include "ilo_debug.h"
#include "ilo_image.h"
+#include "ilo_vma.h"
#include "ilo_state_zs.h"
static bool
@@ -56,70 +55,9 @@ zs_set_gen6_null_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = 0;
zs->depth[4] = 0;
- zs->depth_format = format;
-
return true;
}
-static enum gen_surface_type
-get_gen6_surface_type(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- switch (img->target) {
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_NULL;
- }
-}
-
-static enum gen_depth_format
-get_gen6_depth_format(const struct ilo_dev *dev, const struct ilo_image *img)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- switch (img->format) {
- case PIPE_FORMAT_Z32_FLOAT:
- return GEN6_ZFORMAT_D32_FLOAT;
- case PIPE_FORMAT_Z24X8_UNORM:
- return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- case PIPE_FORMAT_Z16_UNORM:
- return GEN6_ZFORMAT_D16_UNORM;
- default:
- assert(!"unknown depth format");
- return GEN6_ZFORMAT_D32_FLOAT;
- }
- } else {
- switch (img->format) {
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
- case PIPE_FORMAT_Z32_FLOAT:
- return GEN6_ZFORMAT_D32_FLOAT;
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- return GEN6_ZFORMAT_D24_UNORM_S8_UINT;
- case PIPE_FORMAT_Z24X8_UNORM:
- return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
- case PIPE_FORMAT_Z16_UNORM:
- return GEN6_ZFORMAT_D16_UNORM;
- default:
- assert(!"unknown depth format");
- return GEN6_ZFORMAT_D32_FLOAT;
- }
- }
-}
-
static bool
zs_validate_gen6(const struct ilo_dev *dev,
const struct ilo_state_zs_info *info)
@@ -128,63 +66,102 @@ zs_validate_gen6(const struct ilo_dev *dev,
ILO_DEV_ASSERT(dev, 6, 8);
+ assert(!info->z_img == !info->z_vma);
+ assert(!info->s_img == !info->s_vma);
+
+ /* all tiled */
+ if (info->z_img) {
+ assert(info->z_img->tiling == GEN6_TILING_Y);
+ assert(info->z_vma->vm_alignment % 4096 == 0);
+ }
+ if (info->s_img) {
+ assert(info->s_img->tiling == GEN8_TILING_W);
+ assert(info->s_vma->vm_alignment % 4096 == 0);
+ }
+   if (info->hiz_vma) {
+      assert(info->z_img &&
+             ilo_image_can_enable_aux(info->z_img, info->level));
+      /* validate the HiZ vma itself; z_vma is already checked with z_img */
+      assert(info->hiz_vma->vm_alignment % 4096 == 0);
+   }
+
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
- * The stencil buffer has a format of S8_UINT, and shares Surface
+ * "The stencil buffer has a format of S8_UINT, and shares Surface
* Type, Height, Width, and Depth, Minimum Array Element, Render
* Target View Extent, Depth Coordinate Offset X/Y, LOD, and Depth
- * Buffer Object Control State fields of the depth buffer.
+ * Buffer Object Control State fields of the depth buffer."
*/
- if (info->z_img == info->s_img) {
- assert(info->z_img->target == info->s_img->target &&
- info->z_img->width0 == info->s_img->width0 &&
+ if (info->z_img && info->s_img && info->z_img != info->s_img) {
+ assert(info->z_img->type == info->s_img->type &&
info->z_img->height0 == info->s_img->height0 &&
info->z_img->depth0 == info->s_img->depth0);
}
+ if (info->type != img->type) {
+ assert(info->type == GEN6_SURFTYPE_2D &&
+ img->type == GEN6_SURFTYPE_CUBE);
+ }
+
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ switch (info->format) {
+ case GEN6_ZFORMAT_D32_FLOAT:
+ case GEN6_ZFORMAT_D24_UNORM_X8_UINT:
+ case GEN6_ZFORMAT_D16_UNORM:
+ break;
+ default:
+ assert(!"unknown depth format");
+ break;
+ }
+ } else {
+ /*
+ * From the Ironlake PRM, volume 2 part 1, page 330:
+ *
+ * "If this field (Separate Stencil Buffer Enable) is disabled, the
+ * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
+ *
+ * From the Sandy Bridge PRM, volume 2 part 1, page 321:
+ *
+ * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be
+ * set to the same value (enabled or disabled) as Hierarchical
+ * Depth Buffer Enable."
+ */
+ if (info->hiz_vma)
+ assert(info->format != GEN6_ZFORMAT_D24_UNORM_S8_UINT);
+ else
+ assert(info->format != GEN6_ZFORMAT_D24_UNORM_X8_UINT);
+ }
+
assert(info->level < img->level_count);
assert(img->bo_stride);
- if (info->hiz_enable) {
- assert(info->z_img &&
- ilo_image_can_enable_aux(info->z_img, info->level));
- }
-
- if (info->is_cube_map) {
- assert(get_gen6_surface_type(dev, img) == GEN6_SURFTYPE_2D);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 323:
- *
- * "For cube maps, Width must be set equal to Height."
- */
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 323:
+ *
+ * "For cube maps, Width must be set equal to Height."
+ */
+ if (info->type == GEN6_SURFTYPE_CUBE)
assert(img->width0 == img->height0);
- }
-
- if (info->z_img)
- assert(info->z_img->tiling == GEN6_TILING_Y);
- if (info->s_img)
- assert(info->s_img->tiling == GEN8_TILING_W);
return true;
}
static void
-get_gen6_max_extent(const struct ilo_dev *dev,
- const struct ilo_image *img,
- uint16_t *max_w, uint16_t *max_h)
+zs_get_gen6_max_extent(const struct ilo_dev *dev,
+ const struct ilo_state_zs_info *info,
+ uint16_t *max_w, uint16_t *max_h)
{
const uint16_t max_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
ILO_DEV_ASSERT(dev, 6, 8);
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
*max_w = max_size;
*max_h = 1;
break;
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
*max_w = max_size;
*max_h = max_size;
break;
@@ -274,7 +251,7 @@ zs_get_gen6_depth_extent(const struct ilo_dev *dev,
w = img->width0;
h = img->height0;
- if (info->hiz_enable) {
+ if (info->hiz_vma) {
uint16_t align_w, align_h;
get_gen6_hiz_alignments(dev, info->z_img, &align_w, &align_h);
@@ -290,7 +267,7 @@ zs_get_gen6_depth_extent(const struct ilo_dev *dev,
h = align(h, align_h);
}
- get_gen6_max_extent(dev, img, &max_w, &max_h);
+ zs_get_gen6_max_extent(dev, info, &max_w, &max_h);
assert(w && h && w <= max_w && h <= max_h);
*width = w - 1;
@@ -319,16 +296,17 @@ zs_get_gen6_depth_slices(const struct ilo_dev *dev,
* surfaces. If the volume texture is MIP-mapped, this field specifies
* the depth of the base MIP level."
*/
- switch (get_gen6_surface_type(dev, img)) {
+ switch (info->type) {
case GEN6_SURFTYPE_1D:
case GEN6_SURFTYPE_2D:
+ case GEN6_SURFTYPE_CUBE:
max_slice = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
assert(img->array_size <= max_slice);
max_slice = img->array_size;
d = info->slice_count;
- if (info->is_cube_map) {
+ if (info->type == GEN6_SURFTYPE_CUBE) {
/*
* Minumum Array Element and Depth must be 0; Render Target View
* Extent is ignored.
@@ -408,8 +386,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
const struct ilo_state_zs_info *info)
{
uint16_t width, height, depth, array_base, view_extent;
- enum gen_surface_type type;
- enum gen_depth_format format;
uint32_t dw1, dw2, dw3, dw4;
ILO_DEV_ASSERT(dev, 6, 6);
@@ -420,37 +396,15 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
&view_extent))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- (info->z_img) ? get_gen6_surface_type(dev, info->z_img) :
- get_gen6_surface_type(dev, info->s_img);
-
- format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
- GEN6_ZFORMAT_D32_FLOAT;
-
- /*
- * From the Ironlake PRM, volume 2 part 1, page 330:
- *
- * "If this field (Separate Stencil Buffer Enable) is disabled, the
- * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 321:
- *
- * "[DevSNB]: This field (Separate Stencil Buffer Enable) must be set
- * to the same value (enabled or disabled) as Hierarchical Depth
- * Buffer Enable."
- */
- if (!info->hiz_enable && format == GEN6_ZFORMAT_D24_UNORM_X8_UINT)
- format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
-
/* info->z_readonly and info->s_readonly are ignored on Gen6 */
- dw1 = type << GEN6_DEPTH_DW1_TYPE__SHIFT |
+ dw1 = info->type << GEN6_DEPTH_DW1_TYPE__SHIFT |
GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT |
- format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+ info->format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
if (info->z_img)
dw1 |= (info->z_img->bo_stride - 1) << GEN6_DEPTH_DW1_PITCH__SHIFT;
- if (info->hiz_enable || !info->z_img) {
+ if (info->hiz_vma || !info->z_img) {
dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
GEN6_DEPTH_DW1_SEPARATE_STENCIL;
}
@@ -471,8 +425,6 @@ zs_set_gen6_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = dw4;
zs->depth[4] = 0;
- zs->depth_format = format;
-
return true;
}
@@ -481,8 +433,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
const struct ilo_dev *dev,
const struct ilo_state_zs_info *info)
{
- enum gen_surface_type type;
- enum gen_depth_format format;
uint16_t width, height, depth;
uint16_t array_base, view_extent;
uint32_t dw1, dw2, dw3, dw4, dw6;
@@ -495,20 +445,13 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
&view_extent))
return false;
- type = (info->is_cube_map) ? GEN6_SURFTYPE_CUBE :
- (info->z_img) ? get_gen6_surface_type(dev, info->z_img) :
- get_gen6_surface_type(dev, info->s_img);
-
- format = (info->z_img) ? get_gen6_depth_format(dev, info->z_img) :
- GEN6_ZFORMAT_D32_FLOAT;
-
- dw1 = type << GEN7_DEPTH_DW1_TYPE__SHIFT |
- format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
+ dw1 = info->type << GEN7_DEPTH_DW1_TYPE__SHIFT |
+ info->format << GEN7_DEPTH_DW1_FORMAT__SHIFT;
if (info->z_img) {
if (!info->z_readonly)
dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
- if (info->hiz_enable)
+ if (info->hiz_vma)
dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
dw1 |= (info->z_img->bo_stride - 1) << GEN7_DEPTH_DW1_PITCH__SHIFT;
@@ -539,8 +482,6 @@ zs_set_gen7_3DSTATE_DEPTH_BUFFER(struct ilo_state_zs *zs,
zs->depth[3] = dw4;
zs->depth[4] = dw6;
- zs->depth_format = format;
-
return true;
}
@@ -683,11 +624,15 @@ ilo_state_zs_init(struct ilo_state_zs *zs, const struct ilo_dev *dev,
else
ret &= zs_set_gen6_null_3DSTATE_STENCIL_BUFFER(zs, dev);
- if (info->z_img && info->hiz_enable)
+ if (info->z_img && info->hiz_vma)
ret &= zs_set_gen6_3DSTATE_HIER_DEPTH_BUFFER(zs, dev, info);
else
ret &= zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ zs->z_vma = info->z_vma;
+ zs->s_vma = info->s_vma;
+ zs->hiz_vma = info->hiz_vma;
+
zs->z_readonly = info->z_readonly;
zs->s_readonly = info->s_readonly;
@@ -703,6 +648,8 @@ ilo_state_zs_init_for_null(struct ilo_state_zs *zs,
struct ilo_state_zs_info info;
memset(&info, 0, sizeof(info));
+ info.type = GEN6_SURFTYPE_NULL;
+ info.format = GEN6_ZFORMAT_D32_FLOAT;
return ilo_state_zs_init(zs, dev, &info);
}
@@ -720,8 +667,11 @@ ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
*/
assert(ilo_dev_gen(dev) >= ILO_GEN(7));
- zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
- zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ if (zs->hiz_vma) {
+ zs->depth[0] &= ~GEN7_DEPTH_DW1_HIZ_ENABLE;
+ zs_set_gen6_null_3DSTATE_HIER_DEPTH_BUFFER(zs, dev);
+ zs->hiz_vma = NULL;
+ }
return true;
}
diff --git a/src/gallium/drivers/ilo/core/ilo_state_zs.h b/src/gallium/drivers/ilo/core/ilo_state_zs.h
index 98212daf74f..6a25a873897 100644
--- a/src/gallium/drivers/ilo/core/ilo_state_zs.h
+++ b/src/gallium/drivers/ilo/core/ilo_state_zs.h
@@ -29,28 +29,31 @@
#define ILO_STATE_ZS_H
#include "genhw/genhw.h"
-#include "intel_winsys.h"
#include "ilo_core.h"
#include "ilo_dev.h"
+struct ilo_vma;
struct ilo_image;
struct ilo_state_zs_info {
- /* both are optional */
+ /* both optional */
const struct ilo_image *z_img;
const struct ilo_image *s_img;
+ uint8_t level;
+ uint16_t slice_base;
+ uint16_t slice_count;
+
+ const struct ilo_vma *z_vma;
+ const struct ilo_vma *s_vma;
+ const struct ilo_vma *hiz_vma;
+
+ enum gen_surface_type type;
+ enum gen_depth_format format;
/* ignored prior to Gen7 */
bool z_readonly;
bool s_readonly;
-
- bool hiz_enable;
- bool is_cube_map;
-
- uint8_t level;
- uint16_t slice_base;
- uint16_t slice_count;
};
struct ilo_state_zs {
@@ -58,16 +61,12 @@ struct ilo_state_zs {
uint32_t stencil[3];
uint32_t hiz[3];
- /* TODO move this to ilo_image */
- enum gen_depth_format depth_format;
+ const struct ilo_vma *z_vma;
+ const struct ilo_vma *s_vma;
+ const struct ilo_vma *hiz_vma;
bool z_readonly;
bool s_readonly;
-
- /* managed by users */
- struct intel_bo *depth_bo;
- struct intel_bo *stencil_bo;
- struct intel_bo *hiz_bo;
};
bool
@@ -83,11 +82,4 @@ bool
ilo_state_zs_disable_hiz(struct ilo_state_zs *zs,
const struct ilo_dev *dev);
-static inline enum gen_depth_format
-ilo_state_zs_get_depth_format(const struct ilo_state_zs *zs,
- const struct ilo_dev *dev)
-{
- return zs->depth_format;
-}
-
#endif /* ILO_STATE_ZS_H */
diff --git a/src/gallium/drivers/ilo/core/ilo_buffer.h b/src/gallium/drivers/ilo/core/ilo_vma.h
similarity index 60%
rename from src/gallium/drivers/ilo/core/ilo_buffer.h
rename to src/gallium/drivers/ilo/core/ilo_vma.h
index ca3c61ff890..ad2a1d4b33e 100644
--- a/src/gallium/drivers/ilo/core/ilo_buffer.h
+++ b/src/gallium/drivers/ilo/core/ilo_vma.h
@@ -1,7 +1,7 @@
/*
* Mesa 3-D graphics library
*
- * Copyright (C) 2012-2013 LunarG, Inc.
+ * Copyright (C) 2015 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -25,40 +25,49 @@
* Chia-I Wu
*/
-#ifndef ILO_BUFFER_H
-#define ILO_BUFFER_H
-
-#include "intel_winsys.h"
+#ifndef ILO_VMA_H
+#define ILO_VMA_H
#include "ilo_core.h"
#include "ilo_debug.h"
#include "ilo_dev.h"
-struct ilo_buffer {
- unsigned bo_size;
+struct intel_bo;
- /* managed by users */
+/**
+ * A virtual memory area.
+ */
+struct ilo_vma {
+ /* address space */
+ uint32_t vm_size;
+ uint32_t vm_alignment;
+
+ /* backing storage */
struct intel_bo *bo;
+ uint32_t bo_offset;
};
-static inline void
-ilo_buffer_init(struct ilo_buffer *buf, const struct ilo_dev *dev,
- unsigned size, uint32_t bind, uint32_t flags)
+static inline bool
+ilo_vma_init(struct ilo_vma *vma, const struct ilo_dev *dev,
+ uint32_t size, uint32_t alignment)
{
- assert(ilo_is_zeroed(buf, sizeof(*buf)));
+ assert(ilo_is_zeroed(vma, sizeof(*vma)));
+ assert(size && alignment);
- buf->bo_size = size;
+ vma->vm_alignment = alignment;
+ vma->vm_size = size;
- /*
- * From the Sandy Bridge PRM, volume 1 part 1, page 118:
- *
- * "For buffers, which have no inherent "height," padding requirements
- * are different. A buffer must be padded to the next multiple of 256
- * array elements, with an additional 16 bytes added beyond that to
- * account for the L1 cache line."
- */
- if (bind & PIPE_BIND_SAMPLER_VIEW)
- buf->bo_size = align(buf->bo_size, 256) + 16;
+ return true;
}
-#endif /* ILO_BUFFER_H */
+static inline void
+ilo_vma_set_bo(struct ilo_vma *vma, const struct ilo_dev *dev,
+ struct intel_bo *bo, uint32_t offset)
+{
+ assert(offset % vma->vm_alignment == 0);
+
+ vma->bo = bo;
+ vma->bo_offset = offset;
+}
+
+#endif /* ILO_VMA_H */
diff --git a/src/gallium/drivers/ilo/ilo_blitter_blt.c b/src/gallium/drivers/ilo/ilo_blitter_blt.c
index d55dc35e360..66203e86137 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_blt.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_blt.c
@@ -127,7 +127,7 @@ ilo_blitter_blt_end(struct ilo_blitter *blitter, uint32_t swctrl)
static bool
buf_clear_region(struct ilo_blitter *blitter,
- struct ilo_buffer *buf, unsigned offset,
+ struct ilo_buffer_resource *buf, unsigned offset,
uint32_t val, unsigned size,
enum gen6_blt_mask value_mask,
enum gen6_blt_mask write_mask)
@@ -140,8 +140,8 @@ buf_clear_region(struct ilo_blitter *blitter,
if (offset % cpp || size % cpp)
return false;
- dst.bo = buf->bo;
- dst.offset = offset;
+ dst.bo = buf->vma.bo;
+ dst.offset = buf->vma.bo_offset + offset;
ilo_blitter_blt_begin(blitter, GEN6_COLOR_BLT__SIZE *
(1 + size / 32764 / gen6_blt_max_scanlines),
@@ -179,25 +179,26 @@ buf_clear_region(struct ilo_blitter *blitter,
static bool
buf_copy_region(struct ilo_blitter *blitter,
- struct ilo_buffer *dst_buf, unsigned dst_offset,
- struct ilo_buffer *src_buf, unsigned src_offset,
+ struct ilo_buffer_resource *dst_buf, unsigned dst_offset,
+ struct ilo_buffer_resource *src_buf, unsigned src_offset,
unsigned size)
{
const uint8_t rop = 0xcc; /* SRCCOPY */
struct ilo_builder *builder = &blitter->ilo->cp->builder;
struct gen6_blt_bo dst, src;
- dst.bo = dst_buf->bo;
- dst.offset = dst_offset;
+ dst.bo = dst_buf->vma.bo;
+ dst.offset = dst_buf->vma.bo_offset + dst_offset;
dst.pitch = 0;
- src.bo = src_buf->bo;
- src.offset = src_offset;
+ src.bo = src_buf->vma.bo;
+ src.offset = src_buf->vma.bo_offset + src_offset;
src.pitch = 0;
ilo_blitter_blt_begin(blitter, GEN6_SRC_COPY_BLT__SIZE *
(1 + size / 32764 / gen6_blt_max_scanlines),
- dst_buf->bo, GEN6_TILING_NONE, src_buf->bo, GEN6_TILING_NONE);
+ dst_buf->vma.bo, GEN6_TILING_NONE,
+ src_buf->vma.bo, GEN6_TILING_NONE);
while (size) {
unsigned width, height;
@@ -258,14 +259,14 @@ tex_clear_region(struct ilo_blitter *blitter,
if (dst_box->width * cpp > gen6_blt_max_bytes_per_scanline)
return false;
- dst.bo = dst_tex->image.bo;
- dst.offset = 0;
+ dst.bo = dst_tex->vma.bo;
+ dst.offset = dst_tex->vma.bo_offset;
dst.pitch = dst_tex->image.bo_stride;
dst.tiling = dst_tex->image.tiling;
swctrl = ilo_blitter_blt_begin(blitter,
GEN6_XY_COLOR_BLT__SIZE * dst_box->depth,
- dst_tex->image.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE);
+ dst_tex->vma.bo, dst_tex->image.tiling, NULL, GEN6_TILING_NONE);
for (slice = 0; slice < dst_box->depth; slice++) {
unsigned x, y;
@@ -299,7 +300,7 @@ tex_copy_region(struct ilo_blitter *blitter,
const struct pipe_box *src_box)
{
const struct util_format_description *desc =
- util_format_description(dst_tex->image.format);
+ util_format_description(dst_tex->image_format);
const unsigned max_extent = 32767; /* INT16_MAX */
const uint8_t rop = 0xcc; /* SRCCOPY */
struct ilo_builder *builder = &blitter->ilo->cp->builder;
@@ -347,13 +348,13 @@ tex_copy_region(struct ilo_blitter *blitter,
break;
}
- dst.bo = dst_tex->image.bo;
- dst.offset = 0;
+ dst.bo = dst_tex->vma.bo;
+ dst.offset = dst_tex->vma.bo_offset;
dst.pitch = dst_tex->image.bo_stride;
dst.tiling = dst_tex->image.tiling;
- src.bo = src_tex->image.bo;
- src.offset = 0;
+ src.bo = src_tex->vma.bo;
+ src.offset = src_tex->vma.bo_offset;
src.pitch = src_tex->image.bo_stride;
src.tiling = src_tex->image.tiling;
@@ -423,8 +424,8 @@ ilo_blitter_blt_copy_resource(struct ilo_blitter *blitter,
src_box->height == 1 &&
src_box->depth == 1);
- success = buf_copy_region(blitter,
- ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size);
+ success = buf_copy_region(blitter, ilo_buffer_resource(dst), dst_offset,
+ ilo_buffer_resource(src), src_offset, size);
}
else if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) {
success = tex_copy_region(blitter,
@@ -488,7 +489,7 @@ ilo_blitter_blt_clear_rt(struct ilo_blitter *blitter,
if (offset + size > end)
size = end - offset;
- success = buf_clear_region(blitter, ilo_buffer(rt->texture),
+ success = buf_clear_region(blitter, ilo_buffer_resource(rt->texture),
offset, packed.ui[0], size, mask, mask);
}
else {
diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
index 13c8f500680..86e67084d6e 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
@@ -318,7 +318,7 @@ hiz_can_clear_zs(const struct ilo_blitter *blitter,
* The truth is when HiZ is enabled, separate stencil is also enabled on
* all GENs. The depth buffer format cannot be combined depth/stencil.
*/
- switch (tex->image.format) {
+ switch (tex->image_format) {
case PIPE_FORMAT_Z16_UNORM:
if (ilo_dev_gen(blitter->ilo->dev) == ILO_GEN(6) &&
tex->base.width0 % 16)
@@ -355,7 +355,7 @@ ilo_blitter_rectlist_clear_zs(struct ilo_blitter *blitter,
if (ilo_dev_gen(blitter->ilo->dev) >= ILO_GEN(8))
clear_value = fui(depth);
else
- clear_value = util_pack_z(tex->image.format, depth);
+ clear_value = util_pack_z(tex->image_format, depth);
ilo_blit_resolve_surface(blitter->ilo, zs,
ILO_TEXTURE_RENDER_WRITE | ILO_TEXTURE_CLEAR);
diff --git a/src/gallium/drivers/ilo/ilo_common.h b/src/gallium/drivers/ilo/ilo_common.h
index 9ebbf76e81e..3dbe79fb872 100644
--- a/src/gallium/drivers/ilo/ilo_common.h
+++ b/src/gallium/drivers/ilo/ilo_common.h
@@ -28,6 +28,14 @@
#ifndef ILO_COMMON_H
#define ILO_COMMON_H
+#include "pipe/p_format.h"
+#include "pipe/p_defines.h"
+
+#include "util/list.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_pointer.h"
+
#include "core/ilo_core.h"
#include "core/ilo_debug.h"
#include "core/ilo_dev.h"
diff --git a/src/gallium/drivers/ilo/ilo_context.c b/src/gallium/drivers/ilo/ilo_context.c
index 3d5c7b636a8..b9a16aab81d 100644
--- a/src/gallium/drivers/ilo/ilo_context.c
+++ b/src/gallium/drivers/ilo/ilo_context.c
@@ -62,6 +62,8 @@ ilo_flush(struct pipe_context *pipe,
(flags & PIPE_FLUSH_END_OF_FRAME) ? "frame end" : "user request");
if (f) {
+ struct pipe_screen *screen = pipe->screen;
+ screen->fence_reference(screen, f, NULL);
*f = ilo_screen_fence_create(pipe->screen, ilo->cp->last_submitted_bo);
}
}
diff --git a/src/gallium/drivers/ilo/ilo_draw.c b/src/gallium/drivers/ilo/ilo_draw.c
index e8e1a4cd14c..433348d9326 100644
--- a/src/gallium/drivers/ilo/ilo_draw.c
+++ b/src/gallium/drivers/ilo/ilo_draw.c
@@ -444,6 +444,7 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
const struct pipe_draw_info *info)
{
const struct ilo_ib_state *ib = &ilo->state_vector.ib;
+ const struct ilo_vma *vma;
union {
const void *ptr;
const uint8_t *u8;
@@ -453,10 +454,12 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
/* we will draw with IB mapped */
if (ib->state.buffer) {
- u.ptr = intel_bo_map(ilo_buffer(ib->state.buffer)->bo, false);
+ vma = ilo_resource_get_vma(ib->state.buffer);
+ u.ptr = intel_bo_map(vma->bo, false);
if (u.ptr)
- u.u8 += ib->state.offset;
+ u.u8 += vma->bo_offset + ib->state.offset;
} else {
+ vma = NULL;
u.ptr = ib->state.user_buffer;
}
@@ -500,8 +503,8 @@ draw_vbo_with_sw_restart(struct ilo_context *ilo,
#undef DRAW_VBO_WITH_SW_RESTART
- if (ib->state.buffer)
- intel_bo_unmap(ilo_buffer(ib->state.buffer)->bo);
+ if (vma)
+ intel_bo_unmap(vma->bo);
}
static bool
diff --git a/src/gallium/drivers/ilo/ilo_format.h b/src/gallium/drivers/ilo/ilo_format.h
index 4e955c09c14..0a19c02659e 100644
--- a/src/gallium/drivers/ilo/ilo_format.h
+++ b/src/gallium/drivers/ilo/ilo_format.h
@@ -165,4 +165,39 @@ ilo_format_translate_vertex(const struct ilo_dev *dev,
return ilo_format_translate(dev, format, PIPE_BIND_VERTEX_BUFFER);
}
+static inline enum gen_depth_format
+ilo_format_translate_depth(const struct ilo_dev *dev,
+ enum pipe_format format)
+{
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_ZFORMAT_D32_FLOAT;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_ZFORMAT_D16_UNORM;
+ default:
+ assert(!"unknown depth format");
+ return GEN6_ZFORMAT_D32_FLOAT;
+ }
+ } else {
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_ZFORMAT_D32_FLOAT;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ return GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_ZFORMAT_D24_UNORM_X8_UINT;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_ZFORMAT_D16_UNORM;
+ default:
+ assert(!"unknown depth format");
+ return GEN6_ZFORMAT_D32_FLOAT;
+ }
+ }
+}
+
#endif /* ILO_FORMAT_H */
diff --git a/src/gallium/drivers/ilo/ilo_render_surface.c b/src/gallium/drivers/ilo/ilo_render_surface.c
index ad053564294..3bf8646b344 100644
--- a/src/gallium/drivers/ilo/ilo_render_surface.c
+++ b/src/gallium/drivers/ilo/ilo_render_surface.c
@@ -42,14 +42,17 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder,
const struct pipe_stream_output_info *so_info,
int so_index)
{
- struct ilo_buffer *buf = ilo_buffer(so->buffer);
struct ilo_state_surface_buffer_info info;
struct ilo_state_surface surf;
ILO_DEV_ASSERT(builder->dev, 6, 6);
memset(&info, 0, sizeof(info));
- info.buf = buf;
+
+ info.vma = ilo_resource_get_vma(so->buffer);
+ info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
+ info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_SVB;
switch (so_info->output[so_index].num_components) {
@@ -78,12 +81,9 @@ gen6_so_SURFACE_STATE(struct ilo_builder *builder,
info.struct_size =
so_info->stride[so_info->output[so_index].output_buffer] * 4;
- info.offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
- info.size = so->buffer_size - so_info->output[so_index].dst_offset * 4;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, builder->dev, &info);
- surf.bo = info.buf->bo;
return gen6_SURFACE_STATE(builder, &surf);
}
@@ -482,18 +482,19 @@ gen6_emit_launch_grid_surface_const(struct ilo_render *r,
return;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(session->input->buffer);
+
+ info.vma = ilo_resource_get_vma(session->input->buffer);
+ info.offset = session->input->buffer_offset;
+ info.size = session->input->buffer_size;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
info.format = GEN6_FORMAT_RAW;
info.format_size = 1;
info.struct_size = 1;
info.readonly = true;
- info.offset = session->input->buffer_offset;
- info.size = session->input->buffer_size;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
- surf.bo = info.buf->bo;
assert(count == 1 && session->input->buffer);
surface_state[base] = gen6_SURFACE_STATE(r->builder, &surf);
@@ -538,23 +539,23 @@ gen6_emit_launch_grid_surface_global(struct ilo_render *r,
surface_state += base;
for (i = 0; i < count; i++) {
if (i < vec->global_binding.count && bindings[i].resource) {
- const struct ilo_buffer *buf = ilo_buffer(bindings[i].resource);
struct ilo_state_surface_buffer_info info;
struct ilo_state_surface surf;
assert(bindings[i].resource->target == PIPE_BUFFER);
memset(&info, 0, sizeof(info));
- info.buf = buf;
+
+ info.vma = ilo_resource_get_vma(bindings[i].resource);
+ info.size = info.vma->vm_size;
+
info.access = ILO_STATE_SURFACE_ACCESS_DP_UNTYPED;
info.format = GEN6_FORMAT_RAW;
info.format_size = 1;
info.struct_size = 1;
- info.size = buf->bo_size;
memset(&surf, 0, sizeof(surf));
ilo_state_surface_init_for_buffer(&surf, r->dev, &info);
- surf.bo = info.buf->bo;
surface_state[i] = gen6_SURFACE_STATE(r->builder, &surf);
} else {
diff --git a/src/gallium/drivers/ilo/ilo_resource.c b/src/gallium/drivers/ilo/ilo_resource.c
index be9fd10a84c..9026ba9a983 100644
--- a/src/gallium/drivers/ilo/ilo_resource.c
+++ b/src/gallium/drivers/ilo/ilo_resource.c
@@ -25,7 +25,12 @@
* Chia-I Wu
*/
+#include "core/ilo_state_vf.h"
+#include "core/ilo_state_sol.h"
+#include "core/ilo_state_surface.h"
+
#include "ilo_screen.h"
+#include "ilo_format.h"
#include "ilo_resource.h"
/*
@@ -83,6 +88,134 @@ resource_get_cpu_init(const struct pipe_resource *templ)
PIPE_BIND_STREAM_OUTPUT)) ? false : true;
}
+static enum gen_surface_type
+get_surface_type(enum pipe_texture_target target)
+{
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return GEN6_SURFTYPE_1D;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_2D_ARRAY:
+ return GEN6_SURFTYPE_2D;
+ case PIPE_TEXTURE_3D:
+ return GEN6_SURFTYPE_3D;
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ return GEN6_SURFTYPE_CUBE;
+ default:
+ assert(!"unknown texture target");
+ return GEN6_SURFTYPE_NULL;
+ }
+}
+
+static enum pipe_format
+resource_get_image_format(const struct pipe_resource *templ,
+ const struct ilo_dev *dev,
+ bool *separate_stencil_ret)
+{
+ enum pipe_format format = templ->format;
+ bool separate_stencil;
+
+ /* silently promote ETC1 */
+ if (templ->format == PIPE_FORMAT_ETC1_RGB8)
+ format = PIPE_FORMAT_R8G8B8X8_UNORM;
+
+ /* separate stencil buffers */
+ separate_stencil = false;
+ if ((templ->bind & PIPE_BIND_DEPTH_STENCIL) &&
+ util_format_is_depth_and_stencil(templ->format)) {
+ switch (templ->format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ /* Gen6 requires HiZ to be available for all levels */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7) || templ->last_level == 0) {
+ format = PIPE_FORMAT_Z32_FLOAT;
+ separate_stencil = true;
+ }
+ break;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ format = PIPE_FORMAT_Z24X8_UNORM;
+ separate_stencil = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (separate_stencil_ret)
+ *separate_stencil_ret = separate_stencil;
+
+ return format;
+}
+
+static inline enum gen_surface_format
+pipe_to_surface_format(const struct ilo_dev *dev, enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ return GEN6_FORMAT_R32_FLOAT_X8X24_TYPELESS;
+ case PIPE_FORMAT_Z32_FLOAT:
+ return GEN6_FORMAT_R32_FLOAT;
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ return GEN6_FORMAT_R24_UNORM_X8_TYPELESS;
+ case PIPE_FORMAT_Z16_UNORM:
+ return GEN6_FORMAT_R16_UNORM;
+ case PIPE_FORMAT_S8_UINT:
+ return GEN6_FORMAT_R8_UINT;
+ default:
+ return ilo_format_translate_color(dev, format);
+ }
+}
+
+static void
+resource_get_image_info(const struct pipe_resource *templ,
+ const struct ilo_dev *dev,
+ enum pipe_format image_format,
+ struct ilo_image_info *info)
+{
+ memset(info, 0, sizeof(*info));
+
+ info->type = get_surface_type(templ->target);
+
+ info->format = pipe_to_surface_format(dev, image_format);
+ info->interleaved_stencil = util_format_is_depth_and_stencil(image_format);
+ info->is_integer = util_format_is_pure_integer(image_format);
+ info->compressed = util_format_is_compressed(image_format);
+ info->block_width = util_format_get_blockwidth(image_format);
+ info->block_height = util_format_get_blockheight(image_format);
+ info->block_size = util_format_get_blocksize(image_format);
+
+ info->width = templ->width0;
+ info->height = templ->height0;
+ info->depth = templ->depth0;
+ info->array_size = templ->array_size;
+ info->level_count = templ->last_level + 1;
+ info->sample_count = (templ->nr_samples) ? templ->nr_samples : 1;
+
+ info->aux_disable = (templ->usage == PIPE_USAGE_STAGING);
+
+ if (templ->bind & PIPE_BIND_LINEAR)
+ info->valid_tilings = 1 << GEN6_TILING_NONE;
+
+ /*
+ * Tiled images must be mapped via GTT to get a linear view. Prefer linear
+ * images when the image size is greater than one-fourth of the mappable
+ * aperture.
+ */
+ if (templ->bind & (PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_TRANSFER_READ))
+ info->prefer_linear_threshold = dev->aperture_mappable / 4;
+
+ info->bind_surface_sampler = (templ->bind & PIPE_BIND_SAMPLER_VIEW);
+ info->bind_surface_dp_render = (templ->bind & PIPE_BIND_RENDER_TARGET);
+ info->bind_surface_dp_typed = (templ->bind &
+ (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE));
+ info->bind_zs = (templ->bind & PIPE_BIND_DEPTH_STENCIL);
+ info->bind_scanout = (templ->bind & PIPE_BIND_SCANOUT);
+ info->bind_cursor = (templ->bind & PIPE_BIND_CURSOR);
+}
+
static enum gen_surface_tiling
winsys_to_surface_tiling(enum intel_tiling_mode tiling)
{
@@ -178,8 +311,8 @@ tex_create_bo(struct ilo_texture *tex)
if (!bo)
return false;
- intel_bo_unref(tex->image.bo);
- tex->image.bo = bo;
+ intel_bo_unref(tex->vma.bo);
+ ilo_vma_set_bo(&tex->vma, &is->dev, bo, 0);
return true;
}
@@ -206,7 +339,7 @@ tex_create_separate_stencil(struct ilo_texture *tex)
tex->separate_s8 = ilo_texture(s8);
- assert(tex->separate_s8->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->separate_s8->image_format == PIPE_FORMAT_S8_UINT);
return true;
}
@@ -215,15 +348,16 @@ static bool
tex_create_hiz(struct ilo_texture *tex)
{
const struct pipe_resource *templ = &tex->base;
+ const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height;
struct ilo_screen *is = ilo_screen(tex->base.screen);
struct intel_bo *bo;
- bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture",
- tex->image.aux.bo_stride * tex->image.aux.bo_height, false);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, "hiz texture", size, false);
if (!bo)
return false;
- tex->image.aux.bo = bo;
+ ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096);
+ ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0);
if (tex->imported) {
unsigned lv;
@@ -246,17 +380,18 @@ tex_create_hiz(struct ilo_texture *tex)
static bool
tex_create_mcs(struct ilo_texture *tex)
{
+ const uint32_t size = tex->image.aux.bo_stride * tex->image.aux.bo_height;
struct ilo_screen *is = ilo_screen(tex->base.screen);
struct intel_bo *bo;
assert(tex->image.aux.enables == (1 << (tex->base.last_level + 1)) - 1);
- bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture",
- tex->image.aux.bo_stride * tex->image.aux.bo_height, false);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, "mcs texture", size, false);
if (!bo)
return false;
- tex->image.aux.bo = bo;
+ ilo_vma_init(&tex->aux_vma, &is->dev, size, 4096);
+ ilo_vma_set_bo(&tex->aux_vma, &is->dev, bo, 0);
return true;
}
@@ -267,8 +402,8 @@ tex_destroy(struct ilo_texture *tex)
if (tex->separate_s8)
tex_destroy(tex->separate_s8);
- intel_bo_unref(tex->image.bo);
- intel_bo_unref(tex->image.aux.bo);
+ intel_bo_unref(tex->vma.bo);
+ intel_bo_unref(tex->aux_vma.bo);
tex_free_slices(tex);
FREE(tex);
@@ -277,24 +412,16 @@ tex_destroy(struct ilo_texture *tex)
static bool
tex_alloc_bos(struct ilo_texture *tex)
{
- struct ilo_screen *is = ilo_screen(tex->base.screen);
-
if (!tex->imported && !tex_create_bo(tex))
return false;
- /* allocate separate stencil resource */
- if (tex->image.separate_stencil && !tex_create_separate_stencil(tex))
- return false;
-
switch (tex->image.aux.type) {
case ILO_IMAGE_AUX_HIZ:
- if (!tex_create_hiz(tex) &&
- !ilo_image_disable_aux(&tex->image, &is->dev))
+ if (!tex_create_hiz(tex))
return false;
break;
case ILO_IMAGE_AUX_MCS:
- if (!tex_create_mcs(tex) &&
- !ilo_image_disable_aux(&tex->image, &is->dev))
+ if (!tex_create_mcs(tex))
return false;
break;
default:
@@ -304,9 +431,10 @@ tex_alloc_bos(struct ilo_texture *tex)
return true;
}
-static bool
+static struct intel_bo *
tex_import_handle(struct ilo_texture *tex,
- const struct winsys_handle *handle)
+ const struct winsys_handle *handle,
+ struct ilo_image_info *info)
{
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = &tex->base;
@@ -317,45 +445,94 @@ tex_import_handle(struct ilo_texture *tex,
bo = intel_winsys_import_handle(is->dev.winsys, name, handle,
tex->image.bo_height, &tiling, &pitch);
- if (!bo)
- return false;
+ /* modify image info */
+ if (bo) {
+ const uint8_t valid_tilings = 1 << winsys_to_surface_tiling(tiling);
- if (!ilo_image_init_for_imported(&tex->image, &is->dev, templ,
- winsys_to_surface_tiling(tiling), pitch)) {
- ilo_err("failed to import handle for texture\n");
- intel_bo_unref(bo);
- return false;
+ if (info->valid_tilings && !(info->valid_tilings & valid_tilings)) {
+ intel_bo_unref(bo);
+ return NULL;
+ }
+
+ info->valid_tilings = valid_tilings;
+ info->force_bo_stride = pitch;
+
+ /* assume imported RTs are also scanouts (RENDER_TARGET is a bind flag) */
+ if (!info->bind_scanout)
+ info->bind_scanout = (templ->bind & PIPE_BIND_RENDER_TARGET);
}
- tex->image.bo = bo;
-
- tex->imported = true;
-
- return true;
+ return bo;
}
static bool
tex_init_image(struct ilo_texture *tex,
- const struct winsys_handle *handle)
+ const struct winsys_handle *handle,
+ bool *separate_stencil)
{
struct ilo_screen *is = ilo_screen(tex->base.screen);
const struct pipe_resource *templ = &tex->base;
struct ilo_image *img = &tex->image;
+ struct intel_bo *imported_bo = NULL; /* set only when a handle is imported */
+ struct ilo_image_info info;
+
+ tex->image_format = resource_get_image_format(templ,
+ &is->dev, separate_stencil);
+ resource_get_image_info(templ, &is->dev, tex->image_format, &info);
if (handle) {
- if (!tex_import_handle(tex, handle))
+ imported_bo = tex_import_handle(tex, handle, &info);
+ if (!imported_bo)
return false;
- } else {
- ilo_image_init(img, &is->dev, templ);
}
- if (img->bo_height > ilo_max_resource_size / img->bo_stride)
+ if (!ilo_image_init(img, &is->dev, &info)) {
+ intel_bo_unref(imported_bo);
return false;
+ }
+
+ /*
+ * HiZ requires 8x4 alignment and some levels might need HiZ disabled. It
+ * is generally fine except on Gen6, where HiZ and separate stencil must be
+ * enabled together. For PIPE_FORMAT_Z24X8_UNORM with separate stencil, we
+ * can live with stencil values being interleaved for levels where HiZ is
+ * disabled. But it is not the case for PIPE_FORMAT_Z32_FLOAT with
+ * separate stencil. If HiZ were disabled for a level, we would have to
+ * change the format to PIPE_FORMAT_Z32_FLOAT_S8X24_UINT for that level,
+ * and that format has a different bpp. In other words, HiZ has to be
+ * available for all levels.
+ */
+ if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
+ templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ tex->image_format == PIPE_FORMAT_Z32_FLOAT &&
+ img->aux.enables != (1 << templ->last_level)) {
+ tex->image_format = templ->format;
+ info.format = pipe_to_surface_format(&is->dev, tex->image_format);
+ info.interleaved_stencil = true;
+
+ memset(img, 0, sizeof(*img));
+ if (!ilo_image_init(img, &is->dev, &info)) {
+ intel_bo_unref(imported_bo);
+ return false;
+ }
+ }
+
+ if (img->bo_height > ilo_max_resource_size / img->bo_stride ||
+ !ilo_vma_init(&tex->vma, &is->dev, img->bo_stride * img->bo_height,
+ 4096)) {
+ intel_bo_unref(imported_bo);
+ return false;
+ }
+
+ if (imported_bo) {
+ ilo_vma_set_bo(&tex->vma, &is->dev, imported_bo, 0);
+ tex->imported = true;
+ }
if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
/* require on-the-fly tiling/untiling or format conversion */
- if (img->tiling == GEN8_TILING_W || img->separate_stencil ||
- img->format != templ->format)
+ if (img->tiling == GEN8_TILING_W || *separate_stencil ||
+ tex->image_format != templ->format)
return false;
}
@@ -371,6 +548,7 @@ tex_create(struct pipe_screen *screen,
const struct winsys_handle *handle)
{
struct ilo_texture *tex;
+ bool separate_stencil;
tex = CALLOC_STRUCT(ilo_texture);
if (!tex)
@@ -380,12 +558,13 @@ tex_create(struct pipe_screen *screen,
tex->base.screen = screen;
pipe_reference_init(&tex->base.reference, 1);
- if (!tex_init_image(tex, handle)) {
+ if (!tex_init_image(tex, handle, &separate_stencil)) {
FREE(tex);
return NULL;
}
- if (!tex_alloc_bos(tex)) {
+ if (!tex_alloc_bos(tex) ||
+ (separate_stencil && !tex_create_separate_stencil(tex))) {
tex_destroy(tex);
return NULL;
}
@@ -406,7 +585,7 @@ tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle)
else
tiling = surface_to_winsys_tiling(tex->image.tiling);
- err = intel_winsys_export_handle(is->dev.winsys, tex->image.bo, tiling,
+ err = intel_winsys_export_handle(is->dev.winsys, tex->vma.bo, tiling,
tex->image.bo_stride, tex->image.bo_height, handle);
return !err;
@@ -420,13 +599,12 @@ buf_create_bo(struct ilo_buffer_resource *buf)
const bool cpu_init = resource_get_cpu_init(&buf->base);
struct intel_bo *bo;
- bo = intel_winsys_alloc_bo(is->dev.winsys, name,
- buf->buffer.bo_size, cpu_init);
+ bo = intel_winsys_alloc_bo(is->dev.winsys, name, buf->bo_size, cpu_init);
if (!bo)
return false;
- intel_bo_unref(buf->buffer.bo);
- buf->buffer.bo = bo;
+ intel_bo_unref(buf->vma.bo);
+ ilo_vma_set_bo(&buf->vma, &is->dev, bo, 0);
return true;
}
@@ -434,7 +612,7 @@ buf_create_bo(struct ilo_buffer_resource *buf)
static void
buf_destroy(struct ilo_buffer_resource *buf)
{
- intel_bo_unref(buf->buffer.bo);
+ intel_bo_unref(buf->vma.bo);
FREE(buf);
}
@@ -443,6 +621,7 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
{
const struct ilo_screen *is = ilo_screen(screen);
struct ilo_buffer_resource *buf;
+ uint32_t alignment;
unsigned size;
buf = CALLOC_STRUCT(ilo_buffer_resource);
@@ -471,10 +650,17 @@ buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
ilo_dev_gen(&is->dev) < ILO_GEN(7.5))
size = align(size, 4096);
- ilo_buffer_init(&buf->buffer, &is->dev, size, templ->bind, templ->flags);
+ if (templ->bind & PIPE_BIND_VERTEX_BUFFER)
+ size = ilo_state_vertex_buffer_size(&is->dev, size, &alignment);
+ if (templ->bind & PIPE_BIND_INDEX_BUFFER)
+ size = ilo_state_index_buffer_size(&is->dev, size, &alignment);
+ if (templ->bind & PIPE_BIND_STREAM_OUTPUT)
+ size = ilo_state_sol_buffer_size(&is->dev, size, &alignment);
- if (buf->buffer.bo_size < templ->width0 ||
- buf->buffer.bo_size > ilo_max_resource_size ||
+ buf->bo_size = size;
+
+ if (buf->bo_size < templ->width0 || buf->bo_size > ilo_max_resource_size ||
+ !ilo_vma_init(&buf->vma, &is->dev, buf->bo_size, 4096) ||
!buf_create_bo(buf)) {
FREE(buf);
return NULL;
@@ -487,13 +673,33 @@ static boolean
ilo_can_create_resource(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
+ struct ilo_screen *is = ilo_screen(screen);
+ enum pipe_format image_format;
+ struct ilo_image_info info;
struct ilo_image img;
if (templ->target == PIPE_BUFFER)
return (templ->width0 <= ilo_max_resource_size);
+ image_format = resource_get_image_format(templ, &is->dev, NULL);
+ resource_get_image_info(templ, &is->dev, image_format, &info);
+
memset(&img, 0, sizeof(img));
- ilo_image_init(&img, &ilo_screen(screen)->dev, templ);
+ /* a layout that cannot be computed cannot be created (and has no stride) */
+ if (!ilo_image_init(&img, &is->dev, &info))
+ return false;
+
+ /* as in tex_init_image() */
+ if (ilo_dev_gen(&is->dev) == ILO_GEN(6) &&
+ templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ image_format == PIPE_FORMAT_Z32_FLOAT &&
+ img.aux.enables != (1 << templ->last_level)) {
+ info.format = pipe_to_surface_format(&is->dev, templ->format);
+ info.interleaved_stencil = true;
+ memset(&img, 0, sizeof(img));
+ if (!ilo_image_init(&img, &is->dev, &info))
+ return false;
+ }
return (img.bo_height <= ilo_max_resource_size / img.bo_stride);
}
diff --git a/src/gallium/drivers/ilo/ilo_resource.h b/src/gallium/drivers/ilo/ilo_resource.h
index d602e0cbf70..8378af54741 100644
--- a/src/gallium/drivers/ilo/ilo_resource.h
+++ b/src/gallium/drivers/ilo/ilo_resource.h
@@ -29,8 +29,8 @@
#define ILO_RESOURCE_H
#include "core/intel_winsys.h"
-#include "core/ilo_buffer.h"
#include "core/ilo_image.h"
+#include "core/ilo_vma.h"
#include "ilo_common.h"
#include "ilo_screen.h"
@@ -92,7 +92,10 @@ struct ilo_texture {
bool imported;
+ enum pipe_format image_format;
struct ilo_image image;
+ struct ilo_vma vma;
+ struct ilo_vma aux_vma;
/* XXX thread-safety */
struct ilo_texture_slice *slices[PIPE_MAX_TEXTURE_LEVELS];
@@ -103,14 +106,15 @@ struct ilo_texture {
struct ilo_buffer_resource {
struct pipe_resource base;
- struct ilo_buffer buffer;
+ uint32_t bo_size;
+ struct ilo_vma vma;
};
-static inline struct ilo_buffer *
-ilo_buffer(struct pipe_resource *res)
+static inline struct ilo_buffer_resource *
+ilo_buffer_resource(struct pipe_resource *res)
{
- return (res && res->target == PIPE_BUFFER) ?
- &((struct ilo_buffer_resource *) res)->buffer : NULL;
+ return (struct ilo_buffer_resource *)
+ ((res && res->target == PIPE_BUFFER) ? res : NULL);
}
static inline struct ilo_texture *
@@ -127,13 +131,14 @@ bool
ilo_resource_rename_bo(struct pipe_resource *res);
/**
- * Return the bo of the resource.
+ * Return the VMA of the resource.
*/
-static inline struct intel_bo *
-ilo_resource_get_bo(struct pipe_resource *res)
+static inline const struct ilo_vma *
+ilo_resource_get_vma(struct pipe_resource *res)
{
return (res->target == PIPE_BUFFER) ?
- ilo_buffer(res)->bo : ilo_texture(res)->image.bo;
+ &((struct ilo_buffer_resource *) res)->vma :
+ &((struct ilo_texture *) res)->vma;
}
static inline struct ilo_texture_slice *
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 94105559b80..ab4d1377c9f 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -193,6 +193,7 @@ ilo_get_compute_param(struct pipe_screen *screen,
uint32_t max_clock_frequency;
uint32_t max_compute_units;
uint32_t images_supported;
+ uint32_t subgroup_size;
} val;
const void *ptr;
int size;
@@ -284,6 +285,13 @@ ilo_get_compute_param(struct pipe_screen *screen,
ptr = &val.images_supported;
size = sizeof(val.images_supported);
break;
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ /* best case is actually SIMD32 */
+ val.subgroup_size = 16;
+
+ ptr = &val.subgroup_size;
+ size = sizeof(val.subgroup_size);
+ break;
default:
ptr = NULL;
size = 0;
@@ -443,6 +451,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TEXTURE_GATHER_SM5:
return 0;
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return true;
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_QUERY_LOD:
@@ -457,6 +467,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -665,13 +677,6 @@ ilo_screen_fence_finish(struct pipe_screen *screen,
return signaled;
}
-static boolean
-ilo_screen_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- return ilo_screen_fence_finish(screen, fence, 0);
-}
-
/**
* Create a fence for \p bo. When \p bo is not NULL, it must be submitted
* before waited on or checked.
@@ -738,7 +743,6 @@ ilo_screen_create(struct intel_winsys *ws)
is->base.flush_frontbuffer = NULL;
is->base.fence_reference = ilo_screen_fence_reference;
- is->base.fence_signalled = ilo_screen_fence_signalled;
is->base.fence_finish = ilo_screen_fence_finish;
is->base.get_driver_query_info = NULL;
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 63534f33fa7..d89765a9d23 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -379,13 +379,12 @@ finalize_cbuf_state(struct ilo_context *ilo,
u_upload_data(ilo->uploader, 0, cbuf->cso[i].info.size,
cbuf->cso[i].user_buffer, &offset, &cbuf->cso[i].resource);
- cbuf->cso[i].info.buf = ilo_buffer(cbuf->cso[i].resource);
+ cbuf->cso[i].info.vma = ilo_resource_get_vma(cbuf->cso[i].resource);
cbuf->cso[i].info.offset = offset;
memset(&cbuf->cso[i].surface, 0, sizeof(cbuf->cso[i].surface));
ilo_state_surface_init_for_buffer(&cbuf->cso[i].surface,
ilo->dev, &cbuf->cso[i].info);
- cbuf->cso[i].surface.bo = cbuf->cso[i].info.buf->bo;
ilo->state_vector.dirty |= ILO_DIRTY_CBUF;
}
@@ -466,11 +465,9 @@ finalize_index_buffer(struct ilo_context *ilo)
memset(&info, 0, sizeof(info));
if (vec->ib.hw_resource) {
- info.buf = ilo_buffer(vec->ib.hw_resource);
- info.size = info.buf->bo_size;
+ info.vma = ilo_resource_get_vma(vec->ib.hw_resource);
+ info.size = info.vma->vm_size;
info.format = ilo_translate_index_size(vec->ib.hw_index_size);
-
- vec->ib.ib.bo = info.buf->bo;
}
ilo_state_index_buffer_set_info(&vec->ib.ib, dev, &info);
@@ -532,13 +529,11 @@ finalize_vertex_buffers(struct ilo_context *ilo)
const struct pipe_vertex_buffer *cso = &vec->vb.states[pipe_idx];
if (cso->buffer) {
- info.buf = ilo_buffer(cso->buffer);
+ info.vma = ilo_resource_get_vma(cso->buffer);
info.offset = cso->buffer_offset;
- info.size = info.buf->bo_size;
+ info.size = info.vma->vm_size - cso->buffer_offset;
info.stride = cso->stride;
-
- vec->vb.vb[i].bo = info.buf->bo;
} else {
memset(&info, 0, sizeof(info));
}
@@ -1566,24 +1561,23 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
cso->info.size = buf[i].buffer_size;
if (buf[i].buffer) {
- cso->info.buf = ilo_buffer(buf[i].buffer);
+ cso->info.vma = ilo_resource_get_vma(buf[i].buffer);
cso->info.offset = buf[i].buffer_offset;
memset(&cso->surface, 0, sizeof(cso->surface));
ilo_state_surface_init_for_buffer(&cso->surface, dev, &cso->info);
- cso->surface.bo = cso->info.buf->bo;
cso->user_buffer = NULL;
cbuf->enabled_mask |= 1 << (index + i);
} else if (buf[i].user_buffer) {
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
/* buffer_offset does not apply for user buffer */
cso->user_buffer = buf[i].user_buffer;
cbuf->enabled_mask |= 1 << (index + i);
} else {
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
cso->info.size = 0;
cso->user_buffer = NULL;
@@ -1596,7 +1590,7 @@ ilo_set_constant_buffer(struct pipe_context *pipe,
pipe_resource_reference(&cso->resource, NULL);
- cso->info.buf = NULL;
+ cso->info.vma = NULL;
cso->info.size = 0;
cso->user_buffer = NULL;
@@ -1705,10 +1699,11 @@ ilo_set_framebuffer_state(struct pipe_context *pipe,
if (state->zsbuf) {
const struct ilo_surface_cso *cso =
(const struct ilo_surface_cso *) state->zsbuf;
+ const struct ilo_texture *tex = ilo_texture(cso->base.texture);
- fb->has_hiz = cso->u.zs.hiz_bo;
+ fb->has_hiz = cso->u.zs.hiz_vma;
fb->depth_offset_format =
- ilo_state_zs_get_depth_format(&cso->u.zs, dev);
+ ilo_format_translate_depth(dev, tex->image_format);
} else {
fb->has_hiz = false;
fb->depth_offset_format = GEN6_ZFORMAT_D32_FLOAT;
@@ -1854,10 +1849,11 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader,
}
static void
-ilo_set_shader_resources(struct pipe_context *pipe,
- unsigned start, unsigned count,
- struct pipe_surface **surfaces)
+ilo_set_shader_images(struct pipe_context *pipe, unsigned shader,
+ unsigned start, unsigned count,
+ struct pipe_image_view **views)
{
+#if 0
struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector;
struct ilo_resource_state *dst = &vec->resource;
unsigned i;
@@ -1886,6 +1882,7 @@ ilo_set_shader_resources(struct pipe_context *pipe,
}
vec->dirty |= ILO_DIRTY_RESOURCE;
+#endif
}
static void
@@ -1945,12 +1942,11 @@ ilo_create_stream_output_target(struct pipe_context *pipe,
target->base.buffer_size = buffer_size;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(res);
+ info.vma = ilo_resource_get_vma(res);
info.offset = buffer_offset;
info.size = buffer_size;
ilo_state_sol_buffer_init(&target->sb, dev, &info);
- target->sb.bo = info.buf->bo;
return &target->base;
}
@@ -2018,18 +2014,17 @@ ilo_create_sampler_view(struct pipe_context *pipe,
struct ilo_state_surface_buffer_info info;
memset(&info, 0, sizeof(info));
- info.buf = ilo_buffer(res);
+ info.vma = ilo_resource_get_vma(res);
info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
info.format = ilo_format_translate_color(dev, templ->format);
info.format_size = util_format_get_blocksize(templ->format);
info.struct_size = info.format_size;
info.readonly = true;
info.offset = templ->u.buf.first_element * info.struct_size;
info.size = (templ->u.buf.last_element -
templ->u.buf.first_element + 1) * info.struct_size;
ilo_state_surface_init_for_buffer(&view->surface, dev, &info);
- view->surface.bo = info.buf->bo;
} else {
struct ilo_texture *tex = ilo_texture(res);
struct ilo_state_surface_image_info info;
@@ -2042,23 +2037,8 @@ ilo_create_sampler_view(struct pipe_context *pipe,
}
memset(&info, 0, sizeof(info));
+
info.img = &tex->image;
-
- info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
-
- if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
- tex->image.separate_stencil) {
- info.format = ilo_format_translate_texture(dev,
- PIPE_FORMAT_Z32_FLOAT);
- } else {
- info.format = ilo_format_translate_texture(dev, templ->format);
- }
-
- info.is_cube_map = (tex->image.target == PIPE_TEXTURE_CUBE ||
- tex->image.target == PIPE_TEXTURE_CUBE_ARRAY);
- info.is_array = util_resource_is_array_texture(&tex->base);
- info.readonly = true;
-
info.level_base = templ->u.tex.first_level;
info.level_count = templ->u.tex.last_level -
templ->u.tex.first_level + 1;
@@ -2066,8 +2046,22 @@ ilo_create_sampler_view(struct pipe_context *pipe,
info.slice_count = templ->u.tex.last_layer -
templ->u.tex.first_layer + 1;
+ info.vma = &tex->vma;
+ info.access = ILO_STATE_SURFACE_ACCESS_SAMPLER;
+ info.type = tex->image.type;
+
+ if (templ->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT &&
+ tex->separate_s8) {
+ info.format = ilo_format_translate_texture(dev,
+ PIPE_FORMAT_Z32_FLOAT);
+ } else {
+ info.format = ilo_format_translate_texture(dev, templ->format);
+ }
+
+ info.is_array = util_resource_is_array_texture(&tex->base);
+ info.readonly = true;
+
ilo_state_surface_init_for_image(&view->surface, dev, &info);
- view->surface.bo = info.img->bo;
}
return &view->base;
@@ -2111,18 +2105,27 @@ ilo_create_surface(struct pipe_context *pipe,
assert(tex->base.target != PIPE_BUFFER);
memset(&info, 0, sizeof(info));
+
info.img = &tex->image;
- info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
- info.format = ilo_format_translate_render(dev, templ->format);
- info.is_array = util_resource_is_array_texture(&tex->base);
info.level_base = templ->u.tex.level;
info.level_count = 1;
info.slice_base = templ->u.tex.first_layer;
info.slice_count = templ->u.tex.last_layer -
templ->u.tex.first_layer + 1;
+ info.vma = &tex->vma;
+ if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level))
+ info.aux_vma = &tex->aux_vma;
+
+ info.access = ILO_STATE_SURFACE_ACCESS_DP_RENDER;
+
+ info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
+ GEN6_SURFTYPE_2D : tex->image.type;
+
+ info.format = ilo_format_translate_render(dev, templ->format);
+ info.is_array = util_resource_is_array_texture(&tex->base);
+
ilo_state_surface_init_for_image(&surf->u.rt, dev, &info);
- surf->u.rt.bo = info.img->bo;
} else {
struct ilo_state_zs_info info;
@@ -2131,13 +2134,19 @@ ilo_create_surface(struct pipe_context *pipe,
memset(&info, 0, sizeof(info));
if (templ->format == PIPE_FORMAT_S8_UINT) {
+ info.s_vma = &tex->vma;
info.s_img = &tex->image;
} else {
+ info.z_vma = &tex->vma;
info.z_img = &tex->image;
- info.s_img = (tex->separate_s8) ? &tex->separate_s8->image : NULL;
- info.hiz_enable =
- ilo_image_can_enable_aux(&tex->image, templ->u.tex.level);
+ if (tex->separate_s8) {
+ info.s_vma = &tex->separate_s8->vma;
+ info.s_img = &tex->separate_s8->image;
+ }
+
+ if (ilo_image_can_enable_aux(&tex->image, templ->u.tex.level))
+ info.hiz_vma = &tex->aux_vma;
}
info.level = templ->u.tex.level;
@@ -2145,16 +2154,15 @@ ilo_create_surface(struct pipe_context *pipe,
info.slice_count = templ->u.tex.last_layer -
templ->u.tex.first_layer + 1;
+ info.type = (tex->image.type == GEN6_SURFTYPE_CUBE) ?
+ GEN6_SURFTYPE_2D : tex->image.type;
+
+ info.format = ilo_format_translate_depth(dev, tex->image_format);
+ if (ilo_dev_gen(dev) == ILO_GEN(6) && !info.hiz_vma &&
+ tex->image_format == PIPE_FORMAT_Z24X8_UNORM)
+ info.format = GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+
ilo_state_zs_init(&surf->u.zs, dev, &info);
-
- if (info.z_img) {
- surf->u.zs.depth_bo = info.z_img->bo;
- if (info.hiz_enable)
- surf->u.zs.hiz_bo = info.z_img->aux.bo;
- }
-
- if (info.s_img)
- surf->u.zs.stencil_bo = info.s_img->bo;
}
return &surf->base;
@@ -2339,7 +2347,7 @@ ilo_init_state_functions(struct ilo_context *ilo)
ilo->base.set_scissor_states = ilo_set_scissor_states;
ilo->base.set_viewport_states = ilo_set_viewport_states;
ilo->base.set_sampler_views = ilo_set_sampler_views;
- ilo->base.set_shader_resources = ilo_set_shader_resources;
+ ilo->base.set_shader_images = ilo_set_shader_images;
ilo->base.set_vertex_buffers = ilo_set_vertex_buffers;
ilo->base.set_index_buffer = ilo_set_index_buffer;
@@ -2451,7 +2459,6 @@ void
ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct pipe_resource *res)
{
- struct intel_bo *bo = ilo_resource_get_bo(res);
uint32_t states = 0;
unsigned sh, i;
@@ -2482,10 +2489,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
for (i = 0; i < vec->so.count; i++) {
if (vec->so.states[i]->buffer == res) {
- struct ilo_stream_output_target *target =
- (struct ilo_stream_output_target *) vec->so.states[i];
-
- target->sb.bo = ilo_buffer(res)->bo;
states |= ILO_DIRTY_SO;
break;
}
@@ -2503,7 +2506,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
[PIPE_SHADER_GEOMETRY] = ILO_DIRTY_VIEW_GS,
[PIPE_SHADER_COMPUTE] = ILO_DIRTY_VIEW_CS,
};
- cso->surface.bo = bo;
states |= view_dirty_bits[sh];
break;
@@ -2515,7 +2517,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct ilo_cbuf_cso *cbuf = &vec->cbuf[sh].cso[i];
if (cbuf->resource == res) {
- cbuf->surface.bo = bo;
states |= ILO_DIRTY_CBUF;
break;
}
@@ -2528,7 +2529,6 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
(struct ilo_surface_cso *) vec->resource.states[i];
if (cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_RESOURCE;
break;
}
@@ -2540,27 +2540,19 @@ ilo_state_vector_resource_renamed(struct ilo_state_vector *vec,
struct ilo_surface_cso *cso =
(struct ilo_surface_cso *) vec->fb.state.cbufs[i];
if (cso && cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_FB;
break;
}
}
- if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res) {
- struct ilo_surface_cso *cso =
- (struct ilo_surface_cso *) vec->fb.state.zsbuf;
-
- cso->u.zs.depth_bo = bo;
-
+ if (vec->fb.state.zsbuf && vec->fb.state.zsbuf->texture == res)
states |= ILO_DIRTY_FB;
- }
}
for (i = 0; i < vec->cs_resource.count; i++) {
struct ilo_surface_cso *cso =
(struct ilo_surface_cso *) vec->cs_resource.states[i];
if (cso->base.texture == res) {
- cso->u.rt.bo = bo;
states |= ILO_DIRTY_CS_RESOURCE;
break;
}
diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h
index 3e6fd8a2554..66c93007eb1 100644
--- a/src/gallium/drivers/ilo/ilo_state.h
+++ b/src/gallium/drivers/ilo/ilo_state.h
@@ -202,7 +202,7 @@ struct ilo_cbuf_state {
};
struct ilo_resource_state {
- struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
+ struct pipe_surface *states[PIPE_MAX_SHADER_IMAGES];
unsigned count;
};
diff --git a/src/gallium/drivers/ilo/ilo_transfer.c b/src/gallium/drivers/ilo/ilo_transfer.c
index ec41473f94a..5abd3bebf68 100644
--- a/src/gallium/drivers/ilo/ilo_transfer.c
+++ b/src/gallium/drivers/ilo/ilo_transfer.c
@@ -100,7 +100,7 @@ resource_get_transfer_method(struct pipe_resource *res,
m = ILO_TRANSFER_MAP_SW_ZS;
need_convert = true;
}
- } else if (tex->image.format != tex->base.format) {
+ } else if (tex->image_format != tex->base.format) {
m = ILO_TRANSFER_MAP_SW_CONVERT;
need_convert = true;
}
@@ -268,23 +268,27 @@ xfer_alloc_staging_sys(struct ilo_transfer *xfer)
static void *
xfer_map(struct ilo_transfer *xfer)
{
+ const struct ilo_vma *vma;
void *ptr;
switch (xfer->method) {
case ILO_TRANSFER_MAP_CPU:
- ptr = intel_bo_map(ilo_resource_get_bo(xfer->base.resource),
- xfer->base.usage & PIPE_TRANSFER_WRITE);
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map(vma->bo, xfer->base.usage & PIPE_TRANSFER_WRITE);
break;
case ILO_TRANSFER_MAP_GTT:
- ptr = intel_bo_map_gtt(ilo_resource_get_bo(xfer->base.resource));
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map_gtt(vma->bo);
break;
case ILO_TRANSFER_MAP_GTT_ASYNC:
- ptr = intel_bo_map_gtt_async(ilo_resource_get_bo(xfer->base.resource));
+ vma = ilo_resource_get_vma(xfer->base.resource);
+ ptr = intel_bo_map_gtt_async(vma->bo);
break;
case ILO_TRANSFER_MAP_STAGING:
{
const struct ilo_screen *is = ilo_screen(xfer->staging.res->screen);
- struct intel_bo *bo = ilo_resource_get_bo(xfer->staging.res);
+
+ vma = ilo_resource_get_vma(xfer->staging.res);
/*
* We want a writable, optionally persistent and coherent, mapping
@@ -292,25 +296,29 @@ xfer_map(struct ilo_transfer *xfer)
* this turns out to be fairly simple.
*/
if (is->dev.has_llc)
- ptr = intel_bo_map(bo, true);
+ ptr = intel_bo_map(vma->bo, true);
else
- ptr = intel_bo_map_gtt(bo);
+ ptr = intel_bo_map_gtt(vma->bo);
if (ptr && xfer->staging.res->target == PIPE_BUFFER)
ptr += (xfer->base.box.x % ILO_TRANSFER_MAP_BUFFER_ALIGNMENT);
-
}
break;
case ILO_TRANSFER_MAP_SW_CONVERT:
case ILO_TRANSFER_MAP_SW_ZS:
+ vma = NULL;
ptr = xfer->staging.sys;
break;
default:
assert(!"unknown mapping method");
+ vma = NULL;
ptr = NULL;
break;
}
+ if (ptr && vma)
+ ptr = (void *) ((char *) ptr + vma->bo_offset);
+
return ptr;
}
@@ -324,10 +332,10 @@ xfer_unmap(struct ilo_transfer *xfer)
case ILO_TRANSFER_MAP_CPU:
case ILO_TRANSFER_MAP_GTT:
case ILO_TRANSFER_MAP_GTT_ASYNC:
- intel_bo_unmap(ilo_resource_get_bo(xfer->base.resource));
+ intel_bo_unmap(ilo_resource_get_vma(xfer->base.resource)->bo);
break;
case ILO_TRANSFER_MAP_STAGING:
- intel_bo_unmap(ilo_resource_get_bo(xfer->staging.res));
+ intel_bo_unmap(ilo_resource_get_vma(xfer->staging.res)->bo);
break;
default:
break;
@@ -541,9 +549,12 @@ tex_staging_sys_map_bo(struct ilo_texture *tex,
if (prefer_cpu && (tex->image.tiling == GEN6_TILING_NONE ||
!linear_view))
- ptr = intel_bo_map(tex->image.bo, !for_read_back);
+ ptr = intel_bo_map(tex->vma.bo, !for_read_back);
else
- ptr = intel_bo_map_gtt(tex->image.bo);
+ ptr = intel_bo_map_gtt(tex->vma.bo);
+
+ if (ptr)
+ ptr = (void *) ((char *) ptr + tex->vma.bo_offset);
return ptr;
}
@@ -551,7 +562,7 @@ tex_staging_sys_map_bo(struct ilo_texture *tex,
static void
tex_staging_sys_unmap_bo(struct ilo_texture *tex)
{
- intel_bo_unmap(tex->image.bo);
+ intel_bo_unmap(tex->vma.bo);
}
static bool
@@ -590,7 +601,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
- assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
dst_cpp = 4;
dst_s8_pos = 3;
@@ -598,7 +609,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
}
else {
assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
- assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
+ assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
dst_cpp = 8;
dst_s8_pos = 4;
@@ -644,7 +655,7 @@ tex_staging_sys_zs_read(struct ilo_texture *tex,
tex_staging_sys_unmap_bo(s8_tex);
}
else {
- assert(tex->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->image_format == PIPE_FORMAT_S8_UINT);
for (slice = 0; slice < box->depth; slice++) {
unsigned mem_x, mem_y;
@@ -717,7 +728,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
s8_tile_offset = tex_tile_choose_offset_func(s8_tex, &s8_tiles_per_row);
if (tex->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
- assert(tex->image.format == PIPE_FORMAT_Z24X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_Z24X8_UNORM);
src_cpp = 4;
src_s8_pos = 3;
@@ -725,7 +736,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
}
else {
assert(tex->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
- assert(tex->image.format == PIPE_FORMAT_Z32_FLOAT);
+ assert(tex->image_format == PIPE_FORMAT_Z32_FLOAT);
src_cpp = 8;
src_s8_pos = 4;
@@ -771,7 +782,7 @@ tex_staging_sys_zs_write(struct ilo_texture *tex,
tex_staging_sys_unmap_bo(s8_tex);
}
else {
- assert(tex->image.format == PIPE_FORMAT_S8_UINT);
+ assert(tex->image_format == PIPE_FORMAT_S8_UINT);
for (slice = 0; slice < box->depth; slice++) {
unsigned mem_x, mem_y;
@@ -829,8 +840,8 @@ tex_staging_sys_convert_write(struct ilo_texture *tex,
else
dst_slice_stride = 0;
- if (unlikely(tex->image.format == tex->base.format)) {
- util_copy_box(dst, tex->image.format, tex->image.bo_stride,
+ if (unlikely(tex->image_format == tex->base.format)) {
+ util_copy_box(dst, tex->image_format, tex->image.bo_stride,
dst_slice_stride, 0, 0, 0, box->width, box->height, box->depth,
xfer->staging.sys, xfer->base.stride, xfer->base.layer_stride,
0, 0, 0);
@@ -842,7 +853,7 @@ tex_staging_sys_convert_write(struct ilo_texture *tex,
switch (tex->base.format) {
case PIPE_FORMAT_ETC1_RGB8:
- assert(tex->image.format == PIPE_FORMAT_R8G8B8X8_UNORM);
+ assert(tex->image_format == PIPE_FORMAT_R8G8B8X8_UNORM);
for (slice = 0; slice < box->depth; slice++) {
const void *src =
@@ -1055,7 +1066,7 @@ choose_transfer_method(struct ilo_context *ilo, struct ilo_transfer *xfer)
return false;
/* see if we can avoid blocking */
- if (is_bo_busy(ilo, ilo_resource_get_bo(res), &need_submit)) {
+ if (is_bo_busy(ilo, ilo_resource_get_vma(res)->bo, &need_submit)) {
bool resource_renamed;
if (!xfer_unblock(xfer, &resource_renamed)) {
@@ -1078,11 +1089,11 @@ static void
buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
unsigned usage, int offset, int size, const void *data)
{
- struct ilo_buffer *buf = ilo_buffer(res);
+ struct ilo_buffer_resource *buf = ilo_buffer_resource(res);
bool need_submit;
/* see if we can avoid blocking */
- if (is_bo_busy(ilo, buf->bo, &need_submit)) {
+ if (is_bo_busy(ilo, buf->vma.bo, &need_submit)) {
bool unblocked = false;
if ((usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
@@ -1103,9 +1114,12 @@ buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
templ.bind = PIPE_BIND_TRANSFER_WRITE;
staging = ilo->base.screen->resource_create(ilo->base.screen, &templ);
if (staging) {
+ const struct ilo_vma *staging_vma = ilo_resource_get_vma(staging);
struct pipe_box staging_box;
- intel_bo_pwrite(ilo_buffer(staging)->bo, 0, size, data);
+ /* offset by staging_vma->bo_offset for pwrite */
+ intel_bo_pwrite(staging_vma->bo, staging_vma->bo_offset,
+ size, data);
u_box_1d(0, size, &staging_box);
ilo_blitter_blt_copy_resource(ilo->blitter,
@@ -1123,7 +1137,8 @@ buf_pwrite(struct ilo_context *ilo, struct pipe_resource *res,
ilo_cp_submit(ilo->cp, "syncing for pwrites");
}
- intel_bo_pwrite(buf->bo, offset, size, data);
+ /* offset by buf->vma.bo_offset for pwrite */
+ intel_bo_pwrite(buf->vma.bo, buf->vma.bo_offset + offset, size, data);
}
static void
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
index 1de43f77ee0..1feb415c9e5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
@@ -78,7 +78,7 @@ lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func)
/**
* Whether the blending factors are complementary of each other.
*/
-static INLINE boolean
+static inline boolean
lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
{
return dst_factor == (src_factor ^ 0x10);
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 0d47c0d517c..c273b25f096 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -169,7 +169,7 @@ llvmpipe_user_buffer_create(struct pipe_screen *screen,
unsigned bind_flags);
-static INLINE struct llvmpipe_context *
+static inline struct llvmpipe_context *
llvmpipe_context( struct pipe_context *pipe )
{
return (struct llvmpipe_context *)pipe;
diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
index e0f7d8e1bc3..1038c5fe151 100644
--- a/src/gallium/drivers/llvmpipe/lp_debug.h
+++ b/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -71,7 +71,7 @@ extern int LP_DEBUG;
void st_debug_init( void );
-static INLINE void
+static inline void
LP_DBG( unsigned flag, const char *fmt, ... )
{
if (LP_DEBUG & flag)
diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h b/src/gallium/drivers/llvmpipe/lp_fence.h
index 3c591187801..d7f0c153ec8 100644
--- a/src/gallium/drivers/llvmpipe/lp_fence.h
+++ b/src/gallium/drivers/llvmpipe/lp_fence.h
@@ -72,7 +72,7 @@ llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
void
lp_fence_destroy(struct lp_fence *fence);
-static INLINE void
+static inline void
lp_fence_reference(struct lp_fence **ptr,
struct lp_fence *f)
{
@@ -85,7 +85,7 @@ lp_fence_reference(struct lp_fence **ptr,
*ptr = f;
}
-static INLINE boolean
+static inline boolean
lp_fence_issued(const struct lp_fence *fence)
{
return fence->issued;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index c209f47f0f5..c19f9318006 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -184,7 +184,7 @@ union lp_rast_cmd_arg {
/* Cast wrappers. Hopefully these compile to noops!
*/
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
{
union lp_rast_cmd_arg arg;
@@ -192,7 +192,7 @@ lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
return arg;
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
unsigned plane_mask)
{
@@ -208,7 +208,7 @@ lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
* All planes are enabled, so instead of the plane mask we pass the upper
* left coordinates of the a block that fully encloses the triangle.
*/
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_triangle_contained( const struct lp_rast_triangle *triangle,
unsigned x, unsigned y)
{
@@ -218,7 +218,7 @@ lp_rast_arg_triangle_contained( const struct lp_rast_triangle *triangle,
return arg;
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_state( const struct lp_rast_state *state )
{
union lp_rast_cmd_arg arg;
@@ -226,7 +226,7 @@ lp_rast_arg_state( const struct lp_rast_state *state )
return arg;
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_fence( struct lp_fence *fence )
{
union lp_rast_cmd_arg arg;
@@ -235,7 +235,7 @@ lp_rast_arg_fence( struct lp_fence *fence )
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
{
union lp_rast_cmd_arg arg;
@@ -245,7 +245,7 @@ lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_query( struct llvmpipe_query *pq )
{
union lp_rast_cmd_arg arg;
@@ -253,7 +253,7 @@ lp_rast_arg_query( struct llvmpipe_query *pq )
return arg;
}
-static INLINE union lp_rast_cmd_arg
+static inline union lp_rast_cmd_arg
lp_rast_arg_null( void )
{
union lp_rast_cmd_arg arg;
@@ -312,7 +312,7 @@ lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
#include
#include "util/u_sse.h"
-static INLINE __m128i
+static inline __m128i
lp_plane_to_m128i(const struct lp_rast_plane *plane)
{
return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index e6ebbcd526d..9aa7e874657 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -145,7 +145,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
* Get the pointer to a 4x4 color block (within a 64x64 tile).
* \param x, y location of 4x4 block in window coords
*/
-static INLINE uint8_t *
+static inline uint8_t *
lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
unsigned buf, unsigned x, unsigned y,
unsigned layer)
@@ -186,7 +186,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
* Get the pointer to a 4x4 depth block (within a 64x64 tile).
* \param x, y location of 4x4 block in window coords
*/
-static INLINE uint8_t *
+static inline uint8_t *
lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
unsigned x, unsigned y, unsigned layer)
{
@@ -222,7 +222,7 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
* triangle in/out tests.
* \param x, y location of 4x4 block in window coords
*/
-static INLINE void
+static inline void
lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y )
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 41f6fbfa059..c9b9221d87c 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -63,7 +63,7 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
-static INLINE unsigned
+static inline unsigned
build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
{
unsigned mask = 0;
@@ -94,7 +94,7 @@ build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy)
}
-static INLINE void
+static inline void
build_masks(int64_t c,
int64_t cdiff,
int64_t dcdx,
@@ -167,7 +167,7 @@ lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
#include "util/u_sse.h"
-static INLINE void
+static inline void
build_masks_32(int c,
int cdiff,
int dcdx,
@@ -213,7 +213,7 @@ build_masks_32(int c,
}
-static INLINE unsigned
+static inline unsigned
build_mask_linear_32(int c, int dcdx, int dcdy)
{
__m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3);
@@ -239,7 +239,7 @@ build_mask_linear_32(int c, int dcdx, int dcdy)
return _mm_movemask_epi8(result);
}
-static INLINE unsigned
+static inline unsigned
sign_bits4(const __m128i *cstep, int cdiff)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index a226ff0c485..b1464bb54c4 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -207,7 +207,7 @@ boolean lp_scene_is_resource_referenced(const struct lp_scene *scene,
* Allocate space for a command/data in the bin's data buffer.
* Grow the block list if needed.
*/
-static INLINE void *
+static inline void *
lp_scene_alloc( struct lp_scene *scene, unsigned size)
{
struct data_block_list *list = &scene->data;
@@ -240,7 +240,7 @@ lp_scene_alloc( struct lp_scene *scene, unsigned size)
/**
* As above, but with specific alignment.
*/
-static INLINE void *
+static inline void *
lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
unsigned alignment )
{
@@ -272,7 +272,7 @@ lp_scene_alloc_aligned( struct lp_scene *scene, unsigned size,
/* Put back data if we decide not to use it, eg. culled triangles.
*/
-static INLINE void
+static inline void
lp_scene_putback_data( struct lp_scene *scene, unsigned size)
{
struct data_block_list *list = &scene->data;
@@ -282,7 +282,7 @@ lp_scene_putback_data( struct lp_scene *scene, unsigned size)
/** Return pointer to a particular tile's bin. */
-static INLINE struct cmd_bin *
+static inline struct cmd_bin *
lp_scene_get_bin(struct lp_scene *scene, unsigned x, unsigned y)
{
return &scene->tile[x][y];
@@ -296,7 +296,7 @@ lp_scene_bin_reset(struct lp_scene *scene, unsigned x, unsigned y);
/* Add a command to bin[x][y].
*/
-static INLINE boolean
+static inline boolean
lp_scene_bin_command( struct lp_scene *scene,
unsigned x, unsigned y,
unsigned cmd,
@@ -328,7 +328,7 @@ lp_scene_bin_command( struct lp_scene *scene,
}
-static INLINE boolean
+static inline boolean
lp_scene_bin_cmd_with_state( struct lp_scene *scene,
unsigned x, unsigned y,
const struct lp_rast_state *state,
@@ -354,7 +354,7 @@ lp_scene_bin_cmd_with_state( struct lp_scene *scene,
/* Add a command to all active bins.
*/
-static INLINE boolean
+static inline boolean
lp_scene_bin_everywhere( struct lp_scene *scene,
unsigned cmd,
const union lp_rast_cmd_arg arg )
@@ -371,7 +371,7 @@ lp_scene_bin_everywhere( struct lp_scene *scene,
}
-static INLINE unsigned
+static inline unsigned
lp_scene_get_num_bins( const struct lp_scene *scene )
{
return scene->tiles_x * scene->tiles_y;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 47f1897c732..14eeab03387 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -288,10 +288,14 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_VERTEXID_NOBASE:
return 0;
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
}
/* should only get here on unhandled cases */
@@ -528,18 +532,6 @@ llvmpipe_fence_reference(struct pipe_screen *screen,
}
-/**
- * Has the fence been executed/finished?
- */
-static boolean
-llvmpipe_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct lp_fence *f = (struct lp_fence *) fence;
- return lp_fence_signalled(f);
-}
-
-
/**
* Wait for the fence to finish.
*/
@@ -550,6 +542,9 @@ llvmpipe_fence_finish(struct pipe_screen *screen,
{
struct lp_fence *f = (struct lp_fence *) fence_handle;
+ if (!timeout)
+ return lp_fence_signalled(f);
+
lp_fence_wait(f);
return TRUE;
}
@@ -601,7 +596,6 @@ llvmpipe_create_screen(struct sw_winsys *winsys)
screen->base.context_create = llvmpipe_create_context;
screen->base.flush_frontbuffer = llvmpipe_flush_frontbuffer;
screen->base.fence_reference = llvmpipe_fence_reference;
- screen->base.fence_signalled = llvmpipe_fence_signalled;
screen->base.fence_finish = llvmpipe_fence_finish;
screen->base.get_timestamp = llvmpipe_get_timestamp;
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.h b/src/gallium/drivers/llvmpipe/lp_screen.h
index 8b8ea1afac9..00bf20c8c5f 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.h
+++ b/src/gallium/drivers/llvmpipe/lp_screen.h
@@ -62,7 +62,7 @@ struct llvmpipe_screen
-static INLINE struct llvmpipe_screen *
+static inline struct llvmpipe_screen *
llvmpipe_screen( struct pipe_screen *pipe )
{
return (struct llvmpipe_screen *)pipe;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h
index c944ad26756..a42df2dc9e0 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup.h
@@ -159,7 +159,7 @@ void
lp_setup_end_query(struct lp_setup_context *setup,
struct llvmpipe_query *pq);
-static INLINE unsigned
+static inline unsigned
lp_clamp_viewport_idx(int idx)
{
return (PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c
index 6c05b90e64a..a190254d9df 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_line.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c
@@ -233,7 +233,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
-static INLINE int subpixel_snap( float a )
+static inline int subpixel_snap( float a )
{
return util_iround(FIXED_ONE * a);
}
@@ -262,14 +262,14 @@ print_line(struct lp_setup_context *setup,
}
-static INLINE boolean sign(float x){
+static inline boolean sign(float x){
return x >= 0;
}
/* Used on positive floats only:
*/
-static INLINE float fracf(float f)
+static inline float fracf(float f)
{
return f - floorf(f);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index f065676a7fb..75544b52493 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -296,7 +296,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
}
-static INLINE int
+static inline int
subpixel_snap(float a)
{
return util_iround(FIXED_ONE * a);
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index a2f55ed3a1e..98a9d4bc28b 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -48,13 +48,13 @@
#include <emmintrin.h>
#endif
-static INLINE int
+static inline int
subpixel_snap(float a)
{
return util_iround(FIXED_ONE * a);
}
-static INLINE float
+static inline float
fixed_to_float(int a)
{
return a * (1.0f / FIXED_ONE);
@@ -579,7 +579,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
*
* Undefined if no bit set exists, so code should check against 0 first.
*/
-static INLINE uint32_t
+static inline uint32_t
floor_pot(uint32_t n)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
@@ -841,7 +841,7 @@ static void retry_triangle_ccw( struct lp_setup_context *setup,
/**
* Calculate fixed position data for a triangle
*/
-static INLINE void
+static inline void
calc_fixed_position( struct lp_setup_context *setup,
struct fixed_position* position,
const float (*v0)[4],
@@ -873,7 +873,7 @@ calc_fixed_position( struct lp_setup_context *setup,
* Rotate a triangle, flipping its clockwise direction,
* Swaps values for xy[0] and xy[1]
*/
-static INLINE void
+static inline void
rotate_fixed_position_01( struct fixed_position* position )
{
int x, y;
@@ -898,7 +898,7 @@ rotate_fixed_position_01( struct fixed_position* position )
* Rotate a triangle, flipping its clockwise direction,
* Swaps values for xy[1] and xy[2]
*/
-static INLINE void
+static inline void
rotate_fixed_position_12( struct fixed_position* position )
{
int x, y;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
index 89992007849..534c5f48a64 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_vbuf.c
@@ -122,7 +122,7 @@ lp_setup_set_primitive(struct vbuf_render *vbr, unsigned prim)
typedef const float (*const_float4_ptr)[4];
-static INLINE const_float4_ptr get_vert( const void *vertex_buffer,
+static inline const_float4_ptr get_vert( const void *vertex_buffer,
int index,
int stride )
{
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b5ce8683f1a..fd6c49aacd8 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -840,7 +840,7 @@ store_unswizzled_block(struct gallivm_state *gallivm,
*
* A format which has irregular channel sizes such as R3_G3_B2 or R5_G6_B5.
*/
-static INLINE boolean
+static inline boolean
is_arithmetic_format(const struct util_format_description *format_desc)
{
boolean arith = false;
@@ -860,7 +860,7 @@ is_arithmetic_format(const struct util_format_description *format_desc)
* to floats for blending, and furthermore has "natural" packed AoS -> unpacked
* SoA conversion.
*/
-static INLINE boolean
+static inline boolean
format_expands_to_float_soa(const struct util_format_description *format_desc)
{
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
@@ -876,7 +876,7 @@ format_expands_to_float_soa(const struct util_format_description *format_desc)
*
* e.g. RGBA16F = 4x half-float and R3G3B2 = 1x byte
*/
-static INLINE void
+static inline void
lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
struct lp_type* type)
{
@@ -924,7 +924,7 @@ lp_mem_type_from_format_desc(const struct util_format_description *format_desc,
*
* e.g. RGBA16F = 4x float, R3G3B2 = 3x byte
*/
-static INLINE void
+static inline void
lp_blend_type_from_format_desc(const struct util_format_description *format_desc,
struct lp_type* type)
{
@@ -996,7 +996,7 @@ lp_blend_type_from_format_desc(const struct util_format_description *format_desc
*
* but we try to avoid division and multiplication through shifts.
*/
-static INLINE LLVMValueRef
+static inline LLVMValueRef
scale_bits(struct gallivm_state *gallivm,
int src_bits,
int dst_bits,
@@ -1108,7 +1108,7 @@ scale_bits(struct gallivm_state *gallivm,
/**
* If RT is a smallfloat (needing denorms) format
*/
-static INLINE int
+static inline int
have_smallfloat_format(struct lp_type dst_type,
enum pipe_format format)
{
@@ -2880,7 +2880,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
/**
* Return the blend factor equivalent to a destination alpha of one.
*/
-static INLINE unsigned
+static inline unsigned
force_dst_alpha_one(unsigned factor, boolean clamped_zero)
{
switch(factor) {
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index 4b6c8a7a6a5..e1b51c9c9a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -77,7 +77,7 @@ unsigned __int64 __rdtsc();
#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
-static INLINE uint64_t
+static inline uint64_t
rdtsc(void)
{
uint32_t hi, lo;
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index 9fbd3a21648..3d315bb9a73 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -106,21 +106,21 @@ struct llvmpipe_transfer
/** cast wrappers */
-static INLINE struct llvmpipe_resource *
+static inline struct llvmpipe_resource *
llvmpipe_resource(struct pipe_resource *pt)
{
return (struct llvmpipe_resource *) pt;
}
-static INLINE const struct llvmpipe_resource *
+static inline const struct llvmpipe_resource *
llvmpipe_resource_const(const struct pipe_resource *pt)
{
return (const struct llvmpipe_resource *) pt;
}
-static INLINE struct llvmpipe_transfer *
+static inline struct llvmpipe_transfer *
llvmpipe_transfer(struct pipe_transfer *pt)
{
return (struct llvmpipe_transfer *) pt;
@@ -131,7 +131,7 @@ void llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen);
void llvmpipe_init_context_resource_funcs(struct pipe_context *pipe);
-static INLINE boolean
+static inline boolean
llvmpipe_resource_is_texture(const struct pipe_resource *resource)
{
switch (resource->target) {
@@ -153,7 +153,7 @@ llvmpipe_resource_is_texture(const struct pipe_resource *resource)
}
-static INLINE boolean
+static inline boolean
llvmpipe_resource_is_1d(const struct pipe_resource *resource)
{
switch (resource->target) {
@@ -175,7 +175,7 @@ llvmpipe_resource_is_1d(const struct pipe_resource *resource)
}
-static INLINE unsigned
+static inline unsigned
llvmpipe_layer_stride(struct pipe_resource *resource,
unsigned level)
{
@@ -185,7 +185,7 @@ llvmpipe_layer_stride(struct pipe_resource *resource,
}
-static INLINE unsigned
+static inline unsigned
llvmpipe_resource_stride(struct pipe_resource *resource,
unsigned level)
{
diff --git a/src/gallium/drivers/nouveau/Makefile.am b/src/gallium/drivers/nouveau/Makefile.am
index d05f0a17ab4..c52d62e54a2 100644
--- a/src/gallium/drivers/nouveau/Makefile.am
+++ b/src/gallium/drivers/nouveau/Makefile.am
@@ -20,8 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index ca3c806e92f..cce60550ae5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1153,8 +1153,8 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
switch (info->type) {
PROG_TYPE_CASE(VERTEX, VERTEX);
-// PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL);
-// PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL);
+ PROG_TYPE_CASE(TESS_CTRL, TESSELLATION_CONTROL);
+ PROG_TYPE_CASE(TESS_EVAL, TESSELLATION_EVAL);
PROG_TYPE_CASE(GEOMETRY, GEOMETRY);
PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
PROG_TYPE_CASE(COMPUTE, COMPUTE);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 529dcb9bdc2..3ddaeafebbd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -106,6 +106,7 @@ enum operation
OP_MEMBAR, // memory barrier (mfence, lfence, sfence)
OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base
OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1]
+ OP_AFETCH, // fetch base address of shader input (a[%r1+0x10])
OP_EXPORT,
OP_LINTERP,
OP_PINTERP,
@@ -372,7 +373,8 @@ enum SVSemantic
SV_SAMPLE_INDEX,
SV_SAMPLE_POS,
SV_SAMPLE_MASK,
- SV_TESS_FACTOR,
+ SV_TESS_OUTER,
+ SV_TESS_INNER,
SV_TESS_COORD,
SV_TID,
SV_CTAID,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
index 51b9225156b..fa8ee072a92 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
@@ -332,6 +332,9 @@ BasicBlock::splitBefore(Instruction *insn, bool attach)
BasicBlock *bb = new BasicBlock(func);
assert(!insn || insn->op != OP_PHI);
+ bb->joinAt = joinAt;
+ joinAt = NULL;
+
splitCommon(insn, bb, attach);
return bb;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
index 708c5b322ee..19418c0e0f1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -428,8 +428,7 @@ BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex)
{
Symbol *sym = new_Symbol(prog, FILE_SYSTEM_VALUE, 0);
- assert(svIndex < 4 ||
- (svName == SV_CLIP_DISTANCE || svName == SV_TESS_FACTOR));
+ assert(svIndex < 4 || svName == SV_CLIP_DISTANCE);
switch (svName) {
case SV_POSITION:
@@ -438,7 +437,9 @@ BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex)
case SV_POINT_SIZE:
case SV_POINT_COORD:
case SV_CLIP_DISTANCE:
- case SV_TESS_FACTOR:
+ case SV_TESS_OUTER:
+ case SV_TESS_INNER:
+ case SV_TESS_COORD:
sym->reg.type = TYPE_F32;
break;
default:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index dba56bf2716..2b9edcf9172 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -69,18 +69,6 @@ struct nv50_ir_varying
# define NV50_IR_DEBUG_REG_ALLOC 0
#endif
-#define NV50_SEMANTIC_CLIPDISTANCE (TGSI_SEMANTIC_COUNT + 0)
-#define NV50_SEMANTIC_TESSFACTOR (TGSI_SEMANTIC_COUNT + 7)
-#define NV50_SEMANTIC_TESSCOORD (TGSI_SEMANTIC_COUNT + 8)
-#define NV50_SEMANTIC_COUNT (TGSI_SEMANTIC_COUNT + 10)
-
-#define NV50_TESS_PART_FRACT_ODD 0
-#define NV50_TESS_PART_FRACT_EVEN 1
-#define NV50_TESS_PART_POW2 2
-#define NV50_TESS_PART_INTEGER 3
-
-#define NV50_PRIM_PATCHES PIPE_PRIM_MAX
-
struct nv50_ir_prog_symbol
{
uint32_t label;
@@ -151,10 +139,10 @@ struct nv50_ir_prog_info
} gp;
struct {
unsigned numColourResults;
- boolean writesDepth;
- boolean earlyFragTests;
- boolean separateFragData;
- boolean usesDiscard;
+ bool writesDepth;
+ bool earlyFragTests;
+ bool separateFragData;
+ bool usesDiscard;
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
@@ -180,11 +168,11 @@ struct nv50_ir_prog_info
int8_t viewportId; /* output index of ViewportIndex */
uint8_t fragDepth; /* output index of FragDepth */
uint8_t sampleMask; /* output index of SampleMask */
- boolean sampleInterp; /* perform sample interp on all fp inputs */
+ bool sampleInterp; /* perform sample interp on all fp inputs */
uint8_t backFaceColor[2]; /* input/output indices of back face colour */
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */
- boolean fp64; /* program uses fp64 math */
- boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
+ bool fp64; /* program uses fp64 math */
+ bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index ab8bf2e5504..f06056f8f17 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -77,6 +77,7 @@ private:
void emitMOV(const Instruction *);
void emitINTERP(const Instruction *);
+ void emitAFETCH(const Instruction *);
void emitPFETCH(const Instruction *);
void emitVFETCH(const Instruction *);
void emitEXPORT(const Instruction *);
@@ -120,6 +121,8 @@ private:
void emitPIXLD(const Instruction *);
+ void emitBAR(const Instruction *);
+
void emitFlow(const Instruction *);
inline void defId(const ValueDef&, const int pos);
@@ -1249,6 +1252,13 @@ CodeEmitterGK110::emitPIXLD(const Instruction *i)
code[1] |= 0x00070000;
}
+void
+CodeEmitterGK110::emitBAR(const Instruction *i)
+{
+ /* TODO */
+ emitNOP(i);
+}
+
void
CodeEmitterGK110::emitFlow(const Instruction *i)
{
@@ -1329,6 +1339,23 @@ CodeEmitterGK110::emitFlow(const Instruction *i)
}
}
+void
+CodeEmitterGK110::emitAFETCH(const Instruction *i)
+{
+ uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff;
+
+ code[0] = 0x00000002 | (offset << 23);
+ code[1] = 0x7d000000 | (offset >> 9);
+
+ if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
+ code[1] |= 0x8;
+
+ emitPredicate(i);
+
+ defId(i->def(0), 2);
+ srcId(i->src(0).getIndirect(0), 10);
+}
+
void
CodeEmitterGK110::emitPFETCH(const Instruction *i)
{
@@ -1698,6 +1725,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_EXPORT:
emitEXPORT(insn);
break;
+ case OP_AFETCH:
+ emitAFETCH(insn);
+ break;
case OP_PFETCH:
emitPFETCH(insn);
break;
@@ -1856,6 +1886,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
emitNOP(insn);
insn->join = 1;
break;
+ case OP_BAR:
+ emitBAR(insn);
+ break;
case OP_PHI:
case OP_UNION:
case OP_CONSTRAINT:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 399a6f1db13..ef5c87d0437 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -174,6 +174,7 @@ private:
void emitALD();
void emitAST();
void emitISBERD();
+ void emitAL2P();
void emitIPA();
void emitPIXLD();
@@ -2203,6 +2204,17 @@ CodeEmitterGM107::emitISBERD()
emitGPR (0x00, insn->def(0));
}
+void
+CodeEmitterGM107::emitAL2P()
+{
+ emitInsn (0xefa00000);
+ emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
+ emitO (0x20);
+ emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
+ emitGPR (0x08, insn->src(0).getIndirect(0));
+ emitGPR (0x00, insn->def(0));
+}
+
void
CodeEmitterGM107::emitIPA()
{
@@ -2441,8 +2453,14 @@ CodeEmitterGM107::emitTXQ()
break;
}
- emitInsn (0xdf4a0000);
- emitField(0x24, 13, insn->tex.r);
+ if (insn->tex.rIndirectSrc >= 0) {
+ emitInsn (0xdf500000);
+ } else {
+ emitInsn (0xdf480000);
+ emitField(0x24, 13, insn->tex.r);
+ }
+
+ emitField(0x31, 1, insn->tex.liveOnly);
emitField(0x1f, 4, insn->tex.mask);
emitField(0x16, 6, type);
emitGPR (0x08, insn->src(0));
@@ -2753,6 +2771,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_PFETCH:
emitISBERD();
break;
+ case OP_AFETCH:
+ emitAL2P();
+ break;
case OP_LINTERP:
case OP_PINTERP:
emitIPA();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 1bfc8e32e84..67ea6df773c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -499,10 +499,14 @@ CodeEmitterNV50::emitForm_MAD(const Instruction *i)
setSrc(i, 2, 2);
if (i->getIndirect(0, 0)) {
- assert(!i->getIndirect(1, 0));
+ assert(!i->srcExists(1) || !i->getIndirect(1, 0));
+ assert(!i->srcExists(2) || !i->getIndirect(2, 0));
setAReg16(i, 0);
- } else {
+ } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
+ assert(!i->srcExists(2) || !i->getIndirect(2, 0));
setAReg16(i, 1);
+ } else {
+ setAReg16(i, 2);
}
}
@@ -546,7 +550,7 @@ CodeEmitterNV50::emitForm_MUL(const Instruction *i)
}
// usual immediate form
-// - 1 to 3 sources where last is immediate (rir, gir)
+// - 1 to 3 sources where second is immediate (rir, gir)
// - no address or predicate possible
void
CodeEmitterNV50::emitForm_IMM(const Instruction *i)
@@ -562,7 +566,7 @@ CodeEmitterNV50::emitForm_IMM(const Instruction *i)
if (Target::operationSrcNr[i->op] > 1) {
setSrc(i, 0, 0);
setImmediate(i, 1);
- setSrc(i, 2, 1);
+ // If there is another source, it has to be the same as the dest reg.
} else {
setImmediate(i, 0);
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 472e3a84119..f607f3ba3ec 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -85,6 +85,7 @@ private:
void emitCCTL(const Instruction *);
void emitINTERP(const Instruction *);
+ void emitAFETCH(const Instruction *);
void emitPFETCH(const Instruction *);
void emitVFETCH(const Instruction *);
void emitEXPORT(const Instruction *);
@@ -1450,6 +1451,7 @@ CodeEmitterNVC0::emitBAR(const Instruction *i)
ImmediateValue *imm = i->getSrc(0)->asImm();
assert(imm);
code[0] |= imm->reg.data.u32 << 20;
+ code[1] |= 0x8000;
}
// thread count
@@ -1460,6 +1462,7 @@ CodeEmitterNVC0::emitBAR(const Instruction *i)
assert(imm);
code[0] |= imm->reg.data.u32 << 26;
code[1] |= imm->reg.data.u32 >> 6;
+ code[1] |= 0x4000;
}
if (i->srcExists(2) && (i->predSrc != 2)) {
@@ -1493,6 +1496,21 @@ CodeEmitterNVC0::emitBAR(const Instruction *i)
}
}
+void
+CodeEmitterNVC0::emitAFETCH(const Instruction *i)
+{
+ code[0] = 0x00000006;
+ code[1] = 0x0c000000 | (i->src(0).get()->reg.data.offset & 0x7ff);
+
+ if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT)
+ code[0] |= 0x200;
+
+ emitPredicate(i);
+
+ defId(i->def(0), 14);
+ srcId(i->src(0).getIndirect(0), 20);
+}
+
void
CodeEmitterNVC0::emitPFETCH(const Instruction *i)
{
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index ecd115f9807..4847a0f3355 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -372,6 +372,10 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
+ case TGSI_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD;
+ case TGSI_SEMANTIC_TESSOUTER: return nv50_ir::SV_TESS_OUTER;
+ case TGSI_SEMANTIC_TESSINNER: return nv50_ir::SV_TESS_INNER;
+ case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
default:
assert(0);
return nv50_ir::SV_CLOCK;
@@ -434,7 +438,6 @@ nv50_ir::DataType Instruction::inferSrcType() const
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_USNE:
case TGSI_OPCODE_USHR:
- case TGSI_OPCODE_UCMP:
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
@@ -827,7 +830,7 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
tgsi_dump(tokens, 0);
- mainTempsInLMem = FALSE;
+ mainTempsInLMem = false;
}
Source::~Source()
@@ -938,7 +941,7 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
info->prop.gp.instanceCount = prop->u[0].Data;
break;
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
- info->prop.fp.separateFragData = TRUE;
+ info->prop.fp.separateFragData = true;
break;
case TGSI_PROPERTY_FS_COORD_ORIGIN:
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
@@ -947,6 +950,24 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
info->io.genUserClip = -1;
break;
+ case TGSI_PROPERTY_TCS_VERTICES_OUT:
+ info->prop.tp.outputPatchSize = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_PRIM_MODE:
+ info->prop.tp.domain = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_SPACING:
+ info->prop.tp.partitioning = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
+ info->prop.tp.winding = prop->u[0].Data;
+ break;
+ case TGSI_PROPERTY_TES_POINT_MODE:
+ if (prop->u[0].Data)
+ info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
+ else
+ info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
+ break;
default:
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
break;
@@ -1035,6 +1056,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
if (decl->Interp.Location || info->io.sampleInterp)
info->in[i].centroid = 1;
}
+
+ if (sn == TGSI_SEMANTIC_PATCH)
+ info->in[i].patch = 1;
+ if (sn == TGSI_SEMANTIC_PATCH)
+ info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
}
}
break;
@@ -1069,6 +1095,13 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_VIEWPORT_INDEX:
info->io.viewportId = i;
break;
+ case TGSI_SEMANTIC_PATCH:
+ info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
+ /* fallthrough */
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_TESSINNER:
+ info->out[i].patch = 1;
+ break;
default:
break;
}
@@ -1092,6 +1125,13 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
info->sv[i].sn = sn;
info->sv[i].si = si;
info->sv[i].input = inferSysValDirection(sn);
+
+ switch (sn) {
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_TESSINNER:
+ info->sv[i].patch = 1;
+ break;
+ }
}
break;
case TGSI_FILE_RESOURCE:
@@ -1156,7 +1196,7 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
} else
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
- mainTempsInLMem = TRUE;
+ mainTempsInLMem = true;
}
}
@@ -1164,12 +1204,22 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
Instruction::SrcRegister src = insn.getSrc(s);
if (src.getFile() == TGSI_FILE_TEMPORARY) {
if (src.isIndirect(0))
- mainTempsInLMem = TRUE;
+ mainTempsInLMem = true;
} else
if (src.getFile() == TGSI_FILE_RESOURCE) {
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
+ } else
+ if (src.getFile() == TGSI_FILE_OUTPUT) {
+ if (src.isIndirect(0)) {
+ // We don't know which one is accessed, just mark everything for
+ // reading. This is an extremely unlikely occurrence.
+ for (unsigned i = 0; i < info->numOutputs; ++i)
+ info->out[i].oread = 1;
+ } else {
+ info->out[src.getIndex(0)].oread = 1;
+ }
}
if (src.getFile() != TGSI_FILE_INPUT)
continue;
@@ -1246,6 +1296,7 @@ private:
Value *shiftAddress(Value *);
Value *getVertexBase(int s);
+ Value *getOutputBase(int s);
DataArray *getArrayForFile(unsigned file, int idx);
Value *fetchSrc(int s, int c);
Value *acquireDst(int d, int c);
@@ -1343,6 +1394,8 @@ private:
Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
uint8_t vtxBaseValid;
+ Value *outBase; // base address of vertex out patch (for TCP)
+
Stack condBBs; // fork BB, then else clause BB
Stack joinBBs; // fork BB, for inserting join ops on ENDIF
Stack loopBBs; // loop headers
@@ -1475,6 +1528,22 @@ Converter::getVertexBase(int s)
return vtxBase[s];
}
+Value *
+Converter::getOutputBase(int s)
+{
+ assert(s < 5);
+ if (!(vtxBaseValid & (1 << s))) {
+ Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
+ if (tgsi.getSrc(s).isIndirect(1))
+ offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
+ fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
+ offset);
+ vtxBaseValid |= 1 << s;
+ vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
+ }
+ return vtxBase[s];
+}
+
Value *
Converter::fetchSrc(int s, int c)
{
@@ -1488,6 +1557,9 @@ Converter::fetchSrc(int s, int c)
if (src.is2D()) {
switch (src.getFile()) {
+ case TGSI_FILE_OUTPUT:
+ dimRel = getOutputBase(s);
+ break;
case TGSI_FILE_INPUT:
dimRel = getVertexBase(s);
break;
@@ -1542,6 +1614,7 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
const int idx2d = src.is2D() ? src.getIndex(1) : 0;
const int idx = src.getIndex(0);
const int swz = src.getSwizzle(c);
+ Instruction *ld;
switch (src.getFile()) {
case TGSI_FILE_IMMEDIATE:
@@ -1569,13 +1642,19 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
if (ptr)
return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
}
- return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
+ ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
+ ld->perPatch = info->in[idx].patch;
+ return ld->getDef(0);
case TGSI_FILE_OUTPUT:
- assert(!"load from output file");
- return NULL;
+ assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
+ ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
+ ld->perPatch = info->out[idx].patch;
+ return ld->getDef(0);
case TGSI_FILE_SYSTEM_VALUE:
assert(!ptr);
- return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+ ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
+ ld->perPatch = info->sv[idx].patch;
+ return ld->getDef(0);
default:
return getArrayForFile(src.getFile(), idx2d)->load(
sub.cur->values, idx, swz, shiftAddress(ptr));
@@ -1645,7 +1724,8 @@ Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
viewport != NULL)
mkOp1(OP_MOV, TYPE_U32, viewport, val);
else
- mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val);
+ mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
+ info->out[idx].patch;
}
} else
if (f == TGSI_FILE_TEMPORARY ||
@@ -1687,6 +1767,7 @@ Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
join->fixed = 1;
conv->insertHead(join);
+ assert(!fork->joinAt);
fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
fork->insertBefore(fork->getExit(), fork->joinAt);
}
@@ -1728,7 +1809,7 @@ Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
}
tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
- setTexRS(tex, c, 1, -1);
+ setTexRS(tex, ++c, 1, -1);
bb->insertTail(tex);
}
@@ -2569,6 +2650,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
}
break;
case TGSI_OPCODE_UCMP:
+ srcTy = TYPE_U32;
+ /* fallthrough */
case TGSI_OPCODE_CMP:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
@@ -3282,10 +3365,21 @@ Converter::run()
clipVtx[c] = getScratch();
}
- if (prog->getType() == Program::TYPE_FRAGMENT) {
+ switch (prog->getType()) {
+ case Program::TYPE_TESSELLATION_CONTROL:
+ outBase = mkOp2v(
+ OP_SUB, TYPE_U32, getSSA(),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
+ mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
+ break;
+ case Program::TYPE_FRAGMENT: {
Symbol *sv = mkSysVal(SV_POSITION, 3);
fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+ break;
+ }
+ default:
+ break;
}
if (info->io.viewportId >= 0)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 596ac95d489..1f3fce2bb9a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -176,7 +176,7 @@ GM107LoweringPass::handlePOPCNT(Instruction *i)
i->getSrc(0), i->getSrc(1));
i->setSrc(0, tmp);
i->setSrc(1, NULL);
- return TRUE;
+ return true;
}
//
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 2c7f7e326b2..bea293bac99 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -871,6 +871,7 @@ NV50LoweringPreSSA::handleTXL(TexInstruction *i)
BasicBlock *joinBB = i->bb->splitAfter(i);
bld.setPosition(currBB, true);
+ assert(!currBB->joinAt);
currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);
for (int l = 0; l <= 3; ++l) {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 7a5d1ce0299..c3c302da5c8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -559,6 +559,12 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
} else
if (i->isNop()) {
bb->remove(i);
+ } else
+ if (i->op == OP_BAR && i->subOp == NV50_IR_SUBOP_BAR_SYNC &&
+ prog->getType() != Program::TYPE_COMPUTE) {
+ // It seems like barriers are never required for tessellation since
+ // the warp size is 32, and there are always at most 32 tcs threads.
+ bb->remove(i);
} else {
// TODO: Move this to before register allocation for operations that
// need the $c register !
@@ -956,7 +962,43 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
bool
NVC0LoweringPass::handleTXQ(TexInstruction *txq)
{
- // TODO: indirect resource/sampler index
+ if (txq->tex.rIndirectSrc < 0)
+ return true;
+
+ Value *ticRel = txq->getIndirectR();
+ const int chipset = prog->getTarget()->getChipset();
+
+ txq->setIndirectS(NULL);
+ txq->tex.sIndirectSrc = -1;
+
+ assert(ticRel);
+
+ if (chipset < NVISA_GK104_CHIPSET) {
+ LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
+
+ txq->setSrc(txq->tex.rIndirectSrc, NULL);
+ if (txq->tex.r)
+ ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
+ ticRel, bld.mkImm(txq->tex.r));
+
+ bld.mkOp2(OP_SHL, TYPE_U32, src, ticRel, bld.mkImm(0x17));
+
+ txq->moveSources(0, 1);
+ txq->setSrc(0, src);
+ } else {
+ Value *hnd = loadTexHandle(
+ bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+ txq->getIndirectR(), bld.mkImm(2)),
+ txq->tex.r);
+ txq->tex.r = 0xff;
+ txq->tex.s = 0x1f;
+
+ txq->setIndirectR(NULL);
+ txq->moveSources(0, 1);
+ txq->setSrc(0, hnd);
+ txq->tex.rIndirectSrc = 0;
+ }
+
return true;
}
@@ -1485,6 +1527,10 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
i->op = OP_MOV;
i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0));
}
+ if (sv == SV_VERTEX_COUNT) {
+ bld.setPosition(i, true);
+ bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0), bld.mkImm(0x808));
+ }
return true;
}
@@ -1554,7 +1600,7 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
break;
default:
- if (prog->getType() == Program::TYPE_TESSELLATION_EVAL)
+ if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
ld = bld.mkFetch(i->getDef(0), i->dType,
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
@@ -1705,6 +1751,7 @@ NVC0LoweringPass::checkPredicate(Instruction *insn)
bool
NVC0LoweringPass::visit(Instruction *i)
{
+ bool ret = true;
bld.setPosition(i, false);
if (i->cc != CC_ALWAYS)
@@ -1736,7 +1783,8 @@ NVC0LoweringPass::visit(Instruction *i)
case OP_SQRT:
return handleSQRT(i);
case OP_EXPORT:
- return handleEXPORT(i);
+ ret = handleEXPORT(i);
+ break;
case OP_EMIT:
case OP_RESTART:
return handleOUT(i);
@@ -1775,6 +1823,9 @@ NVC0LoweringPass::visit(Instruction *i)
i->setIndirect(0, 0, ptr);
i->subOp = NV50_IR_SUBOP_LDC_IS;
}
+ } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
+ assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
+ i->op = OP_VFETCH;
}
break;
case OP_ATOM:
@@ -1796,7 +1847,20 @@ NVC0LoweringPass::visit(Instruction *i)
default:
break;
}
- return true;
+
+ /* Kepler+ has a special opcode to compute a new base address to be used
+ * for indirect loads.
+ */
+ if (targ->getChipset() >= NVISA_GK104_CHIPSET && !i->perPatch &&
+ (i->op == OP_VFETCH || i->op == OP_EXPORT) && i->src(0).isIndirect(0)) {
+ Instruction *afetch = bld.mkOp1(OP_AFETCH, TYPE_U32, bld.getSSA(),
+ cloneShallow(func, i->getSrc(0)));
+ afetch->setIndirect(0, 0, i->getIndirect(0, 0));
+ i->src(0).get()->reg.data.offset = 0;
+ i->setIndirect(0, 0, afetch->getDef(0));
+ }
+
+ return ret;
}
bool
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index ae739eeda83..cea96dcdfc5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -608,9 +608,12 @@ ConstantFolding::expr(Instruction *i,
case OP_FMA: {
i->op = OP_ADD;
+ /* Move the immediate to the second arg, otherwise the ADD operation
+ * won't be emittable
+ */
i->setSrc(1, i->getSrc(0));
- i->src(1).mod = i->src(2).mod;
i->setSrc(0, i->getSrc(2));
+ i->src(0).mod = i->src(2).mod;
i->setSrc(2, NULL);
ImmediateValue src0;
@@ -2082,6 +2085,8 @@ MemoryOpt::runOpt(BasicBlock *bb)
}
if (ldst->getPredicate()) // TODO: handle predicated ld/st
continue;
+ if (ldst->perPatch) // TODO: create separate per-patch lists
+ continue;
if (isLoad) {
DataFile file = ldst->src(0).getFile();
@@ -2515,6 +2520,8 @@ Instruction::isResultEqual(const Instruction *that) const
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
return true;
+ case FILE_SHADER_OUTPUT:
+ return bb->getProgram()->getType() == Program::TYPE_TESSELLATION_EVAL;
default:
return false;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index ef3de6ff92a..9ebdc6586db 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -135,6 +135,7 @@ const char *operationStr[OP_LAST + 1] =
"membar",
"vfetch",
"pfetch",
+ "afetch",
"export",
"linterp",
"pinterp",
@@ -258,7 +259,8 @@ static const char *SemanticStr[SV_LAST + 1] =
"SAMPLE_INDEX",
"SAMPLE_POS",
"SAMPLE_MASK",
- "TESS_FACTOR",
+ "TESS_OUTER",
+ "TESS_INNER",
"TESS_COORD",
"TID",
"CTAID",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 898653c9953..78bc97f4397 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -2066,6 +2066,8 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
condenseDefs(i);
if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8)
addHazard(i, i->src(0).getIndirect(0));
+ if (i->src(0).isIndirect(1) && typeSizeof(i->dType) >= 8)
+ addHazard(i, i->src(0).getIndirect(1));
} else
if (i->op == OP_UNION ||
i->op == OP_MERGE ||
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 7992f539782..fe530c76b62 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -41,7 +41,7 @@ const uint8_t Target::operationSrcNr[] =
0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK,
0, 0, 0, // PRERET,CONT,BREAK
0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
- 1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
+ 1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
1, 1, // EMIT, RESTART
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
@@ -96,8 +96,8 @@ const OpClass Target::operationClass[] =
OPCLASS_FLOW, OPCLASS_FLOW,
// MEMBAR
OPCLASS_CONTROL,
- // VFETCH, PFETCH, EXPORT
- OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
+ // VFETCH, PFETCH, AFETCH, EXPORT
+ OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
// LINTERP, PINTERP
OPCLASS_SFU, OPCLASS_SFU,
// EMIT, RESTART
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index ca545a6024a..f3ddcaa5199 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -118,7 +118,7 @@ void TargetNV50::initOpInfo()
static const uint32_t shortForm[(OP_LAST + 31) / 32] =
{
// MOV,ADD,SUB,MUL,MAD,SAD,L/PINTERP,RCP,TEX,TXF
- 0x00014e40, 0x00000040, 0x00000498, 0x00000000
+ 0x00014e40, 0x00000040, 0x00000930, 0x00000000
};
static const operation noDestList[] =
{
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 7d4a859dde4..27df0eba66b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -286,7 +286,8 @@ TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const
case SV_CLIP_DISTANCE: return 0x2c0 + idx * 4;
case SV_POINT_COORD: return 0x2e0 + idx * 4;
case SV_FACE: return 0x3fc;
- case SV_TESS_FACTOR: return 0x000 + idx * 4;
+ case SV_TESS_OUTER: return 0x000 + idx * 4;
+ case SV_TESS_INNER: return 0x010 + idx * 4;
case SV_TESS_COORD: return 0x2f0 + idx * 4;
case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0;
case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0;
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 09cdbb53ecb..67e181e803a 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -22,13 +22,13 @@ struct nouveau_transfer {
uint32_t offset;
};
-static INLINE struct nouveau_transfer *
+static inline struct nouveau_transfer *
nouveau_transfer(struct pipe_transfer *transfer)
{
return (struct nouveau_transfer *)transfer;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_malloc(struct nv04_resource *buf)
{
if (!buf->data)
@@ -36,16 +36,11 @@ nouveau_buffer_malloc(struct nv04_resource *buf)
return !!buf->data;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_allocate(struct nouveau_screen *screen,
struct nv04_resource *buf, unsigned domain)
{
- uint32_t size = buf->base.width0;
-
- if (buf->base.bind & (PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_COMPUTE_RESOURCE |
- PIPE_BIND_SHADER_RESOURCE))
- size = align(size, 0x100);
+ uint32_t size = align(buf->base.width0, 0x100);
if (domain == NOUVEAU_BO_VRAM) {
buf->mm = nouveau_mm_allocate(screen->mm_VRAM, size,
@@ -58,12 +53,12 @@ nouveau_buffer_allocate(struct nouveau_screen *screen,
buf->mm = nouveau_mm_allocate(screen->mm_GART, size,
&buf->bo, &buf->offset);
if (!buf->bo)
- return FALSE;
+ return false;
NOUVEAU_DRV_STAT(screen, buf_obj_current_bytes_sys, buf->base.width0);
} else {
assert(domain == 0);
if (!nouveau_buffer_malloc(buf))
- return FALSE;
+ return false;
}
buf->domain = domain;
if (buf->bo)
@@ -71,10 +66,10 @@ nouveau_buffer_allocate(struct nouveau_screen *screen,
util_range_set_empty(&buf->valid_buffer_range);
- return TRUE;
+ return true;
}
-static INLINE void
+static inline void
release_allocation(struct nouveau_mm_allocation **mm,
struct nouveau_fence *fence)
{
@@ -82,7 +77,7 @@ release_allocation(struct nouveau_mm_allocation **mm,
(*mm) = NULL;
}
-INLINE void
+inline void
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
{
nouveau_bo_ref(NULL, &buf->bo);
@@ -98,7 +93,7 @@ nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
buf->domain = 0;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_reallocate(struct nouveau_screen *screen,
struct nv04_resource *buf, unsigned domain)
{
@@ -139,13 +134,13 @@ nouveau_buffer_destroy(struct pipe_screen *pscreen,
*/
static uint8_t *
nouveau_transfer_staging(struct nouveau_context *nv,
- struct nouveau_transfer *tx, boolean permit_pb)
+ struct nouveau_transfer *tx, bool permit_pb)
{
const unsigned adj = tx->base.box.x & NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK;
const unsigned size = align(tx->base.box.width, 4) + adj;
if (!nv->push_data)
- permit_pb = FALSE;
+ permit_pb = false;
if ((size <= NOUVEAU_TRANSFER_PUSHBUF_THRESHOLD) && permit_pb) {
tx->map = align_malloc(size, NOUVEAU_MIN_BUFFER_MAP_ALIGN);
@@ -167,7 +162,7 @@ nouveau_transfer_staging(struct nouveau_context *nv,
* buffer. Also updates buf->data if present.
*
* Maybe just migrate to GART right away if we actually need to do this. */
-static boolean
+static bool
nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
{
struct nv04_resource *buf = nv04_resource(tx->base.resource);
@@ -180,12 +175,12 @@ nouveau_transfer_read(struct nouveau_context *nv, struct nouveau_transfer *tx)
buf->bo, buf->offset + base, buf->domain, size);
if (nouveau_bo_wait(tx->bo, NOUVEAU_BO_RD, nv->client))
- return FALSE;
+ return false;
if (buf->data)
memcpy(buf->data + base, tx->map, size);
- return TRUE;
+ return true;
}
static void
@@ -195,7 +190,7 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
struct nv04_resource *buf = nv04_resource(tx->base.resource);
uint8_t *data = tx->map + offset;
const unsigned base = tx->base.box.x + offset;
- const boolean can_cb = !((base | size) & 3);
+ const bool can_cb = !((base | size) & 3);
if (buf->data)
memcpy(data, buf->data + base, size);
@@ -224,32 +219,32 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
/* Does a CPU wait for the buffer's backing data to become reliably accessible
* for write/read by waiting on the buffer's relevant fences.
*/
-static INLINE boolean
+static inline bool
nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
{
if (rw == PIPE_TRANSFER_READ) {
if (!buf->fence_wr)
- return TRUE;
+ return true;
NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
!nouveau_fence_signalled(buf->fence_wr));
if (!nouveau_fence_wait(buf->fence_wr))
- return FALSE;
+ return false;
} else {
if (!buf->fence)
- return TRUE;
+ return true;
NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
!nouveau_fence_signalled(buf->fence));
if (!nouveau_fence_wait(buf->fence))
- return FALSE;
+ return false;
nouveau_fence_ref(NULL, &buf->fence);
}
nouveau_fence_ref(NULL, &buf->fence_wr);
- return TRUE;
+ return true;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
{
if (rw == PIPE_TRANSFER_READ)
@@ -258,7 +253,7 @@ nouveau_buffer_busy(struct nv04_resource *buf, unsigned rw)
return (buf->fence && !nouveau_fence_signalled(buf->fence));
}
-static INLINE void
+static inline void
nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
struct pipe_resource *resource,
const struct pipe_box *box,
@@ -280,7 +275,7 @@ nouveau_buffer_transfer_init(struct nouveau_transfer *tx,
tx->map = NULL;
}
-static INLINE void
+static inline void
nouveau_buffer_transfer_del(struct nouveau_context *nv,
struct nouveau_transfer *tx)
{
@@ -297,11 +292,11 @@ nouveau_buffer_transfer_del(struct nouveau_context *nv,
}
/* Creates a cache in system memory of the buffer data. */
-static boolean
+static bool
nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
{
struct nouveau_transfer tx;
- boolean ret;
+ bool ret;
tx.base.resource = &buf->base;
tx.base.box.x = 0;
tx.base.box.width = buf->base.width0;
@@ -310,13 +305,13 @@ nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
if (!buf->data)
if (!nouveau_buffer_malloc(buf))
- return FALSE;
+ return false;
if (!(buf->status & NOUVEAU_BUFFER_STATUS_DIRTY))
- return TRUE;
+ return true;
nv->stats.buf_cache_count++;
- if (!nouveau_transfer_staging(nv, &tx, FALSE))
- return FALSE;
+ if (!nouveau_transfer_staging(nv, &tx, false))
+ return false;
ret = nouveau_transfer_read(nv, &tx);
if (ret) {
@@ -335,15 +330,15 @@ nouveau_buffer_cache(struct nouveau_context *nv, struct nv04_resource *buf)
* resource. This can be useful if we would otherwise have to wait for a read
* operation to complete on this data.
*/
-static INLINE boolean
+static inline bool
nouveau_buffer_should_discard(struct nv04_resource *buf, unsigned usage)
{
if (!(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
- return FALSE;
+ return false;
if (unlikely(buf->base.bind & PIPE_BIND_SHARED))
- return FALSE;
+ return false;
if (unlikely(usage & PIPE_TRANSFER_PERSISTENT))
- return FALSE;
+ return false;
return buf->mm && nouveau_buffer_busy(buf, PIPE_TRANSFER_WRITE);
}
@@ -413,7 +408,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
* back into VRAM on unmap. */
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
buf->status &= NOUVEAU_BUFFER_STATUS_REALLOC_MASK;
- nouveau_transfer_staging(nv, tx, TRUE);
+ nouveau_transfer_staging(nv, tx, true);
} else {
if (buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
/* The GPU is currently writing to this buffer. Copy its current
@@ -424,13 +419,13 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
align_free(buf->data);
buf->data = NULL;
}
- nouveau_transfer_staging(nv, tx, FALSE);
+ nouveau_transfer_staging(nv, tx, false);
nouveau_transfer_read(nv, tx);
} else {
/* The buffer is currently idle. Create a staging area for writes,
* and make sure that the cached data is up-to-date. */
if (usage & PIPE_TRANSFER_WRITE)
- nouveau_transfer_staging(nv, tx, TRUE);
+ nouveau_transfer_staging(nv, tx, true);
if (!buf->data)
nouveau_buffer_cache(nv, buf);
}
@@ -482,7 +477,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
/* The whole range is being discarded, so it doesn't matter what was
* there before. No need to copy anything over. */
- nouveau_transfer_staging(nv, tx, TRUE);
+ nouveau_transfer_staging(nv, tx, true);
map = tx->map;
} else
if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
@@ -493,7 +488,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
} else {
/* It is expected that the returned buffer be a representation of the
* data in question, so we must copy it over from the buffer. */
- nouveau_transfer_staging(nv, tx, TRUE);
+ nouveau_transfer_staging(nv, tx, true);
if (tx->map)
memcpy(tx->map, map, box->width);
map = tx->map;
@@ -544,7 +539,7 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
const uint8_t bind = buf->base.bind;
/* make sure we invalidate dedicated caches */
if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
- nv->vbo_dirty = TRUE;
+ nv->vbo_dirty = true;
}
util_range_add(&buf->valid_buffer_range,
@@ -639,7 +634,7 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
{
struct nouveau_screen *screen = nouveau_screen(pscreen);
struct nv04_resource *buffer;
- boolean ret;
+ bool ret;
buffer = CALLOC_STRUCT(nv04_resource);
if (!buffer)
@@ -683,7 +678,7 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
}
ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
- if (ret == FALSE)
+ if (ret == false)
goto fail;
if (buffer->domain == NOUVEAU_BO_VRAM && screen->hint_buf_keep_sysmem_copy)
@@ -730,20 +725,20 @@ nouveau_user_buffer_create(struct pipe_screen *pscreen, void *ptr,
return &buffer->base;
}
-static INLINE boolean
+static inline bool
nouveau_buffer_data_fetch(struct nouveau_context *nv, struct nv04_resource *buf,
struct nouveau_bo *bo, unsigned offset, unsigned size)
{
if (!nouveau_buffer_malloc(buf))
- return FALSE;
+ return false;
if (nouveau_bo_map(bo, NOUVEAU_BO_RD, nv->client))
- return FALSE;
+ return false;
memcpy(buf->data, (uint8_t *)bo->map + offset, size);
- return TRUE;
+ return true;
}
/* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
-boolean
+bool
nouveau_buffer_migrate(struct nouveau_context *nv,
struct nv04_resource *buf, const unsigned new_domain)
{
@@ -758,7 +753,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
if (new_domain == NOUVEAU_BO_GART && old_domain == 0) {
if (!nouveau_buffer_allocate(screen, buf, new_domain))
- return FALSE;
+ return false;
ret = nouveau_bo_map(buf->bo, 0, nv->client);
if (ret)
return ret;
@@ -771,7 +766,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
if (new_domain == NOUVEAU_BO_VRAM) {
/* keep a system memory copy of our data in case we hit a fallback */
if (!nouveau_buffer_data_fetch(nv, buf, buf->bo, buf->offset, size))
- return FALSE;
+ return false;
if (nouveau_mesa_debug)
debug_printf("migrating %u KiB to VRAM\n", size / 1024);
}
@@ -792,28 +787,28 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
if (new_domain == NOUVEAU_BO_VRAM && old_domain == 0) {
struct nouveau_transfer tx;
if (!nouveau_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
- return FALSE;
+ return false;
tx.base.resource = &buf->base;
tx.base.box.x = 0;
tx.base.box.width = buf->base.width0;
tx.bo = NULL;
tx.map = NULL;
- if (!nouveau_transfer_staging(nv, &tx, FALSE))
- return FALSE;
+ if (!nouveau_transfer_staging(nv, &tx, false))
+ return false;
nouveau_transfer_write(nv, &tx, 0, tx.base.box.width);
nouveau_buffer_transfer_del(nv, &tx);
} else
- return FALSE;
+ return false;
assert(buf->domain == new_domain);
- return TRUE;
+ return true;
}
/* Migrate data from glVertexAttribPointer(non-VBO) user buffers to GART.
* We'd like to only allocate @size bytes here, but then we'd have to rebase
* the vertex indices ...
*/
-boolean
+bool
nouveau_user_buffer_upload(struct nouveau_context *nv,
struct nv04_resource *buf,
unsigned base, unsigned size)
@@ -825,20 +820,20 @@ nouveau_user_buffer_upload(struct nouveau_context *nv,
buf->base.width0 = base + size;
if (!nouveau_buffer_reallocate(screen, buf, NOUVEAU_BO_GART))
- return FALSE;
+ return false;
ret = nouveau_bo_map(buf->bo, 0, nv->client);
if (ret)
- return FALSE;
+ return false;
memcpy((uint8_t *)buf->bo->map + buf->offset + base, buf->data + base, size);
- return TRUE;
+ return true;
}
/* Scratch data allocation. */
-static INLINE int
+static inline int
nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
unsigned size)
{
@@ -875,7 +870,7 @@ nouveau_scratch_runout_release(struct nouveau_context *nv)
/* Allocate an extra bo if we can't fit everything we need simultaneously.
* (Could happen for very large user arrays.)
*/
-static INLINE boolean
+static inline bool
nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
{
int ret;
@@ -909,7 +904,7 @@ nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
/* Continue to next scratch buffer, if available (no wrapping, large enough).
* Allocate it if it has not yet been created.
*/
-static INLINE boolean
+static inline bool
nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
{
struct nouveau_bo *bo;
@@ -917,14 +912,14 @@ nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
const unsigned i = (nv->scratch.id + 1) % NOUVEAU_MAX_SCRATCH_BUFS;
if ((size > nv->scratch.bo_size) || (i == nv->scratch.wrap))
- return FALSE;
+ return false;
nv->scratch.id = i;
bo = nv->scratch.bo[i];
if (!bo) {
ret = nouveau_scratch_bo_alloc(nv, &bo, nv->scratch.bo_size);
if (ret)
- return FALSE;
+ return false;
nv->scratch.bo[i] = bo;
}
nv->scratch.current = bo;
@@ -937,10 +932,10 @@ nouveau_scratch_next(struct nouveau_context *nv, unsigned size)
return !ret;
}
-static boolean
+static bool
nouveau_scratch_more(struct nouveau_context *nv, unsigned min_size)
{
- boolean ret;
+ bool ret;
ret = nouveau_scratch_next(nv, min_size);
if (!ret)
diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.h b/src/gallium/drivers/nouveau/nouveau_buffer.h
index de77f481da3..7e6a6cc804b 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -58,7 +58,7 @@ nouveau_copy_buffer(struct nouveau_context *,
struct nv04_resource *dst, unsigned dst_pos,
struct nv04_resource *src, unsigned src_pos, unsigned size);
-boolean
+bool
nouveau_buffer_migrate(struct nouveau_context *,
struct nv04_resource *, unsigned domain);
@@ -66,20 +66,20 @@ void *
nouveau_resource_map_offset(struct nouveau_context *, struct nv04_resource *,
uint32_t offset, uint32_t flags);
-static INLINE void
+static inline void
nouveau_resource_unmap(struct nv04_resource *res)
{
/* no-op */
}
-static INLINE struct nv04_resource *
+static inline struct nv04_resource *
nv04_resource(struct pipe_resource *resource)
{
return (struct nv04_resource *)resource;
}
/* is resource mapped into the GPU's address space (i.e. VRAM or GART) ? */
-static INLINE boolean
+static inline bool
nouveau_resource_mapped_by_gpu(struct pipe_resource *resource)
{
return nv04_resource(resource)->domain != 0;
@@ -93,7 +93,7 @@ struct pipe_resource *
nouveau_user_buffer_create(struct pipe_screen *screen, void *ptr,
unsigned bytes, unsigned usage);
-boolean
+bool
nouveau_user_buffer_upload(struct nouveau_context *, struct nv04_resource *,
unsigned base, unsigned size);
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index c2ba0159afe..24deb7ee4c0 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -13,7 +13,7 @@ struct nouveau_context {
struct nouveau_client *client;
struct nouveau_pushbuf *pushbuf;
- boolean vbo_dirty;
+ bool vbo_dirty;
void (*copy_data)(struct nouveau_context *,
struct nouveau_bo *dst, unsigned, unsigned,
@@ -53,7 +53,7 @@ struct nouveau_context {
} stats;
};
-static INLINE struct nouveau_context *
+static inline struct nouveau_context *
nouveau_context(struct pipe_context *pipe)
{
return (struct nouveau_context *)pipe;
@@ -69,7 +69,7 @@ nouveau_scratch_runout_release(struct nouveau_context *);
* because we don't want to un-bo_ref each allocation every time. This is less
* work, and we need the wrap index anyway for extreme situations.
*/
-static INLINE void
+static inline void
nouveau_scratch_done(struct nouveau_context *nv)
{
nv->scratch.wrap = nv->scratch.id;
@@ -84,7 +84,7 @@ void *
nouveau_scratch_get(struct nouveau_context *, unsigned size, uint64_t *gpu_addr,
struct nouveau_bo **);
-static INLINE void
+static inline void
nouveau_context_destroy(struct nouveau_context *ctx)
{
int i;
@@ -96,7 +96,7 @@ nouveau_context_destroy(struct nouveau_context *ctx)
FREE(ctx);
}
-static INLINE void
+static inline void
nouveau_context_update_frame_stats(struct nouveau_context *nv)
{
nv->stats.buf_cache_frame <<= 1;
@@ -104,7 +104,7 @@ nouveau_context_update_frame_stats(struct nouveau_context *nv)
nv->stats.buf_cache_count = 0;
nv->stats.buf_cache_frame |= 1;
if ((nv->stats.buf_cache_frame & 0xf) == 0xf)
- nv->screen->hint_buf_keep_sysmem_copy = TRUE;
+ nv->screen->hint_buf_keep_sysmem_copy = true;
}
}
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index 17a5174594d..abcdb479954 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -28,13 +28,13 @@
#include
#endif
-boolean
+bool
nouveau_fence_new(struct nouveau_screen *screen, struct nouveau_fence **fence,
- boolean emit)
+ bool emit)
{
*fence = CALLOC_STRUCT(nouveau_fence);
if (!*fence)
- return FALSE;
+ return false;
(*fence)->screen = screen;
(*fence)->ref = 1;
@@ -43,7 +43,7 @@ nouveau_fence_new(struct nouveau_screen *screen, struct nouveau_fence **fence,
if (emit)
nouveau_fence_emit(*fence);
- return TRUE;
+ return true;
}
static void
@@ -58,7 +58,7 @@ nouveau_fence_trigger_work(struct nouveau_fence *fence)
}
}
-boolean
+bool
nouveau_fence_work(struct nouveau_fence *fence,
void (*func)(void *), void *data)
{
@@ -66,16 +66,16 @@ nouveau_fence_work(struct nouveau_fence *fence,
if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
func(data);
- return TRUE;
+ return true;
}
work = CALLOC_STRUCT(nouveau_fence_work);
if (!work)
- return FALSE;
+ return false;
work->func = func;
work->data = data;
LIST_ADD(&work->list, &fence->work);
- return TRUE;
+ return true;
}
void
@@ -132,7 +132,7 @@ nouveau_fence_del(struct nouveau_fence *fence)
}
void
-nouveau_fence_update(struct nouveau_screen *screen, boolean flushed)
+nouveau_fence_update(struct nouveau_screen *screen, bool flushed)
{
struct nouveau_fence *fence;
struct nouveau_fence *next = NULL;
@@ -167,21 +167,21 @@ nouveau_fence_update(struct nouveau_screen *screen, boolean flushed)
#define NOUVEAU_FENCE_MAX_SPINS (1 << 31)
-boolean
+bool
nouveau_fence_signalled(struct nouveau_fence *fence)
{
struct nouveau_screen *screen = fence->screen;
if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
- return TRUE;
+ return true;
if (fence->state >= NOUVEAU_FENCE_STATE_EMITTED)
- nouveau_fence_update(screen, FALSE);
+ nouveau_fence_update(screen, false);
return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED;
}
-boolean
+bool
nouveau_fence_wait(struct nouveau_fence *fence)
{
struct nouveau_screen *screen = fence->screen;
@@ -195,16 +195,16 @@ nouveau_fence_wait(struct nouveau_fence *fence)
if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
- return FALSE;
+ return false;
if (fence == screen->fence.current)
nouveau_fence_next(screen);
do {
- nouveau_fence_update(screen, FALSE);
+ nouveau_fence_update(screen, false);
if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
- return TRUE;
+ return true;
if (!spins)
NOUVEAU_DRV_STAT(screen, any_non_kernel_fence_sync_count, 1);
spins++;
@@ -218,7 +218,7 @@ nouveau_fence_wait(struct nouveau_fence *fence)
fence->sequence,
screen->fence.sequence_ack, screen->fence.sequence);
- return FALSE;
+ return false;
}
void
@@ -229,5 +229,5 @@ nouveau_fence_next(struct nouveau_screen *screen)
nouveau_fence_ref(NULL, &screen->fence.current);
- nouveau_fence_new(screen, &screen->fence.current, FALSE);
+ nouveau_fence_new(screen, &screen->fence.current, false);
}
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
index 7bb132a5d15..a1587051b0f 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -29,15 +29,15 @@ struct nouveau_fence {
void nouveau_fence_emit(struct nouveau_fence *);
void nouveau_fence_del(struct nouveau_fence *);
-boolean nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
- boolean emit);
-boolean nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
-void nouveau_fence_update(struct nouveau_screen *, boolean flushed);
-void nouveau_fence_next(struct nouveau_screen *);
-boolean nouveau_fence_wait(struct nouveau_fence *);
-boolean nouveau_fence_signalled(struct nouveau_fence *);
+bool nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
+ bool emit);
+bool nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
+void nouveau_fence_update(struct nouveau_screen *, bool flushed);
+void nouveau_fence_next(struct nouveau_screen *);
+bool nouveau_fence_wait(struct nouveau_fence *);
+bool nouveau_fence_signalled(struct nouveau_fence *);
-static INLINE void
+static inline void
nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
{
if (fence)
@@ -51,7 +51,7 @@ nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
*ref = fence;
}
-static INLINE struct nouveau_fence *
+static inline struct nouveau_fence *
nouveau_fence(struct pipe_fence_handle *fence)
{
return (struct nouveau_fence *)fence;
diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h
index ff97aaa9af0..1538c7b6e57 100644
--- a/src/gallium/drivers/nouveau/nouveau_gldefs.h
+++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h
@@ -1,7 +1,7 @@
#ifndef __NOUVEAU_GLDEFS_H__
#define __NOUVEAU_GLDEFS_H__
-static INLINE unsigned
+static inline unsigned
nvgl_blend_func(unsigned factor)
{
switch (factor) {
@@ -40,7 +40,7 @@ nvgl_blend_func(unsigned factor)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_blend_eqn(unsigned func)
{
switch (func) {
@@ -59,7 +59,7 @@ nvgl_blend_eqn(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_logicop_func(unsigned func)
{
switch (func) {
@@ -100,7 +100,7 @@ nvgl_logicop_func(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_comparison_op(unsigned op)
{
switch (op) {
@@ -125,7 +125,7 @@ nvgl_comparison_op(unsigned op)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_polygon_mode(unsigned mode)
{
switch (mode) {
@@ -140,7 +140,7 @@ nvgl_polygon_mode(unsigned mode)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_stencil_op(unsigned op)
{
switch (op) {
@@ -165,7 +165,7 @@ nvgl_stencil_op(unsigned op)
}
}
-static INLINE unsigned
+static inline unsigned
nvgl_primitive(unsigned prim) {
switch (prim) {
case PIPE_PRIM_POINTS:
diff --git a/src/gallium/drivers/nouveau/nouveau_mm.c b/src/gallium/drivers/nouveau/nouveau_mm.c
index 9c454c56db0..43b3d99f48a 100644
--- a/src/gallium/drivers/nouveau/nouveau_mm.c
+++ b/src/gallium/drivers/nouveau/nouveau_mm.c
@@ -70,7 +70,7 @@ mm_slab_alloc(struct mm_slab *slab)
return -1;
}
-static INLINE void
+static inline void
mm_slab_free(struct mm_slab *slab, int i)
{
assert(i < slab->count);
@@ -79,7 +79,7 @@ mm_slab_free(struct mm_slab *slab, int i)
assert(slab->free <= slab->count);
}
-static INLINE int
+static inline int
mm_get_order(uint32_t size)
{
int s = __builtin_clz(size) ^ 31;
@@ -104,7 +104,7 @@ mm_bucket_by_size(struct nouveau_mman *cache, unsigned size)
}
/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */
-static INLINE uint32_t
+static inline uint32_t
mm_default_slab_size(unsigned chunk_order)
{
static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
@@ -263,7 +263,7 @@ nouveau_mm_create(struct nouveau_device *dev, uint32_t domain,
return cache;
}
-static INLINE void
+static inline void
nouveau_mm_free_slabs(struct list_head *head)
{
struct mm_slab *slab, *next;
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index c6e5074db19..b2290e7e784 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -67,18 +67,14 @@ nouveau_screen_fence_ref(struct pipe_screen *pscreen,
nouveau_fence_ref(nouveau_fence(pfence), (struct nouveau_fence **)ptr);
}
-static boolean
-nouveau_screen_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *pfence)
-{
- return nouveau_fence_signalled(nouveau_fence(pfence));
-}
-
static boolean
nouveau_screen_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *pfence,
uint64_t timeout)
{
+ if (!timeout)
+ return nouveau_fence_signalled(nouveau_fence(pfence));
+
return nouveau_fence_wait(nouveau_fence(pfence));
}
@@ -115,7 +111,7 @@ nouveau_screen_bo_from_handle(struct pipe_screen *pscreen,
}
-boolean
+bool
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
struct nouveau_bo *bo,
unsigned stride,
@@ -127,11 +123,11 @@ nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
return nouveau_bo_name_get(bo, &whandle->handle) == 0;
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
whandle->handle = bo->handle;
- return TRUE;
+ return true;
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
return nouveau_bo_set_prime(bo, (int *)&whandle->handle) == 0;
} else {
- return FALSE;
+ return false;
}
}
@@ -203,7 +199,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
pscreen->get_timestamp = nouveau_screen_get_timestamp;
pscreen->fence_reference = nouveau_screen_fence_ref;
- pscreen->fence_signalled = nouveau_screen_fence_signalled;
pscreen->fence_finish = nouveau_screen_fence_finish;
util_format_s3tc_init();
@@ -214,7 +209,8 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_CURSOR |
PIPE_BIND_SAMPLER_VIEW |
- PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE |
+ PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE |
+ PIPE_BIND_COMPUTE_RESOURCE |
PIPE_BIND_GLOBAL;
screen->sysmem_bindings =
PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_STREAM_OUTPUT |
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 30041b271c9..4fdde9fbf3d 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -49,7 +49,7 @@ struct nouveau_screen {
int64_t cpu_gpu_time_delta;
- boolean hint_buf_keep_sysmem_copy;
+ bool hint_buf_keep_sysmem_copy;
unsigned vram_domain;
@@ -112,15 +112,15 @@ struct nouveau_screen {
# define NOUVEAU_DRV_STAT_IFD(x)
#endif
-static INLINE struct nouveau_screen *
+static inline struct nouveau_screen *
nouveau_screen(struct pipe_screen *pscreen)
{
return (struct nouveau_screen *)pscreen;
}
-boolean nouveau_drm_screen_unref(struct nouveau_screen *screen);
+bool nouveau_drm_screen_unref(struct nouveau_screen *screen);
-boolean
+bool
nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
struct nouveau_bo *bo,
unsigned stride,
diff --git a/src/gallium/drivers/nouveau/nouveau_statebuf.h b/src/gallium/drivers/nouveau/nouveau_statebuf.h
index 4f8bd7bdf16..f38014091ba 100644
--- a/src/gallium/drivers/nouveau/nouveau_statebuf.h
+++ b/src/gallium/drivers/nouveau/nouveau_statebuf.h
@@ -20,7 +20,7 @@ struct nouveau_statebuf_builder
#define sb_data(sb, v) *(sb).p++ = (v)
#endif
-static INLINE uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
+static inline uint32_t sb_header(unsigned subc, unsigned mthd, unsigned size)
{
return (size << 18) | (subc << 13) | mthd;
}
diff --git a/src/gallium/drivers/nouveau/nouveau_video.c b/src/gallium/drivers/nouveau/nouveau_video.c
index d6330fa63a8..e414a534418 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -100,7 +100,7 @@ nouveau_vpe_fini(struct nouveau_decoder *dec) {
dec->current = dec->future = dec->past = 8;
}
-static INLINE void
+static inline void
nouveau_vpe_mb_dct_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12_macroblock *mb)
{
int cbb;
@@ -125,7 +125,7 @@ nouveau_vpe_mb_dct_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12_
}
}
-static INLINE void
+static inline void
nouveau_vpe_mb_data_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12_macroblock *mb)
{
int cbb;
@@ -143,7 +143,7 @@ nouveau_vpe_mb_data_blocks(struct nouveau_decoder *dec, const struct pipe_mpeg12
}
}
-static INLINE void
+static inline void
nouveau_vpe_mb_dct_header(struct nouveau_decoder *dec,
const struct pipe_mpeg12_macroblock *mb,
bool luma)
@@ -187,7 +187,7 @@ nouveau_vpe_mb_dct_header(struct nouveau_decoder *dec,
x | (y << NV17_MPEG_CMD_MB_COORDS_Y__SHIFT));
}
-static INLINE unsigned int
+static inline unsigned int
nouveau_vpe_mb_mv_flags(bool luma, int mv_h, int mv_v, bool forward, bool first, bool vert)
{
unsigned mc_header = 0;
@@ -228,7 +228,7 @@ static int div_up(int val, int mult) {
return val / mult;
}
-static INLINE void
+static inline void
nouveau_vpe_mb_mv(struct nouveau_decoder *dec, unsigned mc_header,
bool luma, bool frame, bool forward, bool vert,
int x, int y, const short motions[2],
@@ -296,16 +296,16 @@ nouveau_vpe_mb_mv_header(struct nouveau_decoder *dec,
case PIPE_MPEG12_MO_TYPE_DUAL_PRIME: {
base = NV17_MPEG_CMD_CHROMA_MV_HEADER_COUNT_2;
if (forward) {
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, FALSE,
- x, y, mb->PMV[0][0], dec->past, TRUE);
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, TRUE,
- x, y2, mb->PMV[0][0], dec->past, FALSE);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true, false,
+ x, y, mb->PMV[0][0], dec->past, true);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true, true,
+ x, y2, mb->PMV[0][0], dec->past, false);
}
if (backward && forward) {
- nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, TRUE,
- x, y, mb->PMV[1][0], dec->future, TRUE);
- nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, FALSE,
- x, y2, mb->PMV[1][1], dec->future, FALSE);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, true,
+ x, y, mb->PMV[1][0], dec->future, true);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, false,
+ x, y2, mb->PMV[1][1], dec->future, false);
} else assert(!backward);
break;
}
@@ -320,13 +320,13 @@ nouveau_vpe_mb_mv_header(struct nouveau_decoder *dec,
if (frame)
base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_TYPE_FRAME;
if (forward)
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true,
dec->picture_structure != PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP,
- x, y, mb->PMV[0][0], dec->past, TRUE);
+ x, y, mb->PMV[0][0], dec->past, true);
if (backward && forward)
- nouveau_vpe_mb_mv(dec, base, luma, frame, FALSE,
+ nouveau_vpe_mb_mv(dec, base, luma, frame, false,
dec->picture_structure == PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP,
- x, y, mb->PMV[0][1], dec->future, TRUE);
+ x, y, mb->PMV[0][1], dec->future, true);
else assert(!backward);
break;
}
@@ -341,11 +341,11 @@ mv1:
base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_TYPE_FRAME;
/* frame 16x16 */
if (forward)
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE, FALSE,
- x, y, mb->PMV[0][0], dec->past, TRUE);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true, false,
+ x, y, mb->PMV[0][0], dec->past, true);
if (backward)
- nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, FALSE,
- x, y, mb->PMV[0][1], dec->future, TRUE);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, !forward, false,
+ x, y, mb->PMV[0][1], dec->future, true);
return;
mv2:
@@ -353,20 +353,20 @@ mv2:
if (!frame)
base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_MV_SPLIT_HALF_MB;
if (forward) {
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true,
mb->motion_vertical_field_select & PIPE_MPEG12_FS_FIRST_FORWARD,
- x, y, mb->PMV[0][0], dec->past, TRUE);
- nouveau_vpe_mb_mv(dec, base, luma, frame, TRUE,
+ x, y, mb->PMV[0][0], dec->past, true);
+ nouveau_vpe_mb_mv(dec, base, luma, frame, true,
mb->motion_vertical_field_select & PIPE_MPEG12_FS_SECOND_FORWARD,
- x, y2, mb->PMV[1][0], dec->past, FALSE);
+ x, y2, mb->PMV[1][0], dec->past, false);
}
if (backward) {
nouveau_vpe_mb_mv(dec, base, luma, frame, !forward,
mb->motion_vertical_field_select & PIPE_MPEG12_FS_FIRST_BACKWARD,
- x, y, mb->PMV[0][1], dec->future, TRUE);
+ x, y, mb->PMV[0][1], dec->future, true);
nouveau_vpe_mb_mv(dec, base, luma, frame, !forward,
mb->motion_vertical_field_select & PIPE_MPEG12_FS_SECOND_BACKWARD,
- x, y2, mb->PMV[1][1], dec->future, FALSE);
+ x, y2, mb->PMV[1][1], dec->future, false);
}
}
@@ -438,14 +438,14 @@ nouveau_decoder_decode_macroblock(struct pipe_video_codec *decoder,
mb = (const struct pipe_mpeg12_macroblock *)pipe_mb;
for (i = 0; i < num_macroblocks; ++i, mb++) {
if (mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA) {
- nouveau_vpe_mb_dct_header(dec, mb, TRUE);
- nouveau_vpe_mb_dct_header(dec, mb, FALSE);
+ nouveau_vpe_mb_dct_header(dec, mb, true);
+ nouveau_vpe_mb_dct_header(dec, mb, false);
} else {
- nouveau_vpe_mb_mv_header(dec, mb, TRUE);
- nouveau_vpe_mb_dct_header(dec, mb, TRUE);
+ nouveau_vpe_mb_mv_header(dec, mb, true);
+ nouveau_vpe_mb_dct_header(dec, mb, true);
- nouveau_vpe_mb_mv_header(dec, mb, FALSE);
- nouveau_vpe_mb_dct_header(dec, mb, FALSE);
+ nouveau_vpe_mb_mv_header(dec, mb, false);
+ nouveau_vpe_mb_dct_header(dec, mb, false);
}
if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
nouveau_vpe_mb_dct_blocks(dec, mb);
diff --git a/src/gallium/drivers/nouveau/nouveau_video.h b/src/gallium/drivers/nouveau/nouveau_video.h
index 08d48b371fd..fd1bd527deb 100644
--- a/src/gallium/drivers/nouveau/nouveau_video.h
+++ b/src/gallium/drivers/nouveau/nouveau_video.h
@@ -45,7 +45,7 @@ struct nouveau_decoder {
#define NV31_VIDEO_BIND_CMD NV31_MPEG_IMAGE_Y_OFFSET__LEN
#define NV31_VIDEO_BIND_COUNT (NV31_MPEG_IMAGE_Y_OFFSET__LEN + 1)
-static INLINE void
+static inline void
nouveau_vpe_write(struct nouveau_decoder *dec, unsigned data) {
dec->cmds[dec->ofs++] = data;
}
@@ -54,33 +54,33 @@ nouveau_vpe_write(struct nouveau_decoder *dec, unsigned data) {
#define NV31_MPEG(mthd) SUBC_MPEG(NV31_MPEG_##mthd)
#define NV84_MPEG(mthd) SUBC_MPEG(NV84_MPEG_##mthd)
-static INLINE uint32_t
+static inline uint32_t
NV04_FIFO_PKHDR(int subc, int mthd, unsigned size)
{
return 0x00000000 | (size << 18) | (subc << 13) | mthd;
}
-static INLINE uint32_t
+static inline uint32_t
NV04_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
{
return 0x40000000 | (size << 18) | (subc << 13) | mthd;
}
-static INLINE void
+static inline void
BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
PUSH_SPACE(push, size + 1);
PUSH_DATA (push, NV04_FIFO_PKHDR(subc, mthd, size));
}
-static INLINE void
+static inline void
BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
PUSH_SPACE(push, size + 1);
PUSH_DATA (push, NV04_FIFO_PKHDR_NI(subc, mthd, size));
}
-static INLINE void
+static inline void
PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd,
struct nouveau_bo *bo, uint32_t offset,
struct nouveau_bufctx *ctx, int bin, uint32_t rw)
diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.h b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
index 279a1ce18ef..33e3bef3df3 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.h
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
@@ -135,22 +135,22 @@ struct comm {
uint32_t parse_endpos[0x10]; // 1c0
};
-static INLINE uint32_t nouveau_vp3_video_align(uint32_t h)
+static inline uint32_t nouveau_vp3_video_align(uint32_t h)
{
return ((h+0x3f)&~0x3f);
};
-static INLINE uint32_t mb(uint32_t coord)
+static inline uint32_t mb(uint32_t coord)
{
return (coord + 0xf)>>4;
}
-static INLINE uint32_t mb_half(uint32_t coord)
+static inline uint32_t mb_half(uint32_t coord)
{
return (coord + 0x1f)>>5;
}
-static INLINE uint64_t
+static inline uint64_t
nouveau_vp3_video_addr(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target)
{
uint64_t ret;
@@ -161,7 +161,7 @@ nouveau_vp3_video_addr(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video
return dec->ref_bo->offset + ret;
}
-static INLINE void
+static inline void
nouveau_vp3_ycbcr_offsets(struct nouveau_vp3_decoder *dec, uint32_t *y2,
uint32_t *cbcr, uint32_t *cbcr2)
{
@@ -182,7 +182,7 @@ nouveau_vp3_ycbcr_offsets(struct nouveau_vp3_decoder *dec, uint32_t *y2,
}
}
-static INLINE void
+static inline void
nouveau_vp3_inter_sizes(struct nouveau_vp3_decoder *dec, uint32_t slice_count,
uint32_t *slice_size, uint32_t *bucket_size,
uint32_t *ring_size)
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 51effb1d8d2..389a229eb78 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -15,34 +15,34 @@
#define NOUVEAU_MIN_BUFFER_MAP_ALIGN 64
#define NOUVEAU_MIN_BUFFER_MAP_ALIGN_MASK (NOUVEAU_MIN_BUFFER_MAP_ALIGN - 1)
-static INLINE uint32_t
+static inline uint32_t
PUSH_AVAIL(struct nouveau_pushbuf *push)
{
return push->end - push->cur;
}
-static INLINE boolean
+static inline bool
PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
{
if (PUSH_AVAIL(push) < size)
return nouveau_pushbuf_space(push, size, 0, 0) == 0;
- return TRUE;
+ return true;
}
-static INLINE void
+static inline void
PUSH_DATA(struct nouveau_pushbuf *push, uint32_t data)
{
*push->cur++ = data;
}
-static INLINE void
+static inline void
PUSH_DATAp(struct nouveau_pushbuf *push, const void *data, uint32_t size)
{
memcpy(push->cur, data, size * 4);
push->cur += size;
}
-static INLINE void
+static inline void
PUSH_DATAf(struct nouveau_pushbuf *push, float f)
{
union { float f; uint32_t i; } u;
@@ -50,7 +50,7 @@ PUSH_DATAf(struct nouveau_pushbuf *push, float f)
PUSH_DATA(push, u.i);
}
-static INLINE void
+static inline void
PUSH_KICK(struct nouveau_pushbuf *push)
{
nouveau_pushbuf_kick(push, push->channel);
@@ -60,7 +60,7 @@ PUSH_KICK(struct nouveau_pushbuf *push)
#define NOUVEAU_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define NOUVEAU_RESOURCE_FLAG_DRV_PRIV (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
-static INLINE uint32_t
+static inline uint32_t
nouveau_screen_transfer_flags(unsigned pipe)
{
uint32_t flags = 0;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h b/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
index 447f4b3b7ae..95468e580dd 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30-40_3d.xml.h
@@ -1459,6 +1459,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV40_3D_VTX_CACHE_INVALIDATE 0x00001714
+#define NV40_3D_VB_ELEMENT_BASE 0x0000173c
+
#define NV30_3D_VTXFMT(i0) (0x00001740 + 0x4*(i0))
#define NV30_3D_VTXFMT__ESIZE 0x00000004
#define NV30_3D_VTXFMT__LEN 0x00000010
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_clear.c b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
index 83fd1fa38dd..118cac77277 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -32,7 +32,7 @@
#include "nv30/nv30_context.h"
#include "nv30/nv30_format.h"
-static INLINE uint32_t
+static inline uint32_t
pack_rgba(enum pipe_format format, const float *rgba)
{
union util_color uc;
@@ -40,7 +40,7 @@ pack_rgba(enum pipe_format format, const float *rgba)
return uc.ui[0];
}
-static INLINE uint32_t
+static inline uint32_t
pack_zeta(enum pipe_format format, double depth, unsigned stencil)
{
uint32_t zuint = (uint32_t)(depth * 4294967295.0);
@@ -58,7 +58,7 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers,
struct pipe_framebuffer_state *fb = &nv30->framebuffer;
uint32_t colr = 0, zeta = 0, mode = 0;
- if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, TRUE))
+ if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, true))
return;
if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c
index 617b0887810..6e88ed725d6 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c
@@ -45,7 +45,7 @@ nv30_context_kick_notify(struct nouveau_pushbuf *push)
screen = &nv30->screen->base;
nouveau_fence_next(screen);
- nouveau_fence_update(screen, TRUE);
+ nouveau_fence_update(screen, true);
if (push->bufctx) {
struct nouveau_bufref *bref;
@@ -165,6 +165,12 @@ nv30_context_destroy(struct pipe_context *pipe)
if (nv30->draw)
draw_destroy(nv30->draw);
+ if (nv30->blit_vp)
+ nouveau_heap_free(&nv30->blit_vp);
+
+ if (nv30->blit_fp)
+ pipe_resource_reference(&nv30->blit_fp, NULL);
+
if (nv30->screen->base.pushbuf->user_priv == &nv30->bufctx)
nv30->screen->base.pushbuf->user_priv = NULL;
@@ -233,7 +239,7 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv)
nv30->config.aniso = NV40_3D_TEX_WRAP_ANISO_MIP_FILTER_OPTIMIZATION_OFF;
- if (debug_get_bool_option("NV30_SWTNL", FALSE))
+ if (debug_get_bool_option("NV30_SWTNL", false))
nv30->draw_flags |= NV30_NEW_SWTNL;
nv30->sample_mask = 0xffff;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.h b/src/gallium/drivers/nouveau/nv30/nv30_context.h
index 592cdbe24f9..d5c18bb62dc 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h
@@ -51,7 +51,8 @@ struct nv30_context {
unsigned rt_enable;
unsigned scissor_off;
unsigned num_vtxelts;
- boolean prim_restart;
+ int index_bias;
+ bool prim_restart;
struct nv30_fragprog *fragprog;
} state;
@@ -114,17 +115,17 @@ struct nv30_context {
uint32_t vbo_user;
unsigned vbo_min_index;
unsigned vbo_max_index;
- boolean vbo_push_hint;
+ bool vbo_push_hint;
struct nouveau_heap *blit_vp;
struct pipe_resource *blit_fp;
struct pipe_query *render_cond_query;
unsigned render_cond_mode;
- boolean render_cond_cond;
+ bool render_cond_cond;
};
-static INLINE struct nv30_context *
+static inline struct nv30_context *
nv30_context(struct pipe_context *pipe)
{
return (struct nv30_context *)pipe;
@@ -203,8 +204,8 @@ nv30_draw_init(struct pipe_context *pipe);
void
nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);
-boolean
-nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl);
+bool
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, bool hwtnl);
void
nv30_state_release(struct nv30_context *nv30);
@@ -213,7 +214,7 @@ nv30_state_release(struct nv30_context *nv30);
#define NV30_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NV30_3D_VERTEX_BEGIN_END_##n
-static INLINE unsigned
+static inline unsigned
nv30_prim_gl(unsigned prim)
{
switch (prim) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_draw.c b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
index c1665b7ad2f..098d6e499fa 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
@@ -52,7 +52,7 @@ struct nv30_render {
uint32_t prim;
};
-static INLINE struct nv30_render *
+static inline struct nv30_render *
nv30_render(struct vbuf_render *render)
{
return (struct nv30_render *)render;
@@ -79,12 +79,12 @@ nv30_render_allocate_vertices(struct vbuf_render *render,
PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM,
render->max_vertex_buffer_bytes);
if (!r->buffer)
- return FALSE;
+ return false;
r->offset = 0;
}
- return TRUE;
+ return true;
}
static void *
@@ -134,7 +134,7 @@ nv30_render_draw_elements(struct vbuf_render *render,
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, ~0, FALSE))
+ if (!nv30_state_validate(nv30, ~0, false))
return;
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -179,7 +179,7 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
}
- if (!nv30_state_validate(nv30, ~0, FALSE))
+ if (!nv30_state_validate(nv30, ~0, false))
return;
BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -221,7 +221,7 @@ static const struct {
[TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 },
};
-static boolean
+static bool
vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
{
struct nv30_screen *screen = r->nv30->screen;
@@ -245,7 +245,7 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
}
if (emit == EMIT_OMIT)
- return FALSE;
+ return false;
draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib);
format = draw_translate_vinfo_format(emit);
@@ -272,10 +272,10 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
assert(sem == TGSI_SEMANTIC_TEXCOORD);
*idx = 0x00001000 << (result - 8);
}
- return TRUE;
+ return true;
}
-static boolean
+static bool
nv30_render_validate(struct nv30_context *nv30)
{
struct nv30_render *r = nv30_render(nv30->draw->render);
@@ -300,7 +300,7 @@ nv30_render_validate(struct nv30_context *nv30)
}
if (nouveau_heap_alloc(heap, 16, &r->vertprog, &r->vertprog))
- return FALSE;
+ return false;
}
}
@@ -370,7 +370,7 @@ nv30_render_validate(struct nv30_context *nv30)
}
vinfo->size /= 4;
- return TRUE;
+ return true;
}
void
@@ -519,6 +519,6 @@ nv30_draw_init(struct pipe_context *pipe)
draw_set_rasterize_stage(draw, stage);
draw_wide_line_threshold(draw, 10000000.f);
draw_wide_point_threshold(draw, 10000000.f);
- draw_wide_point_sprites(draw, TRUE);
+ draw_wide_point_sprites(draw, true);
nv30->draw = draw;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_format.h b/src/gallium/drivers/nouveau/nv30/nv30_format.h
index 8bf4a37299f..fa1e922fb65 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_format.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_format.h
@@ -27,28 +27,28 @@ struct nv30_texfmt {
};
extern const struct nv30_format_info nv30_format_info_table[];
-static INLINE const struct nv30_format_info *
+static inline const struct nv30_format_info *
nv30_format_info(struct pipe_screen *pscreen, enum pipe_format format)
{
return &nv30_format_info_table[format];
}
extern const struct nv30_format nv30_format_table[];
-static INLINE const struct nv30_format *
+static inline const struct nv30_format *
nv30_format(struct pipe_screen *pscreen, enum pipe_format format)
{
return &nv30_format_table[format];
}
extern const struct nv30_vtxfmt nv30_vtxfmt_table[];
-static INLINE const struct nv30_vtxfmt *
+static inline const struct nv30_vtxfmt *
nv30_vtxfmt(struct pipe_screen *pscreen, enum pipe_format format)
{
return &nv30_vtxfmt_table[format];
}
extern const struct nv30_texfmt nv30_texfmt_table[];
-static INLINE const struct nv30_texfmt *
+static inline const struct nv30_texfmt *
nv30_texfmt(struct pipe_screen *pscreen, enum pipe_format format)
{
return &nv30_texfmt_table[format];
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
index 7f227868f73..6de61bcc1c0 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
@@ -37,22 +37,26 @@ nv30_fragprog_upload(struct nv30_context *nv30)
struct nouveau_context *nv = &nv30->base;
struct nv30_fragprog *fp = nv30->fragprog.program;
struct pipe_context *pipe = &nv30->base.pipe;
- struct pipe_transfer *transfer;
- uint32_t *map;
- int i; (void)i;
- if (unlikely(!fp->buffer)) {
+ if (unlikely(!fp->buffer))
fp->buffer = pipe_buffer_create(pipe->screen, 0, 0, fp->insn_len * 4);
- }
- map = pipe_buffer_map(pipe, fp->buffer, PIPE_TRANSFER_WRITE, &transfer);
#ifndef PIPE_ARCH_BIG_ENDIAN
- memcpy(map, fp->insn, fp->insn_len * 4);
+ pipe_buffer_write(pipe, fp->buffer, 0, fp->insn_len * 4, fp->insn);
#else
- for (i = 0; i < fp->insn_len; i++)
- *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16);
+ {
+ struct pipe_transfer *transfer;
+ uint32_t *map;
+ int i;
+
+ map = pipe_buffer_map(pipe, fp->buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
+ &transfer);
+ for (i = 0; i < fp->insn_len; i++)
+ *map++ = (fp->insn[i] >> 16) | (fp->insn[i] << 16);
+ pipe_buffer_unmap(pipe, transfer);
+ }
#endif
- pipe_buffer_unmap(pipe, transfer);
if (nv04_resource(fp->buffer)->domain != NOUVEAU_BO_VRAM)
nouveau_buffer_migrate(nv, nv04_resource(fp->buffer), NOUVEAU_BO_VRAM);
@@ -64,7 +68,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
struct nouveau_pushbuf *push = nv30->base.pushbuf;
struct nouveau_object *eng3d = nv30->screen->eng3d;
struct nv30_fragprog *fp = nv30->fragprog.program;
- boolean upload = FALSE;
+ bool upload = false;
int i;
if (!fp->translated) {
@@ -72,7 +76,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
if (!fp->translated)
return;
- upload = TRUE;
+ upload = true;
}
/* update constants, also needs to be done on every fp switch as we
@@ -89,7 +93,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
if (!memcmp(&fp->insn[off], &cbuf[idx], 4 * 4))
continue;
memcpy(&fp->insn[off], &cbuf[idx], 4 * 4);
- upload = TRUE;
+ upload = true;
}
}
@@ -161,8 +165,15 @@ static void
nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nv30_context *nv30 = nv30_context(pipe);
+ struct nv30_fragprog *fp = hwcso;
- nv30->fragprog.program = hwcso;
+ /* reset the bufctx so that we don't keep a dangling reference to the fp
+ * code
+ */
+ if (fp != nv30->state.fragprog)
+ PUSH_RESET(nv30->base.pushbuf, BUFCTX_FRAGPROG);
+
+ nv30->fragprog.program = fp;
nv30->dirty |= NV30_NEW_FRAGPROG;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
index 1a4b8929c0f..c75b4b95fd8 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -33,7 +33,7 @@
#include "nv30/nv30_resource.h"
#include "nv30/nv30_transfer.h"
-static INLINE unsigned
+static inline unsigned
layer_offset(struct pipe_resource *pt, unsigned level, unsigned layer)
{
struct nv30_miptree *mt = nv30_miptree(pt);
@@ -54,7 +54,7 @@ nv30_miptree_get_handle(struct pipe_screen *pscreen,
unsigned stride;
if (!mt || !mt->base.bo)
- return FALSE;
+ return false;
stride = mt->level[0].pitch;
@@ -78,13 +78,13 @@ struct nv30_transfer {
unsigned nblocksy;
};
-static INLINE struct nv30_transfer *
+static inline struct nv30_transfer *
nv30_transfer(struct pipe_transfer *ptx)
{
return (struct nv30_transfer *)ptx;
}
-static INLINE void
+static inline void
define_rect(struct pipe_resource *pt, unsigned level, unsigned z,
unsigned x, unsigned y, unsigned w, unsigned h,
struct nv30_rect *rect)
@@ -242,8 +242,8 @@ nv30_miptree_transfer_map(struct pipe_context *pipe, struct pipe_resource *pt,
tx->base.level = level;
tx->base.usage = usage;
tx->base.box = *box;
- tx->base.stride = util_format_get_nblocksx(pt->format, box->width) *
- util_format_get_blocksize(pt->format);
+ tx->base.stride = align(util_format_get_nblocksx(pt->format, box->width) *
+ util_format_get_blocksize(pt->format), 64);
tx->base.layer_stride = util_format_get_nblocksy(pt->format, box->height) *
tx->base.stride;
@@ -372,7 +372,7 @@ nv30_miptree_create(struct pipe_screen *pscreen,
}
if (!mt->uniform_pitch)
- mt->swizzled = TRUE;
+ mt->swizzled = true;
size = 0;
for (l = 0; l <= pt->last_level; l++) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_push.c b/src/gallium/drivers/nouveau/nv30/nv30_push.c
index e0734fa70d3..67ab0508c17 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_push.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_push.c
@@ -47,12 +47,12 @@ struct push_context {
struct translate *translate;
- boolean primitive_restart;
+ bool primitive_restart;
uint32_t prim;
uint32_t restart_index;
};
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
{
unsigned i;
@@ -62,7 +62,7 @@ prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
{
unsigned i;
@@ -72,7 +72,7 @@ prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
{
unsigned i;
@@ -199,7 +199,7 @@ nv30_push_vbo(struct nv30_context *nv30, const struct pipe_draw_info *info)
{
struct push_context ctx;
unsigned i, index_size;
- boolean apply_bias = info->indexed && info->index_bias;
+ bool apply_bias = info->indexed && info->index_bias;
ctx.push = nv30->base.pushbuf;
ctx.translate = nv30->vertex->translate;
@@ -241,7 +241,7 @@ nv30_push_vbo(struct nv30_context *nv30, const struct pipe_draw_info *info)
} else {
ctx.idxbuf = NULL;
index_size = 0;
- ctx.primitive_restart = FALSE;
+ ctx.primitive_restart = false;
ctx.restart_index = 0;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_query.c b/src/gallium/drivers/nouveau/nv30/nv30_query.c
index 516ee83168e..3980be9579a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_query.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_query.c
@@ -98,7 +98,7 @@ struct nv30_query {
uint64_t result;
};
-static INLINE struct nv30_query *
+static inline struct nv30_query *
nv30_query(struct pipe_query *pipe)
{
return (struct nv30_query *)pipe;
@@ -208,7 +208,7 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
if (ntfy1) {
while (ntfy1[3] & 0xff000000) {
if (!wait)
- return FALSE;
+ return false;
}
switch (q->type) {
@@ -228,7 +228,7 @@ nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq,
}
*res64 = q->result;
- return TRUE;
+ return true;
}
static void
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.c b/src/gallium/drivers/nouveau/nv30/nv30_resource.c
index 38fac8af898..a98a6464de8 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.c
@@ -42,12 +42,12 @@ nv30_memory_barrier(struct pipe_context *pipe, unsigned flags)
if (!nv30->vtxbuf[i].buffer)
continue;
if (nv30->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
if (nv30->idxbuf.buffer &&
nv30->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_resource.h b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
index 1981c8d9ab9..8dac7795c9d 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
@@ -15,7 +15,7 @@ struct nv30_surface {
uint16_t depth;
};
-static INLINE struct nv30_surface *
+static inline struct nv30_surface *
nv30_surface(struct pipe_surface *ps)
{
return (struct nv30_surface *)ps;
@@ -32,13 +32,13 @@ struct nv30_miptree {
struct nv30_miptree_level level[13];
uint32_t uniform_pitch;
uint32_t layer_size;
- boolean swizzled;
+ bool swizzled;
unsigned ms_mode;
unsigned ms_x:1;
unsigned ms_y:1;
};
-static INLINE struct nv30_miptree *
+static inline struct nv30_miptree *
nv30_miptree(struct pipe_resource *pt)
{
return (struct nv30_miptree *)pt;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 2e38a1978ae..7aad26ba18b 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -69,6 +69,8 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return PIPE_ENDIAN_LITTLE;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
return 16;
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_MAX_VIEWPORTS:
return 1;
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
@@ -96,6 +98,9 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return 1;
+ /* nv35 capabilities */
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
+ return eng3d->oclass == NV35_3D_CLASS || eng3d->oclass >= NV40_3D_CLASS;
/* nv4x capabilities */
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_NPOT_TEXTURES:
@@ -135,7 +140,6 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
- case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
@@ -162,6 +166,9 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -313,12 +320,12 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
unsigned bindings)
{
if (sample_count > 4)
- return FALSE;
+ return false;
if (!(0x00000017 & (1 << sample_count)))
- return FALSE;
+ return false;
if (!util_format_is_supported(format, bindings)) {
- return FALSE;
+ return false;
}
/* transfers & shared are always supported */
@@ -656,6 +663,6 @@ nv30_screen_create(struct nouveau_device *dev)
nouveau_pushbuf_kick(push, push->channel);
- nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.h b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
index 3f2e47fec99..7b17b88097c 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
@@ -40,7 +40,7 @@ struct nv30_screen {
struct nouveau_heap *vp_data_heap;
};
-static INLINE struct nv30_screen *
+static inline struct nv30_screen *
nv30_screen(struct pipe_screen *pscreen)
{
return (struct nv30_screen *)pscreen;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.c b/src/gallium/drivers/nouveau/nv30/nv30_state.c
index 708ba34c1e5..fd604c2266d 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state.c
@@ -211,6 +211,7 @@ static void *
nv30_zsa_state_create(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *cso)
{
+ struct nouveau_object *eng3d = nv30_context(pipe)->screen->eng3d;
struct nv30_zsa_stateobj *so;
so = CALLOC_STRUCT(nv30_zsa_stateobj);
@@ -223,6 +224,13 @@ nv30_zsa_state_create(struct pipe_context *pipe,
SB_DATA (so, cso->depth.writemask);
SB_DATA (so, cso->depth.enabled);
+ if (eng3d->oclass == NV35_3D_CLASS || eng3d->oclass >= NV40_3D_CLASS) {
+ SB_MTHD35(so, DEPTH_BOUNDS_TEST_ENABLE, 3);
+ SB_DATA (so, cso->depth.bounds_test);
+ SB_DATA (so, fui(cso->depth.bounds_min));
+ SB_DATA (so, fui(cso->depth.bounds_max));
+ }
+
if (cso->stencil[0].enabled) {
SB_MTHD30(so, STENCIL_ENABLE(0), 3);
SB_DATA (so, 1);
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.h b/src/gallium/drivers/nouveau/nv30/nv30_state.h
index e27e16fae82..ed3b8103a00 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state.h
@@ -13,6 +13,8 @@
#define SB_DATA(so, u) (so)->data[(so)->size++] = (u)
#define SB_MTHD30(so, mthd, size) \
SB_DATA((so), ((size) << 18) | (7 << 13) | NV30_3D_##mthd)
+#define SB_MTHD35(so, mthd, size) \
+ SB_DATA((so), ((size) << 18) | (7 << 13) | NV35_3D_##mthd)
#define SB_MTHD40(so, mthd, size) \
SB_DATA((so), ((size) << 18) | (7 << 13) | NV40_3D_##mthd)
@@ -30,7 +32,7 @@ struct nv30_rasterizer_stateobj {
struct nv30_zsa_stateobj {
struct pipe_depth_stencil_alpha_state pipe;
- unsigned data[32];
+ unsigned data[36];
unsigned size;
};
@@ -80,7 +82,7 @@ struct nv30_vertprog {
struct tgsi_shader_info info;
struct draw_vertex_shader *draw;
- boolean translated;
+ bool translated;
unsigned enabled_ucps;
uint16_t texcoord[10];
@@ -109,7 +111,7 @@ struct nv30_fragprog {
struct tgsi_shader_info info;
struct draw_fragment_shader *draw;
- boolean translated;
+ bool translated;
uint32_t *insn;
unsigned insn_len;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
index a954dcce562..8957634f0fa 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
@@ -453,8 +453,8 @@ nv30_state_context_switch(struct nv30_context *nv30)
nv30->base.pushbuf->user_priv = &nv30->bufctx;
}
-boolean
-nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
+bool
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, bool hwtnl)
{
struct nouveau_screen *screen = &nv30->screen->base;
struct nouveau_pushbuf *push = nv30->base.pushbuf;
@@ -494,7 +494,7 @@ nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
nouveau_pushbuf_bufctx(push, bctx);
if (nouveau_pushbuf_validate(push)) {
nouveau_pushbuf_bufctx(push, NULL);
- return FALSE;
+ return false;
}
/*XXX*/
@@ -528,7 +528,7 @@ nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
}
}
- return TRUE;
+ return true;
}
void
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_texture.c b/src/gallium/drivers/nouveau/nv30/nv30_texture.c
index c3567217442..bfe21cceaa2 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_texture.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_texture.c
@@ -37,7 +37,7 @@
#define NV40_WRAP(n) \
case PIPE_TEX_WRAP_##n: ret = NV40_3D_TEX_WRAP_S_##n; break
-static INLINE unsigned
+static inline unsigned
wrap_mode(unsigned pipe)
{
unsigned ret = NV30_3D_TEX_WRAP_S_REPEAT;
@@ -58,7 +58,7 @@ wrap_mode(unsigned pipe)
return ret >> NV30_3D_TEX_WRAP_S__SHIFT;
}
-static INLINE unsigned
+static inline unsigned
filter_mode(const struct pipe_sampler_state *cso)
{
unsigned filter;
@@ -104,7 +104,7 @@ filter_mode(const struct pipe_sampler_state *cso)
return filter;
}
-static INLINE unsigned
+static inline unsigned
compare_mode(const struct pipe_sampler_state *cso)
{
if (cso->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE)
@@ -201,7 +201,7 @@ nv30_bind_sampler_states(struct pipe_context *pipe,
}
}
-static INLINE uint32_t
+static inline uint32_t
swizzle(const struct nv30_texfmt *fmt, unsigned cmp, unsigned swz)
{
uint32_t data = fmt->swz[swz].src << 8;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
index 99bc0994ac2..214da6568c3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -41,33 +41,33 @@
* of different ways.
*/
-static INLINE boolean
+static inline bool
nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst)
{
if (src->x1 - src->x0 != dst->x1 - dst->x0)
- return TRUE;
+ return true;
if (src->y1 - src->y0 != dst->y1 - dst->y0)
- return TRUE;
- return FALSE;
+ return true;
+ return false;
}
-static INLINE boolean
+static inline bool
nv30_transfer_blit(XFER_ARGS)
{
if (nv30->screen->eng3d->oclass < NV40_3D_CLASS)
- return FALSE;
+ return false;
if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1)
- return FALSE;
+ return false;
if (dst->w < 2 || dst->h < 2)
- return FALSE;
+ return false;
if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch))
- return FALSE;
+ return false;
if (src->cpp > 4)
- return FALSE;
- return TRUE;
+ return false;
+ return true;
}
-static INLINE struct nouveau_heap *
+static inline struct nouveau_heap *
nv30_transfer_rect_vertprog(struct nv30_context *nv30)
{
struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
@@ -108,7 +108,7 @@ nv30_transfer_rect_vertprog(struct nv30_context *nv30)
}
-static INLINE struct nv04_resource *
+static inline struct nv04_resource *
nv30_transfer_rect_fragprog(struct nv30_context *nv30)
{
struct nv04_resource *fp = nv04_resource(nv30->blit_fp);
@@ -368,29 +368,29 @@ nv30_transfer_rect_blit(XFER_ARGS)
PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
}
-static boolean
+static bool
nv30_transfer_sifm(XFER_ARGS)
{
if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
- return FALSE;
+ return false;
if (src->d > 1 || dst->d > 1)
- return FALSE;
+ return false;
if (dst->offset & 63)
- return FALSE;
+ return false;
if (!dst->pitch) {
if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
- return FALSE;
+ return false;
} else {
if (dst->domain != NOUVEAU_BO_VRAM)
- return FALSE;
+ return false;
if (dst->pitch & 63)
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
static void
@@ -481,14 +481,14 @@ nv30_transfer_rect_sifm(XFER_ARGS)
* that name is still accurate on nv4x) error.
*/
-static boolean
+static bool
nv30_transfer_m2mf(XFER_ARGS)
{
if (!src->pitch || !dst->pitch)
- return FALSE;
+ return false;
if (nv30_transfer_scaled(src, dst))
- return FALSE;
- return TRUE;
+ return false;
+ return true;
}
static void
@@ -540,12 +540,12 @@ nv30_transfer_rect_m2mf(XFER_ARGS)
}
}
-static boolean
+static bool
nv30_transfer_cpu(XFER_ARGS)
{
if (nv30_transfer_scaled(src, dst))
- return FALSE;
- return TRUE;
+ return false;
+ return true;
}
static char *
@@ -554,7 +554,7 @@ linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
return base + (y * rect->pitch) + (x * rect->cpp);
}
-static INLINE unsigned
+static inline unsigned
swizzle2d(unsigned v, unsigned s)
{
v = (v | (v << 8)) & 0x00ff00ff;
@@ -614,7 +614,7 @@ swizzle3d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int);
-static INLINE get_ptr_t
+static inline get_ptr_t
get_ptr(struct nv30_rect *rect)
{
if (rect->pitch)
@@ -653,7 +653,7 @@ nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter,
{
static const struct {
char *name;
- boolean (*possible)(XFER_ARGS);
+ bool (*possible)(XFER_ARGS);
void (*execute)(XFER_ARGS);
} *method, methods[] = {
{ "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf },
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
index d4e384b21d2..8494549e9b1 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -79,7 +79,7 @@ nv30_emit_vtxattr(struct nv30_context *nv30, struct pipe_vertex_buffer *vb,
}
}
-static INLINE void
+static inline void
nv30_vbuf_range(struct nv30_context *nv30, int vbi,
uint32_t *base, uint32_t *size)
{
@@ -119,7 +119,7 @@ nv30_prevalidate_vbufs(struct nv30_context *nv30)
} else {
nouveau_buffer_migrate(&nv30->base, buf, NOUVEAU_BO_GART);
}
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
}
}
@@ -160,10 +160,10 @@ nv30_update_user_vbufs(struct nv30_context *nv30)
NOUVEAU_BO_LOW | NOUVEAU_BO_RD,
0, NV30_3D_VTXBUF_DMA1);
}
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
-static INLINE void
+static inline void
nv30_release_user_vbufs(struct nv30_context *nv30)
{
uint32_t vbo_user = nv30->vbo_user;
@@ -202,6 +202,9 @@ nv30_vbo_validate(struct nv30_context *nv30)
return;
redefine = MAX2(vertex->num_elements, nv30->state.num_vtxelts);
+ if (redefine == 0)
+ return;
+
BEGIN_NV04(push, NV30_3D(VTXFMT(0)), redefine);
for (i = 0; i < vertex->num_elements; i++) {
@@ -221,7 +224,7 @@ nv30_vbo_validate(struct nv30_context *nv30)
for (i = 0; i < vertex->num_elements; i++) {
struct nv04_resource *res;
unsigned offset;
- boolean user;
+ bool user;
ve = &vertex->pipe[i];
vb = &nv30->vtxbuf[ve->vertex_buffer_index];
@@ -254,14 +257,12 @@ nv30_vertex_state_create(struct pipe_context *pipe, unsigned num_elements,
struct translate_key transkey;
unsigned i;
- assert(num_elements);
-
so = MALLOC(sizeof(*so) + sizeof(*so->element) * num_elements);
if (!so)
return NULL;
memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
so->num_elements = num_elements;
- so->need_conversion = FALSE;
+ so->need_conversion = false;
transkey.nr_elements = 0;
transkey.output_stride = 0;
@@ -284,7 +285,7 @@ nv30_vertex_state_create(struct pipe_context *pipe, unsigned num_elements,
return NULL;
}
so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw;
- so->need_conversion = TRUE;
+ so->need_conversion = true;
}
if (1) {
@@ -452,7 +453,7 @@ nv30_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
}
static void
-nv30_draw_elements(struct nv30_context *nv30, boolean shorten,
+nv30_draw_elements(struct nv30_context *nv30, bool shorten,
unsigned mode, unsigned start, unsigned count,
unsigned instance_count, int32_t index_bias)
{
@@ -461,13 +462,11 @@ nv30_draw_elements(struct nv30_context *nv30, boolean shorten,
struct nouveau_object *eng3d = nv30->screen->eng3d;
unsigned prim = nv30_prim_gl(mode);
-#if 0 /*XXX*/
- if (index_bias != nv30->state.index_bias) {
- BEGIN_NV04(push, NV30_3D(VB_ELEMENT_BASE), 1);
+ if (eng3d->oclass >= NV40_3D_CLASS && index_bias != nv30->state.index_bias) {
+ BEGIN_NV04(push, NV40_3D(VB_ELEMENT_BASE), 1);
PUSH_DATA (push, index_bias);
nv30->state.index_bias = index_bias;
}
-#endif
if (eng3d->oclass == NV40_3D_CLASS && index_size > 1 &&
nv30->idxbuf.buffer) {
@@ -564,7 +563,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
nv30_update_user_vbufs(nv30);
- nv30_state_validate(nv30, ~0, TRUE);
+ nv30_state_validate(nv30, ~0, true);
if (nv30->draw_flags) {
nv30_render_vbo(pipe, info);
return;
@@ -578,17 +577,17 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nv30->vtxbuf[i].buffer)
continue;
if (nv30->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
}
if (!nv30->base.vbo_dirty && nv30->idxbuf.buffer &&
nv30->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv30->base.vbo_dirty = TRUE;
+ nv30->base.vbo_dirty = true;
if (nv30->base.vbo_dirty) {
BEGIN_NV04(push, NV30_3D(VTX_CACHE_INVALIDATE_1710), 1);
PUSH_DATA (push, 0);
- nv30->base.vbo_dirty = FALSE;
+ nv30->base.vbo_dirty = false;
}
if (!info->indexed) {
@@ -596,7 +595,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->mode, info->start, info->count,
info->instance_count);
} else {
- boolean shorten = info->max_index <= 65535;
+ bool shorten = info->max_index <= 65535;
if (info->primitive_restart != nv30->state.prim_restart) {
if (info->primitive_restart) {
@@ -605,7 +604,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->restart_index);
if (info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
} else {
BEGIN_NV04(push, NV40_3D(PRIM_RESTART_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -617,7 +616,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->restart_index);
if (info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
}
nv30_draw_elements(nv30, shorten,
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
index 4d4145d10b5..ee0a6280d7a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
@@ -48,7 +48,7 @@ nv30_vertprog_destroy(struct nv30_vertprog *vp)
vp->consts = NULL;
vp->nr_consts = 0;
- vp->translated = FALSE;
+ vp->translated = false;
}
void
@@ -58,8 +58,8 @@ nv30_vertprog_validate(struct nv30_context *nv30)
struct nouveau_object *eng3d = nv30->screen->eng3d;
struct nv30_vertprog *vp = nv30->vertprog.program;
struct nv30_fragprog *fp = nv30->fragprog.program;
- boolean upload_code = FALSE;
- boolean upload_data = FALSE;
+ bool upload_code = false;
+ bool upload_data = false;
unsigned i;
if (nv30->dirty & NV30_NEW_FRAGPROG) {
@@ -125,7 +125,7 @@ nv30_vertprog_validate(struct nv30_context *nv30)
}
}
- upload_code = TRUE;
+ upload_code = true;
}
if (vp->nr_consts && !vp->data) {
@@ -166,8 +166,8 @@ nv30_vertprog_validate(struct nv30_context *nv30)
}
}
- upload_code = TRUE;
- upload_data = TRUE;
+ upload_code = true;
+ upload_data = true;
}
if (vp->nr_consts) {
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_winsys.h b/src/gallium/drivers/nouveau/nv30/nv30_winsys.h
index 5cee5df60ce..2324b517c44 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_winsys.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_winsys.h
@@ -19,34 +19,34 @@
#define NV40_3D_PRIM_RESTART_ENABLE 0x1dac
#define NV40_3D_PRIM_RESTART_INDEX 0x1db0
-static INLINE void
+static inline void
PUSH_RELOC(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t offset,
uint32_t flags, uint32_t vor, uint32_t tor)
{
nouveau_pushbuf_reloc(push, bo, offset, flags, vor, tor);
}
-static INLINE struct nouveau_bufctx *
+static inline struct nouveau_bufctx *
bufctx(struct nouveau_pushbuf *push)
{
struct nouveau_bufctx **pctx = push->user_priv;
return *pctx;
}
-static INLINE void
+static inline void
PUSH_RESET(struct nouveau_pushbuf *push, int bin)
{
nouveau_bufctx_reset(bufctx(push), bin);
}
-static INLINE void
+static inline void
PUSH_REFN(struct nouveau_pushbuf *push, int bin,
struct nouveau_bo *bo, uint32_t access)
{
nouveau_bufctx_refn(bufctx(push), bin, bo, access);
}
-static INLINE void
+static inline void
PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t offset, uint32_t access)
{
@@ -55,7 +55,7 @@ PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
PUSH_DATA(push, bo->offset + offset);
}
-static INLINE void
+static inline void
PUSH_MTHDo(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t access, uint32_t vor, uint32_t tor)
{
@@ -67,7 +67,7 @@ PUSH_MTHDo(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
PUSH_DATA(push, tor);
}
-static INLINE void
+static inline void
PUSH_MTHDs(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t data, uint32_t access,
uint32_t vor, uint32_t tor)
@@ -80,7 +80,7 @@ PUSH_MTHDs(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
PUSH_DATA(push, data | tor);
}
-static INLINE struct nouveau_bufref *
+static inline struct nouveau_bufref *
PUSH_MTHD(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t data, uint32_t access,
uint32_t vor, uint32_t tor)
@@ -99,7 +99,7 @@ PUSH_MTHD(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
return bref;
}
-static INLINE void
+static inline void
PUSH_RESRC(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nv04_resource *r, uint32_t data, uint32_t access,
uint32_t vor, uint32_t tor)
@@ -108,14 +108,14 @@ PUSH_RESRC(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
r->domain | access, vor, tor)->priv = r;
}
-static INLINE void
+static inline void
BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
PUSH_SPACE(push, size + 1);
PUSH_DATA (push, 0x00000000 | (size << 18) | (subc << 13) | mthd);
}
-static INLINE void
+static inline void
BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
PUSH_SPACE(push, size + 1);
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
index 9ef16965f39..e68d23e5587 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c
@@ -44,7 +44,7 @@ struct nvfx_fpc {
struct util_dynarray label_relocs;
};
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
temp(struct nvfx_fpc *fpc)
{
int idx = __builtin_ctzll(~fpc->r_temps);
@@ -60,7 +60,7 @@ temp(struct nvfx_fpc *fpc)
return nvfx_reg(NVFXSR_TEMP, idx);
}
-static INLINE void
+static inline void
release_temps(struct nvfx_fpc *fpc)
{
fpc->r_temps &= ~fpc->r_temps_discard;
@@ -373,7 +373,7 @@ nv40_fp_brk(struct nvfx_fpc *fpc)
hw[3] = 0;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
{
struct nvfx_src src;
@@ -415,7 +415,7 @@ tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
return src;
}
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
switch (fdst->Register.File) {
case TGSI_FILE_OUTPUT:
@@ -430,7 +430,7 @@ tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
}
}
-static INLINE int
+static inline int
tgsi_mask(uint tgsi)
{
int mask = 0;
@@ -442,7 +442,7 @@ tgsi_mask(uint tgsi)
return mask;
}
-static boolean
+static bool
nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
const struct tgsi_full_instruction *finst)
{
@@ -455,7 +455,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
int i;
if (finst->Instruction.Opcode == TGSI_OPCODE_END)
- return TRUE;
+ return true;
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
const struct tgsi_full_src_register *fsrc;
@@ -525,7 +525,7 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
break;
default:
NOUVEAU_ERR("bad src file\n");
- return FALSE;
+ return false;
}
}
@@ -868,12 +868,12 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc,
default:
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
- return FALSE;
+ return false;
}
out:
release_temps(fpc);
- return TRUE;
+ return true;
nv3x_cflow:
{
static int warned = 0;
@@ -887,7 +887,7 @@ nv3x_cflow:
goto out;
}
-static boolean
+static bool
nvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
@@ -917,17 +917,17 @@ nvfx_fragprog_parse_decl_input(struct nvfx_fpc *fpc,
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_PCOORD:
/* will be assigned to remaining TC slots later */
- return TRUE;
+ return true;
default:
assert(0);
- return FALSE;
+ return false;
}
fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
- return TRUE;
+ return true;
}
-static boolean
+static bool
nvfx_fragprog_assign_generic(struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
@@ -954,16 +954,16 @@ nvfx_fragprog_assign_generic(struct nvfx_fpc *fpc,
}
hw = NVFX_FP_OP_INPUT_SRC_TC(hw);
fpc->r_input[idx] = nvfx_reg(NVFXSR_INPUT, hw);
- return TRUE;
+ return true;
}
}
- return FALSE;
+ return false;
default:
- return TRUE;
+ return true;
}
}
-static boolean
+static bool
nvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc,
const struct tgsi_full_declaration *fdec)
{
@@ -984,20 +984,20 @@ nvfx_fragprog_parse_decl_output(struct nvfx_fpc *fpc,
}
if(hw > ((fpc->is_nv4x) ? 4 : 2)) {
NOUVEAU_ERR("bad rcol index\n");
- return FALSE;
+ return false;
}
break;
default:
NOUVEAU_ERR("bad output semantic\n");
- return FALSE;
+ return false;
}
fpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
fpc->r_temps |= (1ULL << hw);
- return TRUE;
+ return true;
}
-static boolean
+static bool
nvfx_fragprog_prepare(struct nvfx_fpc *fpc)
{
struct tgsi_parse_context p;
@@ -1081,17 +1081,17 @@ nvfx_fragprog_prepare(struct nvfx_fpc *fpc)
fpc->r_temps_discard = 0ULL;
}
- return TRUE;
+ return true;
out_err:
FREE(fpc->r_temp);
fpc->r_temp = NULL;
tgsi_parse_free(&p);
- return FALSE;
+ return false;
}
-DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_fp, "NVFX_DUMP_FP", false)
void
_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
@@ -1100,7 +1100,7 @@ _nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
struct nvfx_fpc *fpc = NULL;
struct util_dynarray insns;
- fp->translated = FALSE;
+ fp->translated = false;
fp->point_sprite_control = 0;
fp->vp_or = 0;
@@ -1182,7 +1182,7 @@ _nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp)
debug_printf("\n");
}
- fp->translated = TRUE;
+ fp->translated = true;
out:
tgsi_parse_free(&parse);
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
index 9538a793d7e..e66d8af7620 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_shader.h
@@ -448,8 +448,8 @@ struct nvfx_insn
struct nvfx_src src[3];
};
-static INLINE struct nvfx_insn
-nvfx_insn(boolean sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2)
+static inline struct nvfx_insn
+nvfx_insn(bool sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask, struct nvfx_src s0, struct nvfx_src s1, struct nvfx_src s2)
{
struct nvfx_insn insn = {
.op = op,
@@ -468,7 +468,7 @@ nvfx_insn(boolean sat, unsigned op, int unit, struct nvfx_reg dst, unsigned mask
return insn;
}
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
nvfx_reg(int type, int index)
{
struct nvfx_reg temp = {
@@ -478,7 +478,7 @@ nvfx_reg(int type, int index)
return temp;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
nvfx_src(struct nvfx_reg reg)
{
struct nvfx_src temp = {
@@ -491,7 +491,7 @@ nvfx_src(struct nvfx_reg reg)
return temp;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
nvfx_src_swz(struct nvfx_src src, int x, int y, int z, int w)
{
struct nvfx_src dst = src;
@@ -503,14 +503,14 @@ nvfx_src_swz(struct nvfx_src src, int x, int y, int z, int w)
return dst;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
nvfx_src_neg(struct nvfx_src src)
{
src.negate = !src.negate;
return src;
}
-static INLINE struct nvfx_src
+static inline struct nvfx_src
nvfx_src_abs(struct nvfx_src src)
{
src.abs = 1;
@@ -529,7 +529,7 @@ struct nv30_vertprog;
void
_nvfx_fragprog_translate(uint16_t oclass, struct nv30_fragprog *fp);
-boolean
+bool
_nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp);
#endif
diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
index 1ce0589be71..5757eb1fb16 100644
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -416,7 +416,7 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
return src;
}
-static INLINE struct nvfx_reg
+static inline struct nvfx_reg
tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
struct nvfx_reg dst;
@@ -455,7 +455,7 @@ tgsi_mask(uint tgsi)
return mask;
}
-static boolean
+static bool
nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
unsigned idx, const struct tgsi_full_instruction *finst)
{
@@ -466,7 +466,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
struct nvfx_insn insn;
struct nvfx_relocation reloc;
struct nvfx_loop_entry loop;
- boolean sat = FALSE;
+ bool sat = false;
int mask;
int ai = -1, ci = -1, ii = -1;
int i;
@@ -524,25 +524,25 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
break;
default:
NOUVEAU_ERR("bad src file\n");
- return FALSE;
+ return false;
}
}
for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
if(src[i].reg.type < 0)
- return FALSE;
+ return false;
}
if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS &&
finst->Instruction.Opcode != TGSI_OPCODE_ARL)
- return FALSE;
+ return false;
final_dst = dst = tgsi_dst(vpc, &finst->Dst[0]);
mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
if(finst->Instruction.Saturate) {
assert(finst->Instruction.Opcode != TGSI_OPCODE_ARL);
if (vpc->is_nv4x)
- sat = TRUE;
+ sat = true;
else
if(dst.type != NVFXSR_TEMP)
dst = temp(vpc);
@@ -793,7 +793,7 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
break;
default:
NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
- return FALSE;
+ return false;
}
if(finst->Instruction.Saturate && !vpc->is_nv4x) {
@@ -804,10 +804,10 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc,
}
release_temps(vpc);
- return TRUE;
+ return true;
}
-static boolean
+static bool
nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
const struct tgsi_full_declaration *fdec)
{
@@ -825,7 +825,7 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
vpc->r_result[idx] = temp(vpc);
vpc->r_temps_discard = 0;
vpc->cvtx_idx = idx;
- return TRUE;
+ return true;
case TGSI_SEMANTIC_COLOR:
if (fdec->Semantic.Index == 0) {
hw = NVFX_VP(INST_DEST_COL0);
@@ -834,7 +834,7 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
hw = NVFX_VP(INST_DEST_COL1);
} else {
NOUVEAU_ERR("bad colour semantic index\n");
- return FALSE;
+ return false;
}
break;
case TGSI_SEMANTIC_BCOLOR:
@@ -845,7 +845,7 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
hw = NVFX_VP(INST_DEST_BFC1);
} else {
NOUVEAU_ERR("bad bcolour semantic index\n");
- return FALSE;
+ return false;
}
break;
case TGSI_SEMANTIC_FOG:
@@ -868,22 +868,22 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
if (i == num_texcoords) {
vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
- return TRUE;
+ return true;
}
break;
case TGSI_SEMANTIC_EDGEFLAG:
vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
- return TRUE;
+ return true;
default:
NOUVEAU_ERR("bad output semantic\n");
- return FALSE;
+ return false;
}
vpc->r_result[idx] = nvfx_reg(NVFXSR_OUTPUT, hw);
- return TRUE;
+ return true;
}
-static boolean
+static bool
nvfx_vertprog_prepare(struct nvfx_vpc *vpc)
{
struct tgsi_parse_context p;
@@ -924,7 +924,7 @@ nvfx_vertprog_prepare(struct nvfx_vpc *vpc)
break;
case TGSI_FILE_OUTPUT:
if (!nvfx_vertprog_parse_decl_output(vpc, fdec))
- return FALSE;
+ return false;
break;
default:
break;
@@ -961,12 +961,12 @@ nvfx_vertprog_prepare(struct nvfx_vpc *vpc)
}
vpc->r_temps_discard = 0;
- return TRUE;
+ return true;
}
-DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE)
+DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", false)
-boolean
+bool
_nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
{
struct tgsi_parse_context parse;
@@ -975,13 +975,13 @@ _nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
struct util_dynarray insns;
int i, ucps;
- vp->translated = FALSE;
+ vp->translated = false;
vp->nr_insns = 0;
vp->nr_consts = 0;
vpc = CALLOC_STRUCT(nvfx_vpc);
if (!vpc)
- return FALSE;
+ return false;
vpc->is_nv4x = (oclass >= NV40_3D_CLASS) ? ~0 : 0;
vpc->vp = vp;
vpc->pipe = vp->pipe;
@@ -990,7 +990,7 @@ _nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
if (!nvfx_vertprog_prepare(vpc)) {
FREE(vpc);
- return FALSE;
+ return false;
}
/* Redirect post-transform vertex position to a temp if user clip
@@ -1108,7 +1108,7 @@ _nvfx_vertprog_translate(uint16_t oclass, struct nv30_vertprog *vp)
debug_printf("\n");
}
- vp->translated = TRUE;
+ vp->translated = true;
out:
tgsi_parse_free(&parse);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_blit.h b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
index 756c4c11bf6..0ccec568d3a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_blit.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
@@ -37,7 +37,7 @@ nv50_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
#define NV50_BLIT_TEXTURE_2D_ARRAY 5
#define NV50_BLIT_MAX_TEXTURE_TYPES 6
-static INLINE unsigned
+static inline unsigned
nv50_blit_texture_type(enum pipe_texture_target target)
{
switch (target) {
@@ -52,7 +52,7 @@ nv50_blit_texture_type(enum pipe_texture_target target)
}
}
-static INLINE unsigned
+static inline unsigned
nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target)
{
switch (target) {
@@ -67,7 +67,7 @@ nv50_blit_get_tgsi_texture_target(enum pipe_texture_target target)
}
}
-static INLINE enum pipe_texture_target
+static inline enum pipe_texture_target
nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target)
{
switch (target) {
@@ -81,7 +81,7 @@ nv50_blit_reinterpret_pipe_texture_target(enum pipe_texture_target target)
}
}
-static INLINE unsigned
+static inline unsigned
nv50_blit_get_filter(const struct pipe_blit_info *info)
{
if (info->dst.resource->nr_samples < info->src.resource->nr_samples)
@@ -102,7 +102,7 @@ nv50_blit_get_filter(const struct pipe_blit_info *info)
/* Since shaders cannot export stencil, we cannot copy stencil values when
* rendering to ZETA, so we attach the ZS surface to a colour render target.
*/
-static INLINE enum pipe_format
+static inline enum pipe_format
nv50_blit_zeta_to_colour_format(enum pipe_format format)
{
switch (format) {
@@ -127,7 +127,7 @@ nv50_blit_zeta_to_colour_format(enum pipe_format format)
}
-static INLINE uint16_t
+static inline uint16_t
nv50_blit_derive_color_mask(const struct pipe_blit_info *info)
{
const unsigned mask = info->mask;
@@ -162,7 +162,7 @@ nv50_blit_derive_color_mask(const struct pipe_blit_info *info)
return color_mask;
}
-static INLINE uint32_t
+static inline uint32_t
nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
{
uint32_t mask = 0;
@@ -191,8 +191,8 @@ nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info)
# define nv50_format_table nvc0_format_table
#endif
-/* return TRUE for formats that can be converted among each other by NVC0_2D */
-static INLINE boolean
+/* return true for formats that can be converted among each other by NVC0_2D */
+static inline bool
nv50_2d_dst_format_faithful(enum pipe_format format)
{
const uint64_t mask =
@@ -201,7 +201,7 @@ nv50_2d_dst_format_faithful(enum pipe_format format)
uint8_t id = nv50_format_table[format].rt;
return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
}
-static INLINE boolean
+static inline bool
nv50_2d_src_format_faithful(enum pipe_format format)
{
const uint64_t mask =
@@ -211,7 +211,7 @@ nv50_2d_src_format_faithful(enum pipe_format format)
return (id >= 0xc0) && (mask & (1ULL << (id - 0xc0)));
}
-static INLINE boolean
+static inline bool
nv50_2d_format_supported(enum pipe_format format)
{
uint8_t id = nv50_format_table[format].rt;
@@ -219,7 +219,7 @@ nv50_2d_format_supported(enum pipe_format format)
(NV50_ENG2D_SUPPORTED_FORMATS & (1ULL << (id - 0xc0)));
}
-static INLINE boolean
+static inline bool
nv50_2d_dst_format_ops_supported(enum pipe_format format)
{
uint8_t id = nv50_format_table[format].rt;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 5b5d3912c20..f8d46db7c67 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -64,12 +64,12 @@ nv50_memory_barrier(struct pipe_context *pipe, unsigned flags)
if (!nv50->vtxbuf[i].buffer)
continue;
if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
}
if (nv50->idxbuf.buffer &&
nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
uint32_t valid = nv50->constbuf_valid[s];
@@ -87,7 +87,7 @@ nv50_memory_barrier(struct pipe_context *pipe, unsigned flags)
continue;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nv50->cb_dirty = TRUE;
+ nv50->cb_dirty = true;
}
}
}
@@ -100,9 +100,9 @@ nv50_default_kick_notify(struct nouveau_pushbuf *push)
if (screen) {
nouveau_fence_next(&screen->base);
- nouveau_fence_update(&screen->base, TRUE);
+ nouveau_fence_update(&screen->base, true);
if (screen->cur_ctx)
- screen->cur_ctx->state.flushed = TRUE;
+ screen->cur_ctx->state.flushed = true;
}
}
@@ -310,7 +310,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
nv50->base.invalidate_resource_storage = nv50_invalidate_resource_storage;
if (screen->base.device->chipset < 0x84 ||
- debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+ debug_get_bool_option("NOUVEAU_PMPEG", false)) {
/* PMPEG */
nouveau_context_init_vdec(&nv50->base);
} else if (screen->base.device->chipset < 0x98 ||
@@ -351,7 +351,7 @@ out_err:
}
void
-nv50_bufctx_fence(struct nouveau_bufctx *bufctx, boolean on_flush)
+nv50_bufctx_fence(struct nouveau_bufctx *bufctx, bool on_flush)
{
struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
struct nouveau_list *it;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 1f123ef7e92..ce12e714774 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -91,7 +91,7 @@
struct nv50_blitctx;
-boolean nv50_blitctx_create(struct nv50_context *);
+bool nv50_blitctx_create(struct nv50_context *);
struct nv50_context {
struct nouveau_context base;
@@ -102,7 +102,7 @@ struct nv50_context {
struct nouveau_bufctx *bufctx;
uint32_t dirty;
- boolean cb_dirty;
+ bool cb_dirty;
struct nv50_graph_state state;
@@ -152,26 +152,26 @@ struct nv50_context {
unsigned sample_mask;
unsigned min_samples;
- boolean vbo_push_hint;
+ bool vbo_push_hint;
uint32_t rt_array_mode;
struct pipe_query *cond_query;
- boolean cond_cond; /* inverted rendering condition */
+ bool cond_cond; /* inverted rendering condition */
uint cond_mode;
uint32_t cond_condmode; /* the calculated condition */
struct nv50_blitctx *blit;
};
-static INLINE struct nv50_context *
+static inline struct nv50_context *
nv50_context(struct pipe_context *pipe)
{
return (struct nv50_context *)pipe;
}
/* return index used in nv50_context arrays for a specific shader type */
-static INLINE unsigned
+static inline unsigned
nv50_context_shader_stage(unsigned pipe)
{
switch (pipe) {
@@ -188,7 +188,7 @@ nv50_context_shader_stage(unsigned pipe)
/* nv50_context.c */
struct pipe_context *nv50_create(struct pipe_screen *, void *);
-void nv50_bufctx_fence(struct nouveau_bufctx *, boolean on_flush);
+void nv50_bufctx_fence(struct nouveau_bufctx *, bool on_flush);
void nv50_default_kick_notify(struct nouveau_pushbuf *);
@@ -202,7 +202,7 @@ void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
void nva0_so_target_save_offset(struct pipe_context *,
struct pipe_stream_output_target *,
- unsigned index, boolean seralize);
+ unsigned index, bool seralize);
#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
@@ -221,8 +221,8 @@ extern void nv50_init_state_functions(struct nv50_context *);
/* nv50_state_validate.c */
/* @words: check for space before emitting relocs */
-extern boolean nv50_state_validate(struct nv50_context *, uint32_t state_mask,
- unsigned space_words);
+extern bool nv50_state_validate(struct nv50_context *, uint32_t state_mask,
+ unsigned space_words);
/* nv50_surface.c */
extern void nv50_clear(struct pipe_context *, unsigned buffers,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 0f86ba1de0d..49a93bf1d91 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -44,7 +44,7 @@
*/
#define U_V PIPE_BIND_VERTEX_BUFFER
#define U_T PIPE_BIND_SAMPLER_VIEW
-#define U_I PIPE_BIND_SHADER_RESOURCE | PIPE_BIND_COMPUTE_RESOURCE
+#define U_I PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE
#define U_TR PIPE_BIND_RENDER_TARGET | U_T
#define U_IR U_TR | U_I
#define U_TB PIPE_BIND_BLENDABLE | U_TR
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index f15d8f3ecb6..92d49e49ff2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -30,7 +30,7 @@
uint32_t
nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
- boolean is_3d)
+ bool is_3d)
{
uint32_t tile_mode = 0x000;
@@ -59,13 +59,13 @@ nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
}
static uint32_t
-nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d)
+nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
{
return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz, is_3d);
}
static uint32_t
-nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+nv50_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
{
const unsigned ms = util_logbase2(mt->base.base.nr_samples);
uint32_t tile_flags;
@@ -184,7 +184,7 @@ nv50_miptree_get_handle(struct pipe_screen *pscreen,
unsigned stride;
if (!mt || !mt->base.bo)
- return FALSE;
+ return false;
stride = mt->level[0].pitch;
@@ -204,7 +204,7 @@ const struct u_resource_vtbl nv50_miptree_vtbl =
u_default_transfer_inline_write /* transfer_inline_write */
};
-static INLINE boolean
+static inline bool
nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
{
switch (mt->base.base.nr_samples) {
@@ -228,12 +228,12 @@ nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
break;
default:
NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
-boolean
+bool
nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
{
struct pipe_resource *pt = &mt->base.base;
@@ -241,12 +241,12 @@ nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
unsigned h = pt->height0;
if (util_format_is_depth_or_stencil(pt->format))
- return FALSE;
+ return false;
if ((pt->last_level > 0) || (pt->depth0 > 1) || (pt->array_size > 1))
- return FALSE;
+ return false;
if (mt->ms_x | mt->ms_y)
- return FALSE;
+ return false;
mt->level[0].pitch = align(pt->width0 * blocksize, pitch_align);
@@ -256,7 +256,7 @@ nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align)
mt->total_size = mt->level[0].pitch * h;
- return TRUE;
+ return true;
}
static void
@@ -335,7 +335,7 @@ nv50_miptree_create(struct pipe_screen *pscreen,
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
struct pipe_resource *pt = &mt->base.base;
- boolean compressed = dev->drm_version >= 0x01000101;
+ bool compressed = dev->drm_version >= 0x01000101;
int ret;
union nouveau_bo_config bo_config;
uint32_t bo_flags;
@@ -438,7 +438,7 @@ nv50_miptree_from_handle(struct pipe_screen *pscreen,
/* Offset of zslice @z from start of level @l. */
-INLINE unsigned
+inline unsigned
nv50_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
{
const struct pipe_resource *pt = &mt->base.base;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index aaca4c550d9..02dc3677259 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -25,7 +25,7 @@
#include "codegen/nv50_ir_driver.h"
-static INLINE unsigned
+static inline unsigned
bitcount4(const uint32_t val)
{
static const uint8_t cnt[16]
@@ -104,7 +104,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
prog->vp.bfc[info->out[i].si] = i;
break;
case TGSI_SEMANTIC_LAYER:
- prog->gp.has_layer = TRUE;
+ prog->gp.has_layer = true;
prog->gp.layerid = n;
break;
case TGSI_SEMANTIC_VIEWPORT_INDEX:
@@ -316,7 +316,7 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
return so;
}
-boolean
+bool
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
struct nv50_ir_prog_info *info;
@@ -325,7 +325,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
- return FALSE;
+ return false;
info->type = prog->type;
info->target = chipset;
@@ -410,7 +410,7 @@ out:
return !ret;
}
-boolean
+bool
nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
{
struct nouveau_heap *heap;
@@ -423,7 +423,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break;
default:
assert(!"invalid program type");
- return FALSE;
+ return false;
}
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
@@ -440,7 +440,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
if (ret) {
NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
- return FALSE;
+ return false;
}
}
prog->code_base = prog->mem->start;
@@ -448,10 +448,10 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
ret = nv50_tls_realloc(nv50->screen, prog->tls_space);
if (ret < 0) {
nouveau_heap_free(&prog->mem);
- return FALSE;
+ return false;
}
if (ret > 0)
- nv50->state.new_tls_space = TRUE;
+ nv50->state.new_tls_space = true;
if (prog->fixups)
nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0);
@@ -463,7 +463,7 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1);
PUSH_DATA (nv50->base.pushbuf, 0);
- return TRUE;
+ return true;
}
void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index fe6bd6025be..5d3ff5644d2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -53,7 +53,7 @@ struct nv50_program {
struct pipe_shader_state pipe;
ubyte type;
- boolean translated;
+ bool translated;
uint32_t *code;
unsigned code_size;
@@ -104,8 +104,8 @@ struct nv50_program {
struct nv50_stream_output_state *so;
};
-boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
-boolean nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
+bool nv50_program_translate(struct nv50_program *, uint16_t chipset);
+bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
#endif /* __NV50_PROG_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c
index a3a397c52c1..f31eaa0e314 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_push.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c
@@ -23,13 +23,13 @@ struct push_context {
struct translate *translate;
- boolean primitive_restart;
+ bool primitive_restart;
uint32_t prim;
uint32_t restart_index;
uint32_t instance_id;
};
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
{
unsigned i;
@@ -39,7 +39,7 @@ prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
{
unsigned i;
@@ -49,7 +49,7 @@ prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
{
unsigned i;
@@ -179,7 +179,7 @@ emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
#define NV50_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
-static INLINE unsigned
+static inline unsigned
nv50_prim_gl(unsigned prim)
{
switch (prim) {
@@ -212,7 +212,7 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
unsigned i, index_size;
unsigned inst_count = info->instance_count;
unsigned vert_count = info->count;
- boolean apply_bias = info->indexed && info->index_bias;
+ bool apply_bias = info->indexed && info->index_bias;
ctx.push = nv50->base.pushbuf;
ctx.translate = nv50->vertex->translate;
@@ -258,12 +258,12 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
return;
}
- pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
vert_count /= targ->stride;
}
ctx.idxbuf = NULL;
index_size = 0;
- ctx.primitive_restart = FALSE;
+ ctx.primitive_restart = false;
ctx.restart_index = 0;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 81f7474e36b..f4adbf8c653 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -48,20 +48,21 @@ struct nv50_query {
uint32_t base;
uint32_t offset; /* base + i * 32 */
uint8_t state;
- boolean is64bit;
+ bool is64bit;
+ int nesting; /* only used for occlusion queries */
struct nouveau_mm_allocation *mm;
struct nouveau_fence *fence;
};
#define NV50_QUERY_ALLOC_SPACE 256
-static INLINE struct nv50_query *
+static inline struct nv50_query *
nv50_query(struct pipe_query *pipe)
{
return (struct nv50_query *)pipe;
}
-static boolean
+static bool
nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
{
struct nv50_screen *screen = nv50->screen;
@@ -80,17 +81,17 @@ nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int size)
if (size) {
q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
if (!q->bo)
- return FALSE;
+ return false;
q->offset = q->base;
ret = nouveau_bo_map(q->bo, 0, screen->base.client);
if (ret) {
nv50_query_allocate(nv50, q, 0);
- return FALSE;
+ return false;
}
q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
}
- return TRUE;
+ return true;
}
static void
@@ -153,8 +154,8 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nv50_query *q = nv50_query(pq);
/* For occlusion queries we have to change the storage, because a previous
- * query might set the initial render conition to FALSE even *after* we re-
- * initialized it to TRUE.
+ * query might set the initial render condition to false even *after* we re-
+ * initialized it to true.
*/
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
q->offset += 32;
@@ -166,7 +167,7 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
* query ?
*/
q->data[0] = q->sequence; /* initialize sequence */
- q->data[1] = 1; /* initial render condition = TRUE */
+ q->data[1] = 1; /* initial render condition = true */
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
q->data[5] = 0;
}
@@ -175,11 +176,16 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
- PUSH_SPACE(push, 4);
- BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
- PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
- BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
- PUSH_DATA (push, 1);
+ q->nesting = nv50->screen->num_occlusion_queries_active++;
+ if (q->nesting) {
+ nv50_query_get(push, q, 0x10, 0x0100f002);
+ } else {
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ }
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nv50_query_get(push, q, 0x10, 0x06805002);
@@ -223,9 +229,11 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
nv50_query_get(push, q, 0, 0x0100f002);
- PUSH_SPACE(push, 2);
- BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
- PUSH_DATA (push, 0);
+ if (--nv50->screen->num_occlusion_queries_active == 0) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
nv50_query_get(push, q, 0, 0x06805002);
@@ -261,7 +269,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* This query is not issued on GPU because disjoint is forced to FALSE */
+ /* This query is not issued on GPU because disjoint is forced to false */
q->state = NV50_QUERY_STATE_READY;
break;
default:
@@ -273,7 +281,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence);
}
-static INLINE void
+static inline void
nv50_query_update(struct nv50_query *q)
{
if (q->is64bit) {
@@ -293,7 +301,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
struct nv50_query *q = nv50_query(pq);
uint64_t *res64 = (uint64_t *)result;
uint32_t *res32 = (uint32_t *)result;
- boolean *res8 = (boolean *)result;
+ uint8_t *res8 = (uint8_t *)result;
uint64_t *data64 = (uint64_t *)q->data;
int i;
@@ -307,19 +315,19 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
q->state = NV50_QUERY_STATE_FLUSHED;
PUSH_KICK(nv50->base.pushbuf);
}
- return FALSE;
+ return false;
}
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
- return FALSE;
+ return false;
}
q->state = NV50_QUERY_STATE_READY;
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
- res8[0] = TRUE;
+ res8[0] = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
- res64[0] = q->data[1];
+ res64[0] = q->data[1] - q->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
@@ -338,7 +346,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
res64[0] = 1000000000;
- res8[8] = FALSE;
+ res8[8] = false;
break;
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
@@ -347,10 +355,10 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
res32[0] = q->data[1];
break;
default:
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
void
@@ -377,7 +385,7 @@ nv50_render_condition(struct pipe_context *pipe,
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nv50_query *q;
uint32_t cond;
- boolean wait =
+ bool wait =
mode != PIPE_RENDER_COND_NO_WAIT &&
mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
@@ -391,13 +399,12 @@ nv50_render_condition(struct pipe_context *pipe,
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
cond = condition ? NV50_3D_COND_MODE_EQUAL :
NV50_3D_COND_MODE_NOT_EQUAL;
- wait = TRUE;
+ wait = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
if (likely(!condition)) {
- /* XXX: Placeholder, handle nesting here if available */
- if (unlikely(false))
+ if (unlikely(q->nesting))
cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL :
NV50_3D_COND_MODE_ALWAYS;
else
@@ -461,7 +468,7 @@ nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
void
nva0_so_target_save_offset(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg,
- unsigned index, boolean serialize)
+ unsigned index, bool serialize)
{
struct nv50_so_target *targ = nv50_so_target(ptarg);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
index f7ee1354a92..a46e622c597 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -35,7 +35,7 @@ nv50_screen_init_resource_functions(struct pipe_screen *pscreen);
uint32_t
nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
- boolean is_3d);
+ bool is_3d);
struct nv50_miptree_level {
uint32_t offset;
@@ -50,13 +50,13 @@ struct nv50_miptree {
struct nv50_miptree_level level[NV50_MAX_TEXTURE_LEVELS];
uint32_t total_size;
uint32_t layer_stride;
- boolean layout_3d; /* TRUE if layer count varies with mip level */
+ bool layout_3d; /* true if layer count varies with mip level */
uint8_t ms_x; /* log2 of number of samples in x/y dimension */
uint8_t ms_y;
uint8_t ms_mode;
};
-static INLINE struct nv50_miptree *
+static inline struct nv50_miptree *
nv50_miptree(struct pipe_resource *pt)
{
return (struct nv50_miptree *)pt;
@@ -70,7 +70,7 @@ nv50_miptree(struct pipe_resource *pt)
/* Internal functions:
*/
-boolean
+bool
nv50_miptree_init_layout_linear(struct nv50_miptree *mt, unsigned pitch_align);
struct pipe_resource *
@@ -98,13 +98,13 @@ struct nv50_surface {
uint16_t depth;
};
-static INLINE struct nv50_surface *
+static inline struct nv50_surface *
nv50_surface(struct pipe_surface *ps)
{
return (struct nv50_surface *)ps;
}
-static INLINE enum pipe_format
+static inline enum pipe_format
nv50_zs_to_s_format(enum pipe_format format)
{
switch (format) {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6583a353578..30e6e042fbf 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -51,19 +51,19 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
unsigned bindings)
{
if (sample_count > 8)
- return FALSE;
+ return false;
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
- return FALSE;
+ return false;
if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
- return FALSE;
+ return false;
if (!util_format_is_supported(format, bindings))
- return FALSE;
+ return false;
switch (format) {
case PIPE_FORMAT_Z16_UNORM:
if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
- return FALSE;
+ return false;
break;
default:
break;
@@ -176,6 +176,9 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -210,6 +213,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
return 0;
case PIPE_CAP_VENDOR_ID:
@@ -286,7 +290,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
/* The chip could handle more sampler views than samplers */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
- return MIN2(32, PIPE_MAX_SAMPLERS);
+ return MIN2(16, PIPE_MAX_SAMPLERS);
case PIPE_SHADER_CAP_DOUBLES:
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
@@ -454,7 +458,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
PUSH_DATA (push, 0xf);
- if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
PUSH_DATA (push, 0x18);
}
@@ -734,7 +738,7 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_resource_functions(pscreen);
if (screen->base.device->chipset < 0x84 ||
- debug_get_bool_option("NOUVEAU_PMPEG", FALSE)) {
+ debug_get_bool_option("NOUVEAU_PMPEG", false)) {
/* PMPEG */
nouveau_screen_init_vdec(&screen->base);
} else if (screen->base.device->chipset < 0x98 ||
@@ -890,7 +894,7 @@ nv50_screen_create(struct nouveau_device *dev)
nv50_screen_init_hwctx(screen);
- nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 881051b1862..ce51f0fc254 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -32,14 +32,14 @@ struct nv50_graph_state {
uint32_t semantic_color;
uint32_t semantic_psize;
int32_t index_bias;
- boolean uniform_buffer_bound[3];
- boolean prim_restart;
- boolean point_sprite;
- boolean rt_serialize;
- boolean flushed;
- boolean rasterizer_discard;
+ bool uniform_buffer_bound[3];
+ bool prim_restart;
+ bool point_sprite;
+ bool rt_serialize;
+ bool flushed;
+ bool rasterizer_discard;
uint8_t tls_required;
- boolean new_tls_space;
+ bool new_tls_space;
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[3];
@@ -54,6 +54,8 @@ struct nv50_screen {
struct nv50_context *cur_ctx;
struct nv50_graph_state save_state;
+ int num_occlusion_queries_active;
+
struct nouveau_bo *code;
struct nouveau_bo *uniforms;
struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
@@ -95,19 +97,19 @@ struct nv50_screen {
struct nouveau_object *m2mf;
};
-static INLINE struct nv50_screen *
+static inline struct nv50_screen *
nv50_screen(struct pipe_screen *screen)
{
return (struct nv50_screen *)screen;
}
-boolean nv50_blitter_create(struct nv50_screen *);
+bool nv50_blitter_create(struct nv50_screen *);
void nv50_blitter_destroy(struct nv50_screen *);
int nv50_screen_tic_alloc(struct nv50_screen *, void *);
int nv50_screen_tsc_alloc(struct nv50_screen *, void *);
-static INLINE void
+static inline void
nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
{
struct nv50_screen *screen = nv50_screen(res->base.screen);
@@ -119,7 +121,7 @@ nv50_resource_fence(struct nv04_resource *res, uint32_t flags)
}
}
-static INLINE void
+static inline void
nv50_resource_validate(struct nv04_resource *res, uint32_t flags)
{
if (likely(res->bo)) {
@@ -142,21 +144,21 @@ struct nv50_format {
extern const struct nv50_format nv50_format_table[];
-static INLINE void
+static inline void
nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic)
{
if (tic->id >= 0)
screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
}
-static INLINE void
+static inline void
nv50_screen_tsc_unlock(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
{
if (tsc->id >= 0)
screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
}
-static INLINE void
+static inline void
nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic)
{
if (tic->id >= 0) {
@@ -165,7 +167,7 @@ nv50_screen_tic_free(struct nv50_screen *screen, struct nv50_tic_entry *tic)
}
}
-static INLINE void
+static inline void
nv50_screen_tsc_free(struct nv50_screen *screen, struct nv50_tsc_entry *tsc)
{
if (tsc->id >= 0) {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index c698782d8bd..b033ce5c6dc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -60,7 +60,7 @@ nv50_constbufs_validate(struct nv50_context *nv50)
continue;
}
if (!nv50->state.uniform_buffer_bound[s]) {
- nv50->state.uniform_buffer_bound[s] = TRUE;
+ nv50->state.uniform_buffer_bound[s] = true;
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
}
@@ -99,33 +99,35 @@ nv50_constbufs_validate(struct nv50_context *nv50)
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
BCTX_REFN(nv50->bufctx_3d, CB(s, i), res, RD);
+
+ nv50->cb_dirty = 1; /* Force cache flush for UBO. */
} else {
BEGIN_NV04(push, NV50_3D(SET_PROGRAM_CB), 1);
PUSH_DATA (push, (i << 8) | p | 0);
}
if (i == 0)
- nv50->state.uniform_buffer_bound[s] = FALSE;
+ nv50->state.uniform_buffer_bound[s] = false;
}
}
}
}
-static boolean
+static bool
nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
{
if (!prog->translated) {
prog->translated = nv50_program_translate(
prog, nv50->screen->base.device->chipset);
if (!prog->translated)
- return FALSE;
+ return false;
} else
if (prog->mem)
- return TRUE;
+ return true;
return nv50_program_upload_code(nv50, prog);
}
-static INLINE void
+static inline void
nv50_program_update_context_state(struct nv50_context *nv50,
struct nv50_program *prog, int stage)
{
@@ -136,7 +138,7 @@ nv50_program_update_context_state(struct nv50_context *nv50,
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_TLS);
if (!nv50->state.tls_required || nv50->state.new_tls_space)
BCTX_REFN_bo(nv50->bufctx_3d, TLS, flags, nv50->screen->tls_bo);
- nv50->state.new_tls_space = FALSE;
+ nv50->state.new_tls_space = false;
nv50->state.tls_required |= 1 << stage;
} else {
if (nv50->state.tls_required == (1 << stage))
@@ -243,11 +245,11 @@ nv50_sprite_coords_validate(struct nv50_context *nv50)
for (i = 0; i < 8; ++i)
PUSH_DATA(push, 0);
- nv50->state.point_sprite = FALSE;
+ nv50->state.point_sprite = false;
}
return;
} else {
- nv50->state.point_sprite = TRUE;
+ nv50->state.point_sprite = true;
}
memset(pntc, 0, sizeof(pntc));
@@ -646,7 +648,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
nv50_query_pushbuf_submit(push, targ->pq, 0x4);
} else {
PUSH_DATA(push, 0);
- targ->clean = FALSE;
+ targ->clean = false;
}
} else {
const unsigned limit = targ->pipe.buffer_size /
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index d4d41af3c61..9505a0b4085 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -62,7 +62,7 @@
* in advance to maintain elegant separate shader objects.)
*/
-static INLINE uint32_t
+static inline uint32_t
nv50_colormask(unsigned mask)
{
uint32_t ret = 0;
@@ -82,7 +82,7 @@ nv50_colormask(unsigned mask)
#define NV50_BLEND_FACTOR_CASE(a, b) \
case PIPE_BLENDFACTOR_##a: return NV50_BLEND_FACTOR_##b
-static INLINE uint32_t
+static inline uint32_t
nv50_blend_fac(unsigned factor)
{
switch (factor) {
@@ -116,7 +116,7 @@ nv50_blend_state_create(struct pipe_context *pipe,
{
struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
int i;
- boolean emit_common_func = cso->rt[0].blend_enable;
+ bool emit_common_func = cso->rt[0].blend_enable;
uint32_t ms;
if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
@@ -137,11 +137,11 @@ nv50_blend_state_create(struct pipe_context *pipe,
for (i = 0; i < 8; ++i) {
SB_DATA(so, cso->rt[i].blend_enable);
if (cso->rt[i].blend_enable)
- emit_common_func = TRUE;
+ emit_common_func = true;
}
if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
- emit_common_func = FALSE;
+ emit_common_func = false;
for (i = 0; i < 8; ++i) {
if (!cso->rt[i].blend_enable)
@@ -373,6 +373,16 @@ nv50_zsa_state_create(struct pipe_context *pipe,
SB_DATA (so, 0);
}
+ SB_BEGIN_3D(so, DEPTH_BOUNDS_EN, 1);
+ if (cso->depth.bounds_test) {
+ SB_DATA (so, 1);
+ SB_BEGIN_3D(so, DEPTH_BOUNDS(0), 2);
+ SB_DATA (so, fui(cso->depth.bounds_min));
+ SB_DATA (so, fui(cso->depth.bounds_max));
+ } else {
+ SB_DATA (so, 0);
+ }
+
if (cso->stencil[0].enabled) {
SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
SB_DATA (so, 1);
@@ -439,7 +449,7 @@ nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
#define NV50_TSC_WRAP_CASE(n) \
case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
-static INLINE unsigned
+static inline unsigned
nv50_tsc_wrap_mode(unsigned wrap)
{
switch (wrap) {
@@ -572,7 +582,7 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
FREE(hwcso);
}
-static INLINE void
+static inline void
nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
unsigned nr, void **hwcso)
{
@@ -650,7 +660,7 @@ nv50_sampler_view_destroy(struct pipe_context *pipe,
FREE(nv50_tic_entry(view));
}
-static INLINE void
+static inline void
nv50_stage_set_sampler_views(struct nv50_context *nv50, int s,
unsigned nr,
struct pipe_sampler_view **views)
@@ -808,7 +818,7 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
pipe_resource_reference(&nv50->constbuf[s][i].u.buf, res);
- nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+ nv50->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
if (nv50->constbuf[s][i].user) {
nv50->constbuf[s][i].u.data = cb->user_buffer;
nv50->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
@@ -1041,7 +1051,7 @@ nv50_so_target_create(struct pipe_context *pipe,
} else {
targ->pq = NULL;
}
- targ->clean = TRUE;
+ targ->clean = true;
targ->pipe.buffer_size = size;
targ->pipe.buffer_offset = offset;
@@ -1075,32 +1085,32 @@ nv50_set_stream_output_targets(struct pipe_context *pipe,
{
struct nv50_context *nv50 = nv50_context(pipe);
unsigned i;
- boolean serialize = TRUE;
- const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
+ bool serialize = true;
+ const bool can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
assert(num_targets <= 4);
for (i = 0; i < num_targets; ++i) {
- const boolean changed = nv50->so_target[i] != targets[i];
- const boolean append = (offsets[i] == (unsigned)-1);
+ const bool changed = nv50->so_target[i] != targets[i];
+ const bool append = (offsets[i] == (unsigned)-1);
if (!changed && append)
continue;
nv50->so_targets_dirty |= 1 << i;
if (can_resume && changed && nv50->so_target[i]) {
nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
- serialize = FALSE;
+ serialize = false;
}
if (targets[i] && !append)
- nv50_so_target(targets[i])->clean = TRUE;
+ nv50_so_target(targets[i])->clean = true;
pipe_so_target_reference(&nv50->so_target[i], targets[i]);
}
for (; i < nv50->num_so_targets; ++i) {
if (can_resume && nv50->so_target[i]) {
nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
- serialize = FALSE;
+ serialize = false;
}
pipe_so_target_reference(&nv50->so_target[i], NULL);
nv50->so_targets_dirty |= 1 << i;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 116bf4bba7c..985603df5fa 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -2,7 +2,7 @@
#include "nv50/nv50_context.h"
#include "nv50/nv50_defs.xml.h"
-static INLINE void
+static inline void
nv50_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
{
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 4);
@@ -82,7 +82,7 @@ nv50_validate_fb(struct nv50_context *nv50)
ms_mode = mt->ms_mode;
if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
- nv50->state.rt_serialize = TRUE;
+ nv50->state.rt_serialize = true;
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
@@ -111,7 +111,7 @@ nv50_validate_fb(struct nv50_context *nv50)
ms_mode = mt->ms_mode;
if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
- nv50->state.rt_serialize = TRUE;
+ nv50->state.rt_serialize = true;
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
@@ -275,7 +275,7 @@ nv50_validate_viewport(struct nv50_context *nv50)
nv50->viewports_dirty = 0;
}
-static INLINE void
+static inline void
nv50_check_program_ucps(struct nv50_context *nv50,
struct nv50_program *vp, uint8_t mask)
{
@@ -296,6 +296,23 @@ nv50_check_program_ucps(struct nv50_context *nv50,
nv50_fp_linkage_validate(nv50);
}
+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nv50_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nv50_validate_derived_2(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+ if (nv50->zsa && nv50->zsa->pipe.alpha.enabled &&
+ nv50->framebuffer.nr_cbufs == 0) {
+ nv50_fb_set_null_rt(push, 0);
+ BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, (076543210 << 4) | 1);
+ }
+}
+
static void
nv50_validate_clip(struct nv50_context *nv50)
{
@@ -456,6 +473,7 @@ static struct state_validate {
{ nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
{ nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+ { nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
{ nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
@@ -468,7 +486,7 @@ static struct state_validate {
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
-boolean
+bool
nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words)
{
uint32_t state_mask;
@@ -490,19 +508,19 @@ nv50_state_validate(struct nv50_context *nv50, uint32_t mask, unsigned words)
nv50->dirty &= ~state_mask;
if (nv50->state.rt_serialize) {
- nv50->state.rt_serialize = FALSE;
+ nv50->state.rt_serialize = false;
BEGIN_NV04(nv50->base.pushbuf, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (nv50->base.pushbuf, 0);
}
- nv50_bufctx_fence(nv50->bufctx_3d, FALSE);
+ nv50_bufctx_fence(nv50->bufctx_3d, false);
}
nouveau_pushbuf_bufctx(nv50->base.pushbuf, nv50->bufctx_3d);
ret = nouveau_pushbuf_validate(nv50->base.pushbuf);
if (unlikely(nv50->state.flushed)) {
- nv50->state.flushed = FALSE;
- nv50_bufctx_fence(nv50->bufctx_3d, TRUE);
+ nv50->state.flushed = false;
+ nv50_bufctx_fence(nv50->bufctx_3d, true);
}
return !ret;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
index eea5327b6cb..cf75d1eb11b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
@@ -31,7 +31,7 @@ struct nv50_rasterizer_stateobj {
struct nv50_zsa_stateobj {
struct pipe_depth_stencil_alpha_state pipe;
int size;
- uint32_t state[29];
+ uint32_t state[34];
};
struct nv50_constbuf {
@@ -41,7 +41,7 @@ struct nv50_constbuf {
} u;
uint32_t size; /* max 65536 */
uint32_t offset;
- boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+ bool user; /* should only be true if u.data is valid and non-NULL */
};
struct nv50_vertex_element {
@@ -56,7 +56,7 @@ struct nv50_vertex_stateobj {
unsigned num_elements;
uint32_t instance_elts;
uint32_t instance_bufs;
- boolean need_conversion;
+ bool need_conversion;
unsigned vertex_size;
unsigned packet_vertex_limit;
struct nv50_vertex_element element[0];
@@ -66,10 +66,10 @@ struct nv50_so_target {
struct pipe_stream_output_target pipe;
struct pipe_query *pq;
unsigned stride;
- boolean clean;
+ bool clean;
};
-static INLINE struct nv50_so_target *
+static inline struct nv50_so_target *
nv50_so_target(struct pipe_stream_output_target *ptarg)
{
return (struct nv50_so_target *)ptarg;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
index 99548cbdb42..e0793bb6ec4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h
@@ -9,7 +9,7 @@ struct nv50_tsc_entry {
uint32_t tsc[8];
};
-static INLINE struct nv50_tsc_entry *
+static inline struct nv50_tsc_entry *
nv50_tsc_entry(void *hwcso)
{
return (struct nv50_tsc_entry *)hwcso;
@@ -21,7 +21,7 @@ struct nv50_tic_entry {
uint32_t tic[8];
};
-static INLINE struct nv50_tic_entry *
+static inline struct nv50_tic_entry *
nv50_tic_entry(struct pipe_sampler_view *view)
{
return (struct nv50_tic_entry *)view;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index dc9852d4e47..b1ae01692cb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -49,8 +49,8 @@
#define NOUVEAU_DRIVER 0x50
#include "nv50/nv50_blit.h"
-static INLINE uint8_t
-nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+static inline uint8_t
+nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
{
uint8_t id = nv50_format_table[format].rt;
@@ -76,7 +76,7 @@ nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
static int
nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
struct nv50_miptree *mt, unsigned level, unsigned layer,
- enum pipe_format pformat, boolean dst_src_pformat_equal)
+ enum pipe_format pformat, bool dst_src_pformat_equal)
{
struct nouveau_bo *bo = mt->base.bo;
uint32_t width, height, depth;
@@ -153,7 +153,7 @@ nv50_2d_texture_do_copy(struct nouveau_pushbuf *push,
const enum pipe_format dfmt = dst->base.base.format;
const enum pipe_format sfmt = src->base.base.format;
int ret;
- boolean eqfmt = dfmt == sfmt;
+ bool eqfmt = dfmt == sfmt;
if (!PUSH_SPACE(push, 2 * 16 + 32))
return PIPE_ERROR;
@@ -196,7 +196,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
{
struct nv50_context *nv50 = nv50_context(pipe);
int ret;
- boolean m2mf;
+ bool m2mf;
unsigned dst_layer = dstz, src_layer = src_box->z;
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -658,7 +658,7 @@ nv50_blitter_make_vp(struct nv50_blitter *blit)
};
blit->vp.type = PIPE_SHADER_VERTEX;
- blit->vp.translated = TRUE;
+ blit->vp.translated = true;
blit->vp.code = (uint32_t *)code; /* const_cast */
blit->vp.code_size = sizeof(code);
blit->vp.max_gpr = 4;
@@ -687,24 +687,24 @@ nv50_blitter_make_fp(struct pipe_context *pipe,
const unsigned target = nv50_blit_get_tgsi_texture_target(ptarg);
- boolean tex_rgbaz = FALSE;
- boolean tex_s = FALSE;
- boolean cvt_un8 = FALSE;
+ bool tex_rgbaz = false;
+ bool tex_s = false;
+ bool cvt_un8 = false;
if (mode != NV50_BLIT_MODE_PASS &&
mode != NV50_BLIT_MODE_Z24X8 &&
mode != NV50_BLIT_MODE_X8Z24)
- tex_s = TRUE;
+ tex_s = true;
if (mode != NV50_BLIT_MODE_X24S8 &&
mode != NV50_BLIT_MODE_S8X24 &&
mode != NV50_BLIT_MODE_XS)
- tex_rgbaz = TRUE;
+ tex_rgbaz = true;
if (mode != NV50_BLIT_MODE_PASS &&
mode != NV50_BLIT_MODE_ZS &&
mode != NV50_BLIT_MODE_XS)
- cvt_un8 = TRUE;
+ cvt_un8 = true;
ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
if (!ureg)
@@ -1271,7 +1271,7 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
int i;
uint32_t mode;
uint32_t mask = nv50_blit_eng2d_get_mask(info);
- boolean b;
+ bool b;
mode = nv50_blit_get_filter(info) ?
NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
@@ -1410,7 +1410,7 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
PUSH_DATA (push, srcy >> 32);
}
}
- nv50_bufctx_fence(nv50->bufctx, FALSE);
+ nv50_bufctx_fence(nv50->bufctx, false);
nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
@@ -1432,71 +1432,82 @@ static void
nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
struct nv50_context *nv50 = nv50_context(pipe);
- boolean eng3d = FALSE;
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ bool eng3d = false;
if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
if (!(info->mask & PIPE_MASK_ZS))
return;
if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
- eng3d = TRUE;
+ eng3d = true;
if (info->filter != PIPE_TEX_FILTER_NEAREST)
- eng3d = TRUE;
+ eng3d = true;
} else {
if (!(info->mask & PIPE_MASK_RGBA))
return;
if (info->mask != PIPE_MASK_RGBA)
- eng3d = TRUE;
+ eng3d = true;
}
if (nv50_miptree(info->src.resource)->layout_3d) {
- eng3d = TRUE;
+ eng3d = true;
} else
if (info->src.box.depth != info->dst.box.depth) {
- eng3d = TRUE;
+ eng3d = true;
debug_printf("blit: cannot filter array or cube textures in z direction");
}
if (!eng3d && info->dst.format != info->src.format) {
if (!nv50_2d_dst_format_faithful(info->dst.format) ||
!nv50_2d_src_format_faithful(info->src.format)) {
- eng3d = TRUE;
+ eng3d = true;
} else
if (!nv50_2d_src_format_faithful(info->src.format)) {
if (!util_format_is_luminance(info->src.format)) {
if (util_format_is_intensity(info->src.format))
- eng3d = TRUE;
+ eng3d = true;
else
if (!nv50_2d_dst_format_ops_supported(info->dst.format))
- eng3d = TRUE;
+ eng3d = true;
else
eng3d = !nv50_2d_format_supported(info->src.format);
}
} else
if (util_format_is_luminance_alpha(info->src.format))
- eng3d = TRUE;
+ eng3d = true;
}
if (info->src.resource->nr_samples == 8 &&
info->dst.resource->nr_samples <= 1)
- eng3d = TRUE;
+ eng3d = true;
/* FIXME: can't make this work with eng2d anymore */
if ((info->src.resource->nr_samples | 1) !=
(info->dst.resource->nr_samples | 1))
- eng3d = TRUE;
+ eng3d = true;
/* FIXME: find correct src coordinate adjustments */
if ((info->src.box.width != info->dst.box.width &&
info->src.box.width != -info->dst.box.width) ||
(info->src.box.height != info->dst.box.height &&
info->src.box.height != -info->dst.box.height))
- eng3d = TRUE;
+ eng3d = true;
+
+ if (nv50->screen->num_occlusion_queries_active) {
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
if (!eng3d)
nv50_blit_eng2d(nv50, info);
else
nv50_blit_3d(nv50, info);
+
+ if (nv50->screen->num_occlusion_queries_active) {
+ BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ }
}
static void
@@ -1505,13 +1516,13 @@ nv50_flush_resource(struct pipe_context *ctx,
{
}
-boolean
+bool
nv50_blitter_create(struct nv50_screen *screen)
{
screen->blitter = CALLOC_STRUCT(nv50_blitter);
if (!screen->blitter) {
NOUVEAU_ERR("failed to allocate blitter struct\n");
- return FALSE;
+ return false;
}
pipe_mutex_init(screen->blitter->mutex);
@@ -1519,7 +1530,7 @@ nv50_blitter_create(struct nv50_screen *screen)
nv50_blitter_make_vp(screen->blitter);
nv50_blitter_make_sampler(screen->blitter);
- return TRUE;
+ return true;
}
void
@@ -1542,20 +1553,20 @@ nv50_blitter_destroy(struct nv50_screen *screen)
FREE(blitter);
}
-boolean
+bool
nv50_blitctx_create(struct nv50_context *nv50)
{
nv50->blit = CALLOC_STRUCT(nv50_blitctx);
if (!nv50->blit) {
NOUVEAU_ERR("failed to allocate blit context\n");
- return FALSE;
+ return false;
}
nv50->blit->nv50 = nv50;
nv50->blit->rast.pipe.half_pixel_center = 1;
- return TRUE;
+ return true;
}
void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index d69c8d6ff0d..fc6374d1b1b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -31,8 +31,8 @@
(NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
-static INLINE uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+static inline uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
{
switch (swz) {
case PIPE_SWIZZLE_RED:
@@ -71,6 +71,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
uint32_t flags,
enum pipe_texture_target target)
{
+ const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d;
const struct util_format_description *desc;
uint64_t addr;
uint32_t *tic;
@@ -78,7 +79,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
uint32_t depth;
struct nv50_tic_entry *view;
struct nv50_miptree *mt = nv50_miptree(texture);
- boolean tex_int;
+ bool tex_int;
view = MALLOC_STRUCT(nv50_tic_entry);
if (!view)
@@ -192,7 +193,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
break;
default:
NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
- return FALSE;
+ return false;
}
tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
@@ -201,11 +202,17 @@ nv50_create_texture_view(struct pipe_context *pipe,
tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;
tic[5] |= depth << 16;
- tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+ if (class_3d > NV50_3D_CLASS)
+ tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+ else
+ tic[5] |= view->pipe.u.tex.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */
- tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+ if (class_3d > NV50_3D_CLASS)
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+ else
+ tic[7] = 0;
if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)))
if (mt->base.base.last_level)
@@ -214,13 +221,13 @@ nv50_create_texture_view(struct pipe_context *pipe,
return &view->pipe;
}
-static boolean
+static bool
nv50_validate_tic(struct nv50_context *nv50, int s)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
struct nouveau_bo *txc = nv50->screen->txc;
unsigned i;
- boolean need_flush = FALSE;
+ bool need_flush = false;
assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50->num_textures[s]; ++i) {
@@ -263,7 +270,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
BEGIN_NI04(push, NV50_2D(SIFC_DATA), 8);
PUSH_DATAp(push, &tic->tic[0], 8);
- need_flush = TRUE;
+ need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
@@ -309,7 +316,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
void nv50_validate_textures(struct nv50_context *nv50)
{
- boolean need_flush;
+ bool need_flush;
need_flush = nv50_validate_tic(nv50, 0);
need_flush |= nv50_validate_tic(nv50, 1);
@@ -321,12 +328,12 @@ void nv50_validate_textures(struct nv50_context *nv50)
}
}
-static boolean
+static bool
nv50_validate_tsc(struct nv50_context *nv50, int s)
{
struct nouveau_pushbuf *push = nv50->base.pushbuf;
unsigned i;
- boolean need_flush = FALSE;
+ bool need_flush = false;
assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nv50->num_samplers[s]; ++i) {
@@ -343,7 +350,7 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
nv50_sifc_linear_u8(&nv50->base, nv50->screen->txc,
65536 + tsc->id * 32,
NOUVEAU_BO_VRAM, 32, tsc->tsc);
- need_flush = TRUE;
+ need_flush = true;
}
nv50->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
@@ -361,7 +368,7 @@ nv50_validate_tsc(struct nv50_context *nv50, int s)
void nv50_validate_samplers(struct nv50_context *nv50)
{
- boolean need_flush;
+ bool need_flush;
need_flush = nv50_validate_tsc(nv50, 0);
need_flush |= nv50_validate_tsc(nv50, 1);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 1fd33b8aa59..6324726acec 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -58,7 +58,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
so->num_elements = num_elements;
so->instance_elts = 0;
so->instance_bufs = 0;
- so->need_conversion = FALSE;
+ so->need_conversion = false;
memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
@@ -89,7 +89,7 @@ nv50_vertex_state_create(struct pipe_context *pipe,
return NULL;
}
so->element[i].state = nv50_format_table[fmt].vtx;
- so->need_conversion = TRUE;
+ so->need_conversion = true;
}
so->element[i].state |= i;
@@ -188,7 +188,7 @@ nv50_emit_vtxattr(struct nv50_context *nv50, struct pipe_vertex_buffer *vb,
}
}
-static INLINE void
+static inline void
nv50_user_vbuf_range(struct nv50_context *nv50, unsigned vbi,
uint32_t *base, uint32_t *size)
{
@@ -229,7 +229,7 @@ nv50_upload_user_buffers(struct nv50_context *nv50,
BCTX_REFN_bo(nv50->bufctx_3d, VERTEX_TMP, NOUVEAU_BO_GART |
NOUVEAU_BO_RD, bo);
}
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
}
static void
@@ -275,10 +275,10 @@ nv50_update_user_vbufs(struct nv50_context *nv50)
PUSH_DATAh(push, address[b] + ve->src_offset);
PUSH_DATA (push, address[b] + ve->src_offset);
}
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
}
-static INLINE void
+static inline void
nv50_release_user_vbufs(struct nv50_context *nv50)
{
if (nv50->vbo_user) {
@@ -316,7 +316,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
struct nv04_resource *buf = nv04_resource(nv50->vtxbuf[i].buffer);
if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
break;
}
}
@@ -382,6 +382,11 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
if (nv50->vbo_user & (1 << b)) {
address = addrs[b] + ve->pipe.src_offset;
limit = addrs[b] + limits[b];
+ } else
+ if (!vb->buffer) {
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, 0);
+ continue;
} else {
struct nv04_resource *buf = nv04_resource(vb->buffer);
if (!(refd & (1 << b))) {
@@ -418,7 +423,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
#define NV50_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NV50_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
-static INLINE unsigned
+static inline unsigned
nv50_prim_gl(unsigned prim)
{
switch (prim) {
@@ -585,7 +590,7 @@ nv50_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
}
static void
-nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
+nv50_draw_elements(struct nv50_context *nv50, bool shorten,
unsigned mode, unsigned start, unsigned count,
unsigned instance_count, int32_t index_bias)
{
@@ -746,9 +751,9 @@ nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
{
struct nv50_screen *screen = chan->user_priv;
- nouveau_fence_update(&screen->base, TRUE);
+ nouveau_fence_update(&screen->base, true);
- nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, TRUE);
+ nv50_bufctx_fence(screen->cur_ctx->bufctx_3d, true);
}
void
@@ -801,7 +806,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
continue;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->cb_dirty = TRUE;
+ nv50->cb_dirty = true;
}
}
@@ -809,7 +814,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nv50->cb_dirty) {
BEGIN_NV04(push, NV50_3D(CODE_CB_FLUSH), 1);
PUSH_DATA (push, 0);
- nv50->cb_dirty = FALSE;
+ nv50->cb_dirty = false;
}
if (nv50->vbo_fifo) {
@@ -830,21 +835,21 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nv50->vtxbuf[i].buffer)
continue;
if (nv50->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
}
if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nv50->base.vbo_dirty = TRUE;
+ nv50->base.vbo_dirty = true;
if (nv50->base.vbo_dirty) {
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
PUSH_DATA (push, 0);
- nv50->base.vbo_dirty = FALSE;
+ nv50->base.vbo_dirty = false;
}
if (info->indexed) {
- boolean shorten = info->max_index <= 65535;
+ bool shorten = info->max_index <= 65535;
if (info->primitive_restart != nv50->state.prim_restart) {
if (info->primitive_restart) {
@@ -853,7 +858,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->restart_index);
if (info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
} else {
BEGIN_NV04(push, NV50_3D(PRIM_RESTART_ENABLE), 1);
PUSH_DATA (push, 0);
@@ -865,7 +870,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
PUSH_DATA (push, info->restart_index);
if (info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
}
nv50_draw_elements(nv50, shorten,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
index e8578c8be6f..76f1b41ea70 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_winsys.h
@@ -16,14 +16,14 @@
#endif
-static INLINE void
+static inline void
nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
unsigned flags, struct nouveau_bo *bo)
{
nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
}
-static INLINE void
+static inline void
nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin,
struct nv04_resource *res, unsigned flags)
{
@@ -39,7 +39,7 @@ nv50_add_bufctx_resident(struct nouveau_bufctx *bufctx, int bin,
#define BCTX_REFN(bctx, bin, res, acc) \
nv50_add_bufctx_resident(bctx, NV50_BIND_##bin, res, NOUVEAU_BO_##acc)
-static INLINE void
+static inline void
PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
{
struct nouveau_pushbuf_refn ref = { bo, flags };
@@ -61,39 +61,39 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define NV50_COMPUTE(n) SUBC_COMPUTE(NV50_COMPUTE_##n)
-static INLINE uint32_t
+static inline uint32_t
NV50_FIFO_PKHDR(int subc, int mthd, unsigned size)
{
return 0x00000000 | (size << 18) | (subc << 13) | mthd;
}
-static INLINE uint32_t
+static inline uint32_t
NV50_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
{
return 0x40000000 | (size << 18) | (subc << 13) | mthd;
}
-static INLINE uint32_t
+static inline uint32_t
NV50_FIFO_PKHDR_L(int subc, int mthd)
{
return 0x00030000 | (subc << 13) | mthd;
}
-static INLINE uint32_t
+static inline uint32_t
nouveau_bo_memtype(const struct nouveau_bo *bo)
{
return bo->config.nv50.memtype;
}
-static INLINE void
+static inline void
PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
{
*push->cur++ = (uint32_t)(data >> 32);
}
-static INLINE void
+static inline void
BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
@@ -102,7 +102,7 @@ BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
PUSH_DATA (push, NV50_FIFO_PKHDR(subc, mthd, size));
}
-static INLINE void
+static inline void
BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
@@ -112,7 +112,7 @@ BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
}
/* long, non-incremental, nv50-only */
-static INLINE void
+static inline void
BEGIN_NL50(struct nouveau_pushbuf *push, int subc, int mthd, uint32_t size)
{
#ifndef NV50_PUSH_EXPLICIT_SPACE_CHECKING
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.h b/src/gallium/drivers/nouveau/nv50/nv84_video.h
index 2edba389dbf..09773c12974 100644
--- a/src/gallium/drivers/nouveau/nv50/nv84_video.h
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video.h
@@ -102,12 +102,12 @@ struct nv84_decoder {
uint8_t mpeg12_non_intra_matrix[64];
};
-static INLINE uint32_t mb(uint32_t coord)
+static inline uint32_t mb(uint32_t coord)
{
return (coord + 0xf)>>4;
}
-static INLINE uint32_t mb_half(uint32_t coord)
+static inline uint32_t mb_half(uint32_t coord)
{
return (coord + 0x1f)>>5;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
index f3480b2e00e..8b121477a37 100644
--- a/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video_vp.c
@@ -221,7 +221,7 @@ nv84_decoder_vp_h264(struct nv84_decoder *dec,
PUSH_KICK (push);
}
-static INLINE int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
+static inline int16_t inverse_quantize(int16_t val, uint8_t quant, int mpeg1) {
int16_t ret = val * quant / 16;
if (mpeg1 && ret) {
if (ret > 0)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 56fc83d3679..47bd123621b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -121,51 +121,51 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
return 0;
}
-boolean
+bool
nvc0_compute_validate_program(struct nvc0_context *nvc0)
{
struct nvc0_program *prog = nvc0->compprog;
if (prog->mem)
- return TRUE;
+ return true;
if (!prog->translated) {
prog->translated = nvc0_program_translate(
prog, nvc0->screen->base.device->chipset);
if (!prog->translated)
- return FALSE;
+ return false;
}
if (unlikely(!prog->code_size))
- return FALSE;
+ return false;
if (likely(prog->code_size)) {
if (nvc0_program_upload_code(nvc0, prog)) {
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
- return TRUE;
+ return true;
}
}
- return FALSE;
+ return false;
}
-static boolean
+static bool
nvc0_compute_state_validate(struct nvc0_context *nvc0)
{
if (!nvc0_compute_validate_program(nvc0))
- return FALSE;
+ return false;
/* TODO: textures, samplers, surfaces, global memory buffers */
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
- return FALSE;
+ return false;
if (unlikely(nvc0->state.flushed))
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
- return TRUE;
+ return true;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
index 9a1a71760d7..168a6d1bee2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
@@ -4,7 +4,7 @@
#include "nv50/nv50_defs.xml.h"
#include "nvc0/nvc0_compute.xml.h"
-boolean
+bool
nvc0_compute_validate_program(struct nvc0_context *nvc0);
#endif /* NVC0_COMPUTE_H */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index a35c3f66142..84f8db6a8ac 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -63,12 +63,12 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
if (!nvc0->vtxbuf[i].buffer)
continue;
if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
}
if (nvc0->idxbuf.buffer &&
nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
uint32_t valid = nvc0->constbuf_valid[s];
@@ -86,7 +86,7 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
continue;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT)
- nvc0->cb_dirty = TRUE;
+ nvc0->cb_dirty = true;
}
}
}
@@ -164,9 +164,9 @@ nvc0_default_kick_notify(struct nouveau_pushbuf *push)
if (screen) {
nouveau_fence_next(&screen->base);
- nouveau_fence_update(&screen->base, TRUE);
+ nouveau_fence_update(&screen->base, true);
if (screen->cur_ctx)
- screen->cur_ctx->state.flushed = TRUE;
+ screen->cur_ctx->state.flushed = true;
NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
}
}
@@ -378,7 +378,7 @@ out_err:
void
nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
- boolean on_flush)
+ bool on_flush)
{
struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
struct nouveau_list *it;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index a8d7593b398..f4499423a10 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -54,6 +54,7 @@
#define NVC0_NEW_IDXBUF (1 << 22)
#define NVC0_NEW_SURFACES (1 << 23)
#define NVC0_NEW_MIN_SAMPLES (1 << 24)
+#define NVC0_NEW_TESSFACTOR (1 << 25)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
@@ -93,7 +94,7 @@
struct nvc0_blitctx;
-boolean nvc0_blitctx_create(struct nvc0_context *);
+bool nvc0_blitctx_create(struct nvc0_context *);
void nvc0_blitctx_destroy(struct nvc0_context *);
struct nvc0_context {
@@ -130,7 +131,7 @@ struct nvc0_context {
struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[6];
uint16_t constbuf_valid[6];
- boolean cb_dirty;
+ bool cb_dirty;
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
@@ -164,14 +165,17 @@ struct nvc0_context {
unsigned sample_mask;
unsigned min_samples;
- boolean vbo_push_hint;
+ float default_tess_outer[4];
+ float default_tess_inner[2];
+
+ bool vbo_push_hint;
uint8_t tfbbuf_dirty;
struct pipe_stream_output_target *tfbbuf[4];
unsigned num_tfbbufs;
struct pipe_query *cond_query;
- boolean cond_cond; /* inverted rendering condition */
+ bool cond_cond; /* inverted rendering condition */
uint cond_mode;
uint32_t cond_condmode; /* the calculated condition */
@@ -184,19 +188,19 @@ struct nvc0_context {
struct util_dynarray global_residents;
};
-static INLINE struct nvc0_context *
+static inline struct nvc0_context *
nvc0_context(struct pipe_context *pipe)
{
return (struct nvc0_context *)pipe;
}
-static INLINE unsigned
+static inline unsigned
nvc0_shader_stage(unsigned pipe)
{
switch (pipe) {
case PIPE_SHADER_VERTEX: return 0;
-/* case PIPE_SHADER_TESSELLATION_CONTROL: return 1; */
-/* case PIPE_SHADER_TESSELLATION_EVALUATION: return 2; */
+ case PIPE_SHADER_TESS_CTRL: return 1;
+ case PIPE_SHADER_TESS_EVAL: return 2;
case PIPE_SHADER_GEOMETRY: return 3;
case PIPE_SHADER_FRAGMENT: return 4;
case PIPE_SHADER_COMPUTE: return 5;
@@ -210,15 +214,15 @@ nvc0_shader_stage(unsigned pipe)
/* nvc0_context.c */
struct pipe_context *nvc0_create(struct pipe_screen *, void *);
void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
- boolean on_flush);
+ bool on_flush);
void nvc0_default_kick_notify(struct nouveau_pushbuf *);
/* nvc0_draw.c */
extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
/* nvc0_program.c */
-boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
-boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
+bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
+bool nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
void nvc0_program_library_upload(struct nvc0_context *);
uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
@@ -231,7 +235,7 @@ void nvc0_query_pushbuf_submit(struct nouveau_pushbuf *,
void nvc0_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
void nvc0_so_target_save_offset(struct pipe_context *,
struct pipe_stream_output_target *, unsigned i,
- boolean *serialize);
+ bool *serialize);
#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
@@ -250,8 +254,8 @@ extern void nvc0_init_state_functions(struct nvc0_context *);
/* nvc0_state_validate.c */
void nvc0_validate_global_residents(struct nvc0_context *,
struct nouveau_bufctx *, int bin);
-extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
- unsigned space_words);
+extern bool nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
+ unsigned space_words);
/* nvc0_surface.c */
extern void nvc0_clear(struct pipe_context *, unsigned buffers,
@@ -260,7 +264,7 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers,
extern void nvc0_init_surface_functions(struct nvc0_context *);
/* nvc0_tex.c */
-boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
+bool nve4_validate_tsc(struct nvc0_context *nvc0, int s);
void nvc0_validate_textures(struct nvc0_context *);
void nvc0_validate_samplers(struct nvc0_context *);
void nve4_set_tex_handles(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index 3875bbf4ca4..15991c3d2bd 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -29,13 +29,13 @@
#include "nvc0/nvc0_resource.h"
static uint32_t
-nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d)
+nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
{
return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);
}
static uint32_t
-nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
{
const unsigned ms = util_logbase2(mt->base.base.nr_samples);
@@ -133,7 +133,7 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
return tile_flags;
}
-static INLINE boolean
+static inline bool
nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
{
switch (mt->base.base.nr_samples) {
@@ -157,9 +157,9 @@ nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
break;
default:
NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
static void
@@ -250,7 +250,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
struct pipe_resource *pt = &mt->base.base;
- boolean compressed = dev->drm_version >= 0x01000101;
+ bool compressed = dev->drm_version >= 0x01000101;
int ret;
union nouveau_bo_config bo_config;
uint32_t bo_flags;
@@ -325,7 +325,7 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
}
/* Offset of zslice @z from start of level @l. */
-INLINE unsigned
+inline unsigned
nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
{
const struct pipe_resource *pt = &mt->base.base;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index e1f5a8c4416..507a2507fe3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -31,24 +31,25 @@
* 124 scalar varying values.
*/
static uint32_t
-nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
+nvc0_shader_input_address(unsigned sn, unsigned si)
{
switch (sn) {
- case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4;
+ case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10;
case TGSI_SEMANTIC_PRIMID: return 0x060;
case TGSI_SEMANTIC_LAYER: return 0x064;
case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
case TGSI_SEMANTIC_PSIZE: return 0x06c;
case TGSI_SEMANTIC_POSITION: return 0x070;
- case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10;
case TGSI_SEMANTIC_FOG: return 0x2e8;
case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
- case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
case TGSI_SEMANTIC_PCOORD: return 0x2e0;
- case NV50_SEMANTIC_TESSCOORD: return 0x2f0;
+ case TGSI_SEMANTIC_TESSCOORD: return 0x2f0;
case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
case TGSI_SEMANTIC_VERTEXID: return 0x2fc;
case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
@@ -60,20 +61,21 @@ nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
}
static uint32_t
-nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
+nvc0_shader_output_address(unsigned sn, unsigned si)
{
switch (sn) {
- case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_TESSOUTER: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_TESSINNER: return 0x010 + si * 0x4;
+ case TGSI_SEMANTIC_PATCH: return 0x020 + si * 0x10;
case TGSI_SEMANTIC_PRIMID: return 0x060;
case TGSI_SEMANTIC_LAYER: return 0x064;
case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
case TGSI_SEMANTIC_PSIZE: return 0x06c;
case TGSI_SEMANTIC_POSITION: return 0x070;
- case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_GENERIC: return 0x080 + si * 0x10;
case TGSI_SEMANTIC_FOG: return 0x2e8;
case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
- case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
@@ -95,7 +97,7 @@ nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
case TGSI_SEMANTIC_VERTEXID:
info->in[i].mask = 0x1;
info->in[i].slot[0] =
- nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4;
+ nvc0_shader_input_address(info->in[i].sn, 0) / 4;
continue;
default:
break;
@@ -111,18 +113,11 @@ nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
static int
nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
{
- unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
unsigned offset;
unsigned i, c;
for (i = 0; i < info->numInputs; ++i) {
- offset = nvc0_shader_input_address(info->in[i].sn,
- info->in[i].si, ubase);
- if (info->in[i].patch && offset >= 0x20)
- offset = 0x20 + info->in[i].si * 0x10;
-
- if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD)
- info->in[i].mask &= 3;
+ offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si);
for (c = 0; c < 4; ++c)
info->in[i].slot[c] = (offset + c * 0x4) / 4;
@@ -157,15 +152,11 @@ nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
static int
nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info)
{
- unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
unsigned offset;
unsigned i, c;
for (i = 0; i < info->numOutputs; ++i) {
- offset = nvc0_shader_output_address(info->out[i].sn,
- info->out[i].si, ubase);
- if (info->out[i].patch && offset >= 0x20)
- offset = 0x20 + info->out[i].si * 0x10;
+ offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si);
for (c = 0; c < 4; ++c)
info->out[i].slot[c] = (offset + c * 0x4) / 4;
@@ -193,7 +184,7 @@ nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info)
return ret;
}
-static INLINE void
+static inline void
nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
{
uint8_t min = (vp->hdr[4] >> 12) & 0xff;
@@ -216,12 +207,8 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
continue;
for (c = 0; c < 4; ++c) {
a = info->in[i].slot[c];
- if (info->in[i].mask & (1 << c)) {
- if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD)
- vp->hdr[5 + a / 32] |= 1 << (a % 32);
- else
- nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]);
- }
+ if (info->in[i].mask & (1 << c))
+ vp->hdr[5 + a / 32] |= 1 << (a % 32);
}
}
@@ -250,6 +237,14 @@ nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
case TGSI_SEMANTIC_VERTEXID:
vp->hdr[10] |= 1 << 31;
break;
+ case TGSI_SEMANTIC_TESSCOORD:
+ /* We don't have the mask, nor the slots populated. While this could
+ * be achieved, the vast majority of the time if either of the coords
+ * are read, then both will be read.
+ */
+ nvc0_vtgp_hdr_update_oread(vp, 0x2f0 / 4);
+ nvc0_vtgp_hdr_update_oread(vp, 0x2f4 / 4);
+ break;
default:
break;
}
@@ -277,7 +272,6 @@ nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
return nvc0_vtgp_gen_header(vp, info);
}
-#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
static void
nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
{
@@ -305,14 +299,13 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
switch (info->prop.tp.partitioning) {
- case PIPE_TESS_PART_INTEGER:
- case PIPE_TESS_PART_POW2:
+ case PIPE_TESS_SPACING_EQUAL:
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
break;
- case PIPE_TESS_PART_FRACT_ODD:
+ case PIPE_TESS_SPACING_FRACTIONAL_ODD:
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
break;
- case PIPE_TESS_PART_FRACT_EVEN:
+ case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
break;
default:
@@ -320,9 +313,7 @@ nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
break;
}
}
-#endif
-#ifdef PIPE_SHADER_HULL
static int
nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
{
@@ -346,9 +337,7 @@ nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
return 0;
}
-#endif
-#ifdef PIPE_SHADER_DOMAIN
static int
nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
{
@@ -365,7 +354,6 @@ nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
return 0;
}
-#endif
static int
nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
@@ -523,7 +511,7 @@ nvc0_program_dump(struct nvc0_program *prog)
}
#endif
-boolean
+bool
nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
{
struct nv50_ir_prog_info *info;
@@ -531,7 +519,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
info = CALLOC_STRUCT(nv50_ir_prog_info);
if (!info)
- return FALSE;
+ return false;
info->type = prog->type;
info->target = chipset;
@@ -598,16 +586,12 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
case PIPE_SHADER_VERTEX:
ret = nvc0_vp_gen_header(prog, info);
break;
-#ifdef PIPE_SHADER_HULL
- case PIPE_SHADER_HULL:
+ case PIPE_SHADER_TESS_CTRL:
ret = nvc0_tcp_gen_header(prog, info);
break;
-#endif
-#ifdef PIPE_SHADER_DOMAIN
- case PIPE_SHADER_DOMAIN:
+ case PIPE_SHADER_TESS_EVAL:
ret = nvc0_tep_gen_header(prog, info);
break;
-#endif
case PIPE_SHADER_GEOMETRY:
ret = nvc0_gp_gen_header(prog, info);
break;
@@ -630,7 +614,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
assert(info->bin.tlsSpace < (1 << 24));
prog->hdr[0] |= 1 << 26;
prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
- prog->need_tls = TRUE;
+ prog->need_tls = true;
}
/* TODO: factor 2 only needed where joinat/precont is used,
* and we only have to count non-uniform branches
@@ -638,7 +622,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
/*
if ((info->maxCFDepth * 2) > 16) {
prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
- prog->need_tls = TRUE;
+ prog->need_tls = true;
}
*/
if (info->io.globalAccess)
@@ -655,11 +639,11 @@ out:
return !ret;
}
-boolean
+bool
nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
struct nvc0_screen *screen = nvc0->screen;
- const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
+ const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
uint32_t lib_pos = screen->lib_code->start;
@@ -694,7 +678,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
if (ret) {
NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
- return FALSE;
+ return false;
}
IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0);
}
@@ -729,7 +713,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
#ifdef DEBUG
- if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+ if (debug_get_bool_option("NV50_PROG_DEBUG", false))
nvc0_program_dump(prog);
#endif
@@ -746,7 +730,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
PUSH_DATA (nvc0->base.pushbuf, 0x1011);
- return TRUE;
+ return true;
}
/* Upload code for builtin functions like integer division emulation. */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 3fd9d21b4c4..390e0c7a4f0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -21,8 +21,8 @@ struct nvc0_program {
struct pipe_shader_state pipe;
ubyte type;
- boolean translated;
- boolean need_tls;
+ bool translated;
+ bool need_tls;
uint8_t num_gprs;
uint32_t *code;
@@ -41,7 +41,7 @@ struct nvc0_program {
uint8_t clip_enable; /* mask of defined clip planes */
uint8_t num_ucps; /* also set to max if ClipDistance is used */
uint8_t edgeflag; /* attribute index of edgeflag input */
- boolean need_vertex_id;
+ bool need_vertex_id;
} vp;
struct {
uint8_t early_z;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index aea6cbda02d..f7b85a8e931 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -44,7 +44,7 @@ struct nvc0_query {
uint32_t base;
uint32_t offset; /* base + i * rotate */
uint8_t state;
- boolean is64bit;
+ bool is64bit;
uint8_t rotate;
int nesting; /* only used for occlusion queries */
union {
@@ -62,13 +62,13 @@ static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
struct nvc0_query *, void *, boolean);
-static INLINE struct nvc0_query *
+static inline struct nvc0_query *
nvc0_query(struct pipe_query *pipe)
{
return (struct nvc0_query *)pipe;
}
-static boolean
+static bool
nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
{
struct nvc0_screen *screen = nvc0->screen;
@@ -87,17 +87,17 @@ nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
if (size) {
q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
if (!q->bo)
- return FALSE;
+ return false;
q->offset = q->base;
ret = nouveau_bo_map(q->bo, 0, screen->base.client);
if (ret) {
nvc0_query_allocate(nvc0, q, 0);
- return FALSE;
+ return false;
}
q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
}
- return TRUE;
+ return true;
}
static void
@@ -126,17 +126,17 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
space = NVC0_QUERY_ALLOC_SPACE;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
- q->is64bit = TRUE;
+ q->is64bit = true;
space = 512;
break;
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- q->is64bit = TRUE;
+ q->is64bit = true;
space = 64;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
- q->is64bit = TRUE;
+ q->is64bit = true;
q->index = index;
space = 32;
break;
@@ -257,11 +257,11 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nvc0_context *nvc0 = nvc0_context(pipe);
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_query *q = nvc0_query(pq);
- boolean ret = true;
+ bool ret = true;
/* For occlusion queries we have to change the storage, because a previous
- * query might set the initial render conition to FALSE even *after* we re-
- * initialized it to TRUE.
+ * query might set the initial render condition to false even *after* we re-
+ * initialized it to true.
*/
if (q->rotate) {
nvc0_query_rotate(nvc0, q);
@@ -270,7 +270,7 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
* query ?
*/
q->data[0] = q->sequence; /* initialize sequence */
- q->data[1] = 1; /* initial render condition = TRUE */
+ q->data[1] = 1; /* initial render condition = true */
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
q->data[5] = 0;
}
@@ -401,7 +401,7 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
- /* This query is not issued on GPU because disjoint is forced to FALSE */
+ /* This query is not issued on GPU because disjoint is forced to false */
q->state = NVC0_QUERY_STATE_READY;
break;
default:
@@ -422,7 +422,7 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
}
-static INLINE void
+static inline void
nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
{
if (q->is64bit) {
@@ -442,7 +442,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
struct nvc0_query *q = nvc0_query(pq);
uint64_t *res64 = (uint64_t*)result;
uint32_t *res32 = (uint32_t*)result;
- boolean *res8 = (boolean*)result;
+ uint8_t *res8 = (uint8_t*)result;
uint64_t *data64 = (uint64_t *)q->data;
unsigned i;
@@ -450,7 +450,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
q->type <= NVC0_QUERY_DRV_STAT_LAST) {
res64[0] = q->u.value;
- return TRUE;
+ return true;
} else
#endif
if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
@@ -468,17 +468,17 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
/* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
PUSH_KICK(nvc0->base.pushbuf);
}
- return FALSE;
+ return false;
}
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
- return FALSE;
+ return false;
NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
}
q->state = NVC0_QUERY_STATE_READY;
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
- res8[0] = TRUE;
+ res8[0] = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
res64[0] = q->data[1] - q->data[5];
@@ -502,7 +502,7 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
res64[0] = 1000000000;
- res8[8] = FALSE;
+ res8[8] = false;
break;
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
@@ -516,10 +516,10 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
break;
default:
assert(0); /* can't happen, we don't create queries with invalid type */
- return FALSE;
+ return false;
}
- return TRUE;
+ return true;
}
void
@@ -549,7 +549,7 @@ nvc0_render_condition(struct pipe_context *pipe,
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_query *q;
uint32_t cond;
- boolean wait =
+ bool wait =
mode != PIPE_RENDER_COND_NO_WAIT &&
mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
@@ -563,7 +563,7 @@ nvc0_render_condition(struct pipe_context *pipe,
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
cond = condition ? NVC0_3D_COND_MODE_EQUAL :
NVC0_3D_COND_MODE_NOT_EQUAL;
- wait = TRUE;
+ wait = true;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -626,12 +626,12 @@ nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
void
nvc0_so_target_save_offset(struct pipe_context *pipe,
struct pipe_stream_output_target *ptarg,
- unsigned index, boolean *serialize)
+ unsigned index, bool *serialize)
{
struct nvc0_so_target *targ = nvc0_so_target(ptarg);
if (*serialize) {
- *serialize = FALSE;
+ *serialize = false;
PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
@@ -1080,7 +1080,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_screen *screen = nvc0->screen;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+ const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
const struct nvc0_mp_pm_query_cfg *cfg;
unsigned i, c;
unsigned num_ab[2] = { 0, 0 };
@@ -1101,7 +1101,7 @@ nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
PUSH_SPACE(push, 4 * 8 * (is_nve4 ? 1 : 6) + 6);
if (!screen->pm.mp_counters_enabled) {
- screen->pm.mp_counters_enabled = TRUE;
+ screen->pm.mp_counters_enabled = true;
BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
PUSH_DATA (push, 0x1fcb);
}
@@ -1168,7 +1168,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
struct nvc0_screen *screen = nvc0->screen;
struct pipe_context *pipe = &nvc0->base.pipe;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+ const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
uint32_t mask;
uint32_t input[3];
const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
@@ -1181,7 +1181,7 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
if (unlikely(!screen->pm.prog)) {
struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
prog->type = PIPE_SHADER_COMPUTE;
- prog->translated = TRUE;
+ prog->translated = true;
prog->num_gprs = 14;
prog->parm_size = 12;
if (is_nve4) {
@@ -1249,9 +1249,9 @@ nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
}
}
-static INLINE boolean
+static inline bool
nvc0_mp_pm_query_read_data(uint32_t count[32][4],
- struct nvc0_context *nvc0, boolean wait,
+ struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
const struct nvc0_mp_pm_query_cfg *cfg,
unsigned mp_count)
@@ -1264,19 +1264,19 @@ nvc0_mp_pm_query_read_data(uint32_t count[32][4],
for (c = 0; c < cfg->num_counters; ++c) {
if (q->data[b + 8] != q->sequence) {
if (!wait)
- return FALSE;
+ return false;
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
- return FALSE;
+ return false;
}
count[p][c] = q->data[b + q->ctr[c]];
}
}
- return TRUE;
+ return true;
}
-static INLINE boolean
+static inline bool
nve4_mp_pm_query_read_data(uint32_t count[32][4],
- struct nvc0_context *nvc0, boolean wait,
+ struct nvc0_context *nvc0, bool wait,
struct nvc0_query *q,
const struct nvc0_mp_pm_query_cfg *cfg,
unsigned mp_count)
@@ -1291,9 +1291,9 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4],
for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
if (q->data[b + 20 + d] != q->sequence) {
if (!wait)
- return FALSE;
+ return false;
if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
- return FALSE;
+ return false;
}
if (q->ctr[c] & ~0x3)
count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
@@ -1302,7 +1302,7 @@ nve4_mp_pm_query_read_data(uint32_t count[32][4],
}
}
}
- return TRUE;
+ return true;
}
/* Metric calculations:
@@ -1325,7 +1325,7 @@ nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
unsigned p, c;
const struct nvc0_mp_pm_query_cfg *cfg;
- boolean ret;
+ bool ret;
cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
@@ -1334,7 +1334,7 @@ nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
else
ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
if (!ret)
- return FALSE;
+ return false;
if (cfg->op == NVC0_COUNTER_OPn_SUM) {
for (c = 0; c < cfg->num_counters; ++c)
@@ -1394,7 +1394,7 @@ nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
}
*(uint64_t *)result = value;
- return TRUE;
+ return true;
}
int
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 56c230e42fc..ab19b26f156 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -44,16 +44,16 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
unsigned bindings)
{
if (sample_count > 8)
- return FALSE;
+ return false;
if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
- return FALSE;
+ return false;
if (!util_format_is_supported(format, bindings))
- return FALSE;
+ return false;
if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER))
if (util_format_get_blocksizebits(format) == 3 * 32)
- return FALSE;
+ return false;
/* transfers & shared are always supported */
bindings &= ~(PIPE_BIND_TRANSFER_READ |
@@ -120,6 +120,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
case PIPE_CAP_ENDIANNESS:
return PIPE_ENDIAN_LITTLE;
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ return 30;
/* supported caps */
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
@@ -163,7 +165,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_USER_CONSTANT_BUFFERS:
case PIPE_CAP_USER_INDEX_BUFFERS:
case PIPE_CAP_USER_VERTEX_BUFFERS:
- case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
@@ -174,11 +175,16 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_COMPUTE:
return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
/* unsupported caps */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
@@ -226,13 +232,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
switch (shader) {
case PIPE_SHADER_VERTEX:
- /*
- case PIPE_SHADER_TESSELLATION_CONTROL:
- case PIPE_SHADER_TESSELLATION_EVALUATION:
- */
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_FRAGMENT:
break;
+ case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
+ if (class_3d >= GM107_3D_CLASS)
+ return 0;
+ break;
case PIPE_SHADER_COMPUTE:
if (class_3d != NVE4_3D_CLASS)
return 0;
@@ -341,6 +348,7 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
enum pipe_compute_cap param, void *data)
{
uint64_t *data64 = (uint64_t *)data;
+ uint32_t *data32 = (uint32_t *)data;
const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
switch (param) {
@@ -372,6 +380,9 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
data64[0] = 4096;
return 8;
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ data32[0] = 32;
+ return 4;
default:
return 0;
}
@@ -550,7 +561,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
/* Using COMPUTE has weird effects on 3D state, we need to
* investigate this further before enabling it by default.
*/
- if (debug_get_bool_option("NVC0_COMPUTE", FALSE))
+ if (debug_get_bool_option("NVC0_COMPUTE", false))
return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
return 0;
case 0xe0:
@@ -564,7 +575,7 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
}
}
-boolean
+bool
nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
uint32_t lpos, uint32_t lneg, uint32_t cstack)
{
@@ -574,7 +585,7 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
if (size >= (1 << 20)) {
NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
- return FALSE;
+ return false;
}
size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
@@ -587,11 +598,11 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
NULL, &bo);
if (ret) {
NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
- return FALSE;
+ return false;
}
nouveau_bo_ref(NULL, &screen->tls);
screen->tls = bo;
- return TRUE;
+ return true;
}
#define FAIL_SCREEN_INIT(str, err) \
@@ -610,6 +621,7 @@ nvc0_screen_create(struct nouveau_device *dev)
struct nouveau_pushbuf *push;
uint64_t value;
uint32_t obj_class;
+ uint32_t flags;
int ret;
unsigned i;
@@ -665,8 +677,11 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
- ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL,
- &screen->fence.bo);
+ flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
+ if (dev->drm_version >= 0x01000202)
+ flags |= NOUVEAU_BO_COHERENT;
+
+ ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo);
if (ret)
goto fail;
nouveau_bo_map(screen->fence.bo, 0, NULL);
@@ -781,7 +796,7 @@ nvc0_screen_create(struct nouveau_device *dev)
BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1);
PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS);
- if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
/* kill shaders after about 1 second (at 100 MHz) */
BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1);
PUSH_DATA (push, 0x17);
@@ -1012,6 +1027,7 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 0x20);
BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1);
PUSH_DATA (push, 0x00);
+ screen->save_state.patch_vertices = 3;
BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1);
PUSH_DATA (push, 0);
@@ -1031,7 +1047,7 @@ nvc0_screen_create(struct nouveau_device *dev)
if (!nvc0_blitter_create(screen))
goto fail;
- nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
return pscreen;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index ef2bd43f006..d8826ae0c0d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -28,16 +28,17 @@ struct nvc0_context;
struct nvc0_blitter;
struct nvc0_graph_state {
- boolean flushed;
- boolean rasterizer_discard;
- boolean early_z_forced;
- boolean prim_restart;
+ bool flushed;
+ bool rasterizer_discard;
+ bool early_z_forced;
+ bool prim_restart;
uint32_t instance_elts; /* bitmask of per-instance elements */
uint32_t instance_base;
uint32_t constant_vbos;
uint32_t constant_elts;
int32_t index_bias;
uint16_t scissor;
+ uint8_t patch_vertices;
uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
@@ -95,7 +96,7 @@ struct nvc0_screen {
struct nvc0_program *prog; /* compute state object to read MP counters */
struct pipe_query *mp_counter[8]; /* counter to query allocation */
uint8_t num_mp_pm_active[2];
- boolean mp_counters_enabled;
+ bool mp_counters_enabled;
} pm;
struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
@@ -105,7 +106,7 @@ struct nvc0_screen {
struct nouveau_object *nvsw;
};
-static INLINE struct nvc0_screen *
+static inline struct nvc0_screen *
nvc0_screen(struct pipe_screen *screen)
{
return (struct nvc0_screen *)screen;
@@ -276,7 +277,7 @@ int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
int nvc0_screen_get_driver_query_group_info(struct pipe_screen *, unsigned,
struct pipe_driver_query_group_info *);
-boolean nvc0_blitter_create(struct nvc0_screen *);
+bool nvc0_blitter_create(struct nvc0_screen *);
void nvc0_blitter_destroy(struct nvc0_screen *);
void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
@@ -287,10 +288,10 @@ int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
-boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
- uint32_t lneg, uint32_t cstack);
+bool nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+ uint32_t lneg, uint32_t cstack);
-static INLINE void
+static inline void
nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
{
struct nvc0_screen *screen = nvc0_screen(res->base.screen);
@@ -302,7 +303,7 @@ nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
}
}
-static INLINE void
+static inline void
nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
{
if (likely(res->bo)) {
@@ -325,21 +326,21 @@ struct nvc0_format {
extern const struct nvc0_format nvc0_format_table[];
-static INLINE void
+static inline void
nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
{
if (tic->id >= 0)
screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
}
-static INLINE void
+static inline void
nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
{
if (tsc->id >= 0)
screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
}
-static INLINE void
+static inline void
nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
{
if (tic->id >= 0) {
@@ -348,7 +349,7 @@ nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
}
}
-static INLINE void
+static inline void
nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
{
if (tsc->id >= 0) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index e0842784a88..8aa127adc0a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -27,7 +27,7 @@
#include "nvc0/nvc0_context.h"
-static INLINE void
+static inline void
nvc0_program_update_context_state(struct nvc0_context *nvc0,
struct nvc0_program *prog, int stage)
{
@@ -63,22 +63,22 @@ nvc0_program_update_context_state(struct nvc0_context *nvc0,
}
}
-static INLINE boolean
+static inline bool
nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
if (prog->mem)
- return TRUE;
+ return true;
if (!prog->translated) {
prog->translated = nvc0_program_translate(
prog, nvc0->screen->base.device->chipset);
if (!prog->translated)
- return FALSE;
+ return false;
}
if (likely(prog->code_size))
return nvc0_program_upload_code(nvc0, prog);
- return TRUE; /* stream output info only */
+ return true; /* stream output info only */
}
void
@@ -147,9 +147,6 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, tp->code_base);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
PUSH_DATA (push, tp->num_gprs);
-
- if (tp->tp.input_patch_size <= 32)
- IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
} else {
BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
PUSH_DATA (push, 0x20);
@@ -192,7 +189,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
/* we allow GPs with no code for specifying stream output state only */
if (gp && gp->code_size) {
- const boolean gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
+ const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
PUSH_DATA (push, 0x41);
@@ -280,7 +277,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0)
nvc0_query_pushbuf_submit(push, targ->pq, 0x4);
} else {
PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
- targ->clean = FALSE;
+ targ->clean = false;
}
}
for (; b < 4; ++b)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 6b7a211e71b..2a33857d9df 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -35,7 +35,7 @@
#include "nouveau_gldefs.h"
-static INLINE uint32_t
+static inline uint32_t
nvc0_colormask(unsigned mask)
{
uint32_t ret = 0;
@@ -55,7 +55,7 @@ nvc0_colormask(unsigned mask)
#define NVC0_BLEND_FACTOR_CASE(a, b) \
case PIPE_BLENDFACTOR_##a: return NV50_BLEND_FACTOR_##b
-static INLINE uint32_t
+static inline uint32_t
nvc0_blend_fac(unsigned factor)
{
switch (factor) {
@@ -92,8 +92,8 @@ nvc0_blend_state_create(struct pipe_context *pipe,
int r; /* reference */
uint32_t ms;
uint8_t blend_en = 0;
- boolean indep_masks = FALSE;
- boolean indep_funcs = FALSE;
+ bool indep_masks = false;
+ bool indep_funcs = false;
so->pipe = *cso;
@@ -111,7 +111,7 @@ nvc0_blend_state_create(struct pipe_context *pipe,
cso->rt[i].alpha_func != cso->rt[r].alpha_func ||
cso->rt[i].alpha_src_factor != cso->rt[r].alpha_src_factor ||
cso->rt[i].alpha_dst_factor != cso->rt[r].alpha_dst_factor) {
- indep_funcs = TRUE;
+ indep_funcs = true;
break;
}
}
@@ -120,7 +120,7 @@ nvc0_blend_state_create(struct pipe_context *pipe,
for (i = 1; i < 8; ++i) {
if (cso->rt[i].colormask != cso->rt[0].colormask) {
- indep_masks = TRUE;
+ indep_masks = true;
break;
}
}
@@ -351,6 +351,13 @@ nvc0_zsa_state_create(struct pipe_context *pipe,
SB_DATA (so, nvgl_comparison_op(cso->depth.func));
}
+ SB_IMMED_3D(so, DEPTH_BOUNDS_EN, cso->depth.bounds_test);
+ if (cso->depth.bounds_test) {
+ SB_BEGIN_3D(so, DEPTH_BOUNDS(0), 2);
+ SB_DATA (so, fui(cso->depth.bounds_min));
+ SB_DATA (so, fui(cso->depth.bounds_max));
+ }
+
if (cso->stencil[0].enabled) {
SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
SB_DATA (so, 1);
@@ -428,7 +435,7 @@ nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
FREE(hwcso);
}
-static INLINE void
+static inline void
nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
unsigned nr, void **hwcso)
{
@@ -508,6 +515,14 @@ nvc0_bind_sampler_states(struct pipe_context *pipe, unsigned shader,
assert(start == 0);
nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
break;
+ case PIPE_SHADER_TESS_CTRL:
+ assert(start == 0);
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 1, nr, s);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ assert(start == 0);
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 2, nr, s);
+ break;
case PIPE_SHADER_GEOMETRY:
assert(start == 0);
nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
@@ -537,7 +552,7 @@ nvc0_sampler_view_destroy(struct pipe_context *pipe,
FREE(nv50_tic_entry(view));
}
-static INLINE void
+static inline void
nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
unsigned nr,
struct pipe_sampler_view **views)
@@ -633,6 +648,12 @@ nvc0_set_sampler_views(struct pipe_context *pipe, unsigned shader,
case PIPE_SHADER_VERTEX:
nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
break;
+ case PIPE_SHADER_TESS_CTRL:
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 1, nr, views);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 2, nr, views);
+ break;
case PIPE_SHADER_GEOMETRY:
nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
break;
@@ -733,6 +754,38 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
nvc0->dirty |= NVC0_NEW_GMTYPROG;
}
+static void *
+nvc0_tcp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{ /* Tessellation-control create hook: forwards pipe and cso unchanged to
+ * nvc0_sp_state_create(), passing PIPE_SHADER_TESS_CTRL as the stage. */
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_TESS_CTRL);
+}
+
+static void
+nvc0_tcp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ /* Tessellation-control bind hook: remember hwcso as the current tctlprog
+ * and set NVC0_NEW_TCTLPROG, the dirty bit that validate_list pairs with
+ * nvc0_tctlprog_validate. The pointer is stored as-is, without checks. */
+ nvc0->tctlprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_TCTLPROG;
+}
+
+static void *
+nvc0_tep_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{ /* Tessellation-evaluation create hook: forwards pipe and cso unchanged to
+ * nvc0_sp_state_create(), passing PIPE_SHADER_TESS_EVAL as the stage. */
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_TESS_EVAL);
+}
+
+static void
+nvc0_tep_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ /* Tessellation-evaluation bind hook: remember hwcso as the current
+ * tevlprog and set NVC0_NEW_TEVLPROG, the dirty bit that validate_list
+ * pairs with nvc0_tevlprog_validate (and that also appears in the mask
+ * for nvc0_validate_clip). The pointer is stored as-is, without checks. */
+ nvc0->tevlprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_TEVLPROG;
+}
+
static void *
nvc0_cp_state_create(struct pipe_context *pipe,
const struct pipe_compute_state *cso)
@@ -790,7 +843,7 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
- nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+ nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
if (nvc0->constbuf[s][i].user) {
nvc0->constbuf[s][i].u.data = cb->user_buffer;
nvc0->constbuf[s][i].size = MIN2(cb->buffer_size, 0x10000);
@@ -933,6 +986,18 @@ nvc0_set_viewport_states(struct pipe_context *pipe,
}
+static void
+nvc0_set_tess_state(struct pipe_context *pipe,
+ const float default_tess_outer[4],
+ const float default_tess_inner[2])
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ /* set_tess_state hook: copy the four outer and the two inner default
+ * tessellation levels into the context, then set NVC0_NEW_TESSFACTOR, the
+ * dirty bit that validate_list pairs with nvc0_validate_tess_state.
+ * Nothing is pushed to the hardware from here. */
+ memcpy(nvc0->default_tess_outer, default_tess_outer, 4 * sizeof(float));
+ memcpy(nvc0->default_tess_inner, default_tess_inner, 2 * sizeof(float));
+ nvc0->dirty |= NVC0_NEW_TESSFACTOR;
+}
+
static void
nvc0_set_vertex_buffers(struct pipe_context *pipe,
unsigned start_slot, unsigned count,
@@ -1018,7 +1083,7 @@ nvc0_so_target_create(struct pipe_context *pipe,
FREE(targ);
return NULL;
}
- targ->clean = TRUE;
+ targ->clean = true;
targ->pipe.buffer_size = size;
targ->pipe.buffer_offset = offset;
@@ -1051,13 +1116,13 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
unsigned i;
- boolean serialize = TRUE;
+ bool serialize = true;
assert(num_targets <= 4);
for (i = 0; i < num_targets; ++i) {
- const boolean changed = nvc0->tfbbuf[i] != targets[i];
- const boolean append = (offsets[i] == ((unsigned)-1));
+ const bool changed = nvc0->tfbbuf[i] != targets[i];
+ const bool append = (offsets[i] == ((unsigned)-1));
if (!changed && append)
continue;
nvc0->tfbbuf_dirty |= 1 << i;
@@ -1066,7 +1131,7 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
if (targets[i] && !append)
- nvc0_so_target(targets[i])->clean = TRUE;
+ nvc0_so_target(targets[i])->clean = true;
pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]);
}
@@ -1125,16 +1190,18 @@ nvc0_set_compute_resources(struct pipe_context *pipe,
}
static void
-nvc0_set_shader_resources(struct pipe_context *pipe,
- unsigned start, unsigned nr,
- struct pipe_surface **resources)
+nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
+ unsigned start_slot, unsigned count,
+ struct pipe_image_view **views)
{
- nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources);
+#if 0 /* NOTE(review): the whole body is compiled out, so this hook does
+ * nothing at present. The disabled call still names the removed parameters
+ * (start, nr) instead of start_slot/count and has to be updated before the
+ * block can be re-enabled. */
+ nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
+#endif
}
-static INLINE void
+static inline void
nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
{
struct nv04_resource *buf = nv04_resource(res);
@@ -1218,12 +1285,18 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->create_vs_state = nvc0_vp_state_create;
pipe->create_fs_state = nvc0_fp_state_create;
pipe->create_gs_state = nvc0_gp_state_create;
+ pipe->create_tcs_state = nvc0_tcp_state_create;
+ pipe->create_tes_state = nvc0_tep_state_create;
pipe->bind_vs_state = nvc0_vp_state_bind;
pipe->bind_fs_state = nvc0_fp_state_bind;
pipe->bind_gs_state = nvc0_gp_state_bind;
+ pipe->bind_tcs_state = nvc0_tcp_state_bind;
+ pipe->bind_tes_state = nvc0_tep_state_bind;
pipe->delete_vs_state = nvc0_sp_state_delete;
pipe->delete_fs_state = nvc0_sp_state_delete;
pipe->delete_gs_state = nvc0_sp_state_delete;
+ pipe->delete_tcs_state = nvc0_sp_state_delete;
+ pipe->delete_tes_state = nvc0_sp_state_delete;
pipe->create_compute_state = nvc0_cp_state_create;
pipe->bind_compute_state = nvc0_cp_state_bind;
@@ -1239,6 +1312,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
pipe->set_scissor_states = nvc0_set_scissor_states;
pipe->set_viewport_states = nvc0_set_viewport_states;
+ pipe->set_tess_state = nvc0_set_tess_state;
pipe->create_vertex_elements_state = nvc0_vertex_state_create;
pipe->delete_vertex_elements_state = nvc0_vertex_state_delete;
@@ -1253,8 +1327,14 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_global_binding = nvc0_set_global_bindings;
pipe->set_compute_resources = nvc0_set_compute_resources;
- pipe->set_shader_resources = nvc0_set_shader_resources;
+ pipe->set_shader_images = nvc0_set_shader_images;
nvc0->sample_mask = ~0;
nvc0->min_samples = 1;
+ nvc0->default_tess_outer[0] =
+ nvc0->default_tess_outer[1] =
+ nvc0->default_tess_outer[2] =
+ nvc0->default_tess_outer[3] = 1.0;
+ nvc0->default_tess_inner[0] =
+ nvc0->default_tess_inner[1] = 1.0;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index c52399ab312..ce1119c284d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -55,7 +55,7 @@ nvc0_validate_zcull(struct nvc0_context *nvc0)
}
#endif
-static INLINE void
+static inline void
nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i)
{
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 6);
@@ -74,7 +74,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
unsigned i, ms;
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
- boolean serialize = FALSE;
+ bool serialize = false;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
@@ -136,7 +136,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
}
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
- serialize = TRUE;
+ serialize = true;
res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
@@ -168,7 +168,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
ms_mode = mt->ms_mode;
if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
- serialize = TRUE;
+ serialize = true;
mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
@@ -309,7 +309,7 @@ nvc0_validate_viewport(struct nvc0_context *nvc0)
nvc0->viewports_dirty = 0;
}
-static INLINE void
+static inline void
nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -324,7 +324,7 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}
-static INLINE void
+static inline void
nvc0_check_program_ucps(struct nvc0_context *nvc0,
struct nvc0_program *vp, uint8_t mask)
{
@@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0,
nvc0_vertprog_validate(nvc0);
else
if (likely(vp == nvc0->gmtyprog))
- nvc0_vertprog_validate(nvc0);
+ nvc0_gmtyprog_validate(nvc0);
else
nvc0_tevlprog_validate(nvc0);
}
@@ -455,6 +455,8 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
PUSH_DATA (push, (i << 4) | 1);
BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
+
+ nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
} else {
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
PUSH_DATA (push, (i << 4) | 0);
@@ -518,12 +520,12 @@ static void
nvc0_validate_derived_1(struct nvc0_context *nvc0)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
- boolean rasterizer_discard;
+ bool rasterizer_discard;
if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
- rasterizer_discard = TRUE;
+ rasterizer_discard = true;
} else {
- boolean zs = nvc0->zsa &&
+ bool zs = nvc0->zsa &&
(nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled);
rasterizer_discard = !zs &&
(!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
@@ -535,6 +537,33 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
}
}
+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nvc0_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nvc0_validate_derived_2(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ /* Listed in validate_list under NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER. Only
+ * acts when a zsa state with alpha test enabled is bound and the
+ * framebuffer has no color buffers; otherwise nothing is pushed. */
+ if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
+ nvc0->framebuffer.nr_cbufs == 0) {
+ nvc0_fb_set_null_rt(push, 0);
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, (076543210 << 4) | 1); /* NOTE(review): 076543210 is an octal literal (leading 0); presumably an identity RT map above an RT count of 1 -- confirm against the RT_CONTROL layout */
+ }
+}
+
+static void
+nvc0_validate_tess_state(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ /* Emit the context's default tessellation levels: one 6-word method
+ * starting at TESS_LEVEL_OUTER(0), filled with the four outer values and
+ * then the two inner values. NOTE(review): this presumably relies on the
+ * inner-level registers directly following the outer ones -- confirm. */
+ BEGIN_NVC0(push, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
+ PUSH_DATAp(push, nvc0->default_tess_outer, 4);
+ PUSH_DATAp(push, nvc0->default_tess_inner, 2);
+}
+
static void
nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
{
@@ -593,10 +622,12 @@ static struct state_validate {
{ nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
{ nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
+ { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
{ nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
NVC0_NEW_RASTERIZER },
+ { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
{ nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
NVC0_NEW_VERTPROG |
NVC0_NEW_TEVLPROG |
@@ -613,7 +644,7 @@ static struct state_validate {
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
-boolean
+bool
nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words)
{
uint32_t state_mask;
@@ -634,15 +665,15 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words)
}
nvc0->dirty &= ~state_mask;
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, FALSE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, false);
}
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_3d);
ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);
if (unlikely(nvc0->state.flushed)) {
- nvc0->state.flushed = FALSE;
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, TRUE);
+ nvc0->state.flushed = false;
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, true);
}
return !ret;
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
index 1d70b7c7b23..18fcc12dea3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
@@ -29,7 +29,7 @@ struct nvc0_rasterizer_stateobj {
struct nvc0_zsa_stateobj {
struct pipe_depth_stencil_alpha_state pipe;
int size;
- uint32_t state[26];
+ uint32_t state[30];
};
struct nvc0_constbuf {
@@ -39,7 +39,7 @@ struct nvc0_constbuf {
} u;
uint32_t size;
uint32_t offset;
- boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+ bool user; /* should only be true if u.data is valid and non-NULL */
};
struct nvc0_vertex_element {
@@ -55,8 +55,8 @@ struct nvc0_vertex_stateobj {
unsigned num_elements;
uint32_t instance_elts;
uint32_t instance_bufs;
- boolean shared_slots;
- boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
+ bool shared_slots;
+ bool need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
unsigned size; /* size of vertex in bytes (when packed) */
struct nvc0_vertex_element element[0];
};
@@ -65,10 +65,10 @@ struct nvc0_so_target {
struct pipe_stream_output_target pipe;
struct pipe_query *pq;
unsigned stride;
- boolean clean;
+ bool clean;
};
-static INLINE struct nvc0_so_target *
+static inline struct nvc0_so_target *
nvc0_so_target(struct pipe_stream_output_target *ptarg)
{
return (struct nvc0_so_target *)ptarg;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index a820de7259a..51a6f93f891 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -47,8 +47,8 @@
#define NOUVEAU_DRIVER 0xc0
#include "nv50/nv50_blit.h"
-static INLINE uint8_t
-nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+static inline uint8_t
+nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
{
uint8_t id = nvc0_format_table[format].rt;
@@ -81,9 +81,9 @@ nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
}
static int
-nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
+nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
struct nv50_miptree *mt, unsigned level, unsigned layer,
- enum pipe_format pformat, boolean dst_src_pformat_equal)
+ enum pipe_format pformat, bool dst_src_pformat_equal)
{
struct nouveau_bo *bo = mt->base.bo;
uint32_t width, height, depth;
@@ -161,16 +161,16 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
const enum pipe_format dfmt = dst->base.base.format;
const enum pipe_format sfmt = src->base.base.format;
int ret;
- boolean eqfmt = dfmt == sfmt;
+ bool eqfmt = dfmt == sfmt;
if (!PUSH_SPACE(push, 2 * 16 + 32))
return PIPE_ERROR;
- ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt);
+ ret = nvc0_2d_texture_set(push, true, dst, dst_level, dz, dfmt, eqfmt);
if (ret)
return ret;
- ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt);
+ ret = nvc0_2d_texture_set(push, false, src, src_level, sz, sfmt, eqfmt);
if (ret)
return ret;
@@ -189,7 +189,7 @@ nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
PUSH_DATA (push, 0);
PUSH_DATA (push, sx << src->ms_x);
PUSH_DATA (push, 0);
- PUSH_DATA (push, sy << src->ms_x);
+ PUSH_DATA (push, sy << src->ms_y);
return 0;
}
@@ -203,7 +203,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
int ret;
- boolean m2mf;
+ bool m2mf;
unsigned dst_layer = dstz, src_layer = src_box->z;
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
@@ -704,7 +704,7 @@ nvc0_blitter_make_vp(struct nvc0_blitter *blit)
};
blit->vp.type = PIPE_SHADER_VERTEX;
- blit->vp.translated = TRUE;
+ blit->vp.translated = true;
if (blit->screen->base.class_3d >= GM107_3D_CLASS) {
blit->vp.code = (uint32_t *)code_gm107; /* const_cast */
blit->vp.code_size = sizeof(code_gm107);
@@ -1217,7 +1217,7 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
int i;
uint32_t mode;
uint32_t mask = nv50_blit_eng2d_get_mask(info);
- boolean b;
+ bool b;
mode = nv50_blit_get_filter(info) ?
NV50_2D_BLIT_CONTROL_FILTER_BILINEAR :
@@ -1376,39 +1376,40 @@ static void
nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
- boolean eng3d = FALSE;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ bool eng3d = false;
if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
if (!(info->mask & PIPE_MASK_ZS))
return;
if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
- eng3d = TRUE;
+ eng3d = true;
if (info->filter != PIPE_TEX_FILTER_NEAREST)
- eng3d = TRUE;
+ eng3d = true;
} else {
if (!(info->mask & PIPE_MASK_RGBA))
return;
if (info->mask != PIPE_MASK_RGBA)
- eng3d = TRUE;
+ eng3d = true;
}
if (nv50_miptree(info->src.resource)->layout_3d) {
- eng3d = TRUE;
+ eng3d = true;
} else
if (info->src.box.depth != info->dst.box.depth) {
- eng3d = TRUE;
+ eng3d = true;
debug_printf("blit: cannot filter array or cube textures in z direction");
}
if (!eng3d && info->dst.format != info->src.format) {
if (!nv50_2d_dst_format_faithful(info->dst.format)) {
- eng3d = TRUE;
+ eng3d = true;
} else
if (!nv50_2d_src_format_faithful(info->src.format)) {
if (!util_format_is_luminance(info->src.format)) {
if (!nv50_2d_dst_format_ops_supported(info->dst.format))
- eng3d = TRUE;
+ eng3d = true;
else
if (util_format_is_intensity(info->src.format))
eng3d = info->src.format != PIPE_FORMAT_I8_UNORM;
@@ -1420,30 +1421,36 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
}
} else
if (util_format_is_luminance_alpha(info->src.format))
- eng3d = TRUE;
+ eng3d = true;
}
if (info->src.resource->nr_samples == 8 &&
info->dst.resource->nr_samples <= 1)
- eng3d = TRUE;
+ eng3d = true;
#if 0
/* FIXME: can't make this work with eng2d anymore, at least not on nv50 */
if (info->src.resource->nr_samples > 1 ||
info->dst.resource->nr_samples > 1)
- eng3d = TRUE;
+ eng3d = true;
#endif
/* FIXME: find correct src coordinates adjustments */
if ((info->src.box.width != info->dst.box.width &&
info->src.box.width != -info->dst.box.width) ||
(info->src.box.height != info->dst.box.height &&
info->src.box.height != -info->dst.box.height))
- eng3d = TRUE;
+ eng3d = true;
+
+ if (nvc0->screen->num_occlusion_queries_active)
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
if (!eng3d)
nvc0_blit_eng2d(nvc0, info);
else
nvc0_blit_3d(nvc0, info);
+ if (nvc0->screen->num_occlusion_queries_active)
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
+
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
}
@@ -1453,13 +1460,13 @@ nvc0_flush_resource(struct pipe_context *ctx,
{
}
-boolean
+bool
nvc0_blitter_create(struct nvc0_screen *screen)
{
screen->blitter = CALLOC_STRUCT(nvc0_blitter);
if (!screen->blitter) {
NOUVEAU_ERR("failed to allocate blitter struct\n");
- return FALSE;
+ return false;
}
screen->blitter->screen = screen;
@@ -1468,7 +1475,7 @@ nvc0_blitter_create(struct nvc0_screen *screen)
nvc0_blitter_make_vp(screen->blitter);
nvc0_blitter_make_sampler(screen->blitter);
- return TRUE;
+ return true;
}
void
@@ -1491,20 +1498,20 @@ nvc0_blitter_destroy(struct nvc0_screen *screen)
FREE(blitter);
}
-boolean
+bool
nvc0_blitctx_create(struct nvc0_context *nvc0)
{
nvc0->blit = CALLOC_STRUCT(nvc0_blitctx);
if (!nvc0->blit) {
NOUVEAU_ERR("failed to allocate blit context\n");
- return FALSE;
+ return false;
}
nvc0->blit->nvc0 = nvc0;
nvc0->blit->rast.pipe.half_pixel_center = 1;
- return TRUE;
+ return true;
}
void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index ddc0409ca86..d19082e0e15 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -34,8 +34,8 @@
(NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
-static INLINE uint32_t
-nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+static inline uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int)
{
switch (swz) {
case PIPE_SWIZZLE_RED:
@@ -82,7 +82,7 @@ nvc0_create_texture_view(struct pipe_context *pipe,
uint32_t depth;
struct nv50_tic_entry *view;
struct nv50_miptree *mt;
- boolean tex_int;
+ bool tex_int;
view = MALLOC_STRUCT(nv50_tic_entry);
if (!view)
@@ -195,7 +195,7 @@ nvc0_create_texture_view(struct pipe_context *pipe,
default:
NOUVEAU_ERR("unexpected/invalid texture target: %d\n",
mt->base.base.target);
- return FALSE;
+ return false;
}
tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
@@ -226,7 +226,7 @@ nvc0_create_texture_view(struct pipe_context *pipe,
return &view->pipe;
}
-static boolean
+static bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
uint32_t commands[32];
@@ -234,12 +234,12 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
struct nouveau_bo *txc = nvc0->screen->txc;
unsigned i;
unsigned n = 0;
- boolean need_flush = FALSE;
+ bool need_flush = false;
for (i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
struct nv04_resource *res;
- const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+ const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
if (!tic) {
if (dirty)
@@ -263,7 +263,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
BEGIN_NIC0(push, NVC0_M2MF(DATA), 8);
PUSH_DATAp(push, &tic->tic[0], 8);
- need_flush = TRUE;
+ need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
@@ -295,18 +295,18 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
return need_flush;
}
-static boolean
+static bool
nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
{
struct nouveau_bo *txc = nvc0->screen->txc;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
unsigned i;
- boolean need_flush = FALSE;
+ bool need_flush = false;
for (i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
struct nv04_resource *res;
- const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+ const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
if (!tic) {
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
@@ -328,7 +328,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
PUSH_DATA (push, 0x1001);
PUSH_DATAp(push, &tic->tic[0], 8);
- need_flush = TRUE;
+ need_flush = true;
} else
if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
@@ -356,16 +356,14 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
void nvc0_validate_textures(struct nvc0_context *nvc0)
{
- boolean need_flush;
+ bool need_flush = false;
+ int i;
- if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
- need_flush = nve4_validate_tic(nvc0, 0);
- need_flush |= nve4_validate_tic(nvc0, 3);
- need_flush |= nve4_validate_tic(nvc0, 4);
- } else {
- need_flush = nvc0_validate_tic(nvc0, 0);
- need_flush |= nvc0_validate_tic(nvc0, 3);
- need_flush |= nvc0_validate_tic(nvc0, 4);
+ for (i = 0; i < 5; i++) {
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+ need_flush |= nve4_validate_tic(nvc0, i);
+ else
+ need_flush |= nvc0_validate_tic(nvc0, i);
}
if (need_flush) {
@@ -374,14 +372,14 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
}
}
-static boolean
+static bool
nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
{
uint32_t commands[16];
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
unsigned i;
unsigned n = 0;
- boolean need_flush = FALSE;
+ bool need_flush = false;
for (i = 0; i < nvc0->num_samplers[s]; ++i) {
struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
@@ -398,7 +396,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
32, tsc->tsc);
- need_flush = TRUE;
+ need_flush = true;
}
nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
@@ -418,13 +416,13 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
return need_flush;
}
-boolean
+bool
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
struct nouveau_bo *txc = nvc0->screen->txc;
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
unsigned i;
- boolean need_flush = FALSE;
+ bool need_flush = false;
for (i = 0; i < nvc0->num_samplers[s]; ++i) {
struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
@@ -447,7 +445,7 @@ nve4_validate_tsc(struct nvc0_context *nvc0, int s)
PUSH_DATA (push, 0x1001);
PUSH_DATAp(push, &tsc->tsc[0], 8);
- need_flush = TRUE;
+ need_flush = true;
}
nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
@@ -466,16 +464,14 @@ nve4_validate_tsc(struct nvc0_context *nvc0, int s)
void nvc0_validate_samplers(struct nvc0_context *nvc0)
{
- boolean need_flush;
+ bool need_flush = false;
+ int i;
- if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
- need_flush = nve4_validate_tsc(nvc0, 0);
- need_flush |= nve4_validate_tsc(nvc0, 3);
- need_flush |= nve4_validate_tsc(nvc0, 4);
- } else {
- need_flush = nvc0_validate_tsc(nvc0, 0);
- need_flush |= nvc0_validate_tsc(nvc0, 3);
- need_flush |= nvc0_validate_tsc(nvc0, 4);
+ for (i = 0; i < 5; i++) {
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+ need_flush |= nve4_validate_tsc(nvc0, i);
+ else
+ need_flush |= nvc0_validate_tsc(nvc0, i);
}
if (need_flush) {
@@ -645,13 +641,13 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
}
}
-static INLINE void
+static inline void
nvc0_update_surface_bindings(struct nvc0_context *nvc0)
{
/* TODO */
}
-static INLINE void
+static inline void
nve4_update_surface_bindings(struct nvc0_context *nvc0)
{
/* TODO */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index 45c6f7cc3ca..7cc5b4b1f48 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -329,17 +329,17 @@ nve4_m2mf_copy_linear(struct nouveau_context *nv,
}
-static INLINE boolean
+static inline bool
nvc0_mt_transfer_can_map_directly(struct nv50_miptree *mt)
{
if (mt->base.domain == NOUVEAU_BO_VRAM)
- return FALSE;
+ return false;
if (mt->base.base.usage != PIPE_USAGE_STAGING)
- return FALSE;
+ return false;
return !nouveau_bo_memtype(mt->base.bo);
}
-static INLINE boolean
+static inline bool
nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
{
if (!mt->base.mm) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 8cf2584b0ce..6f9e7906713 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -61,8 +61,8 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
so->num_elements = num_elements;
so->instance_elts = 0;
so->instance_bufs = 0;
- so->shared_slots = FALSE;
- so->need_conversion = FALSE;
+ so->shared_slots = false;
+ so->need_conversion = false;
memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
@@ -93,7 +93,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
return NULL;
}
so->element[i].state = nvc0_format_table[fmt].vtx;
- so->need_conversion = TRUE;
+ so->need_conversion = true;
}
size = util_format_get_blocksize(fmt);
@@ -141,7 +141,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
if (so->instance_elts || src_offset_max >= (1 << 14))
return so;
- so->shared_slots = TRUE;
+ so->shared_slots = true;
for (i = 0; i < num_elements; ++i) {
const unsigned b = elements[i].vertex_buffer_index;
@@ -196,7 +196,7 @@ nvc0_set_constant_vertex_attrib(struct nvc0_context *nvc0, const unsigned a)
push->cur += 5;
}
-static INLINE void
+static inline void
nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi,
uint32_t *base, uint32_t *size)
{
@@ -214,7 +214,7 @@ nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi,
}
}
-static INLINE void
+static inline void
nvc0_release_user_vbufs(struct nvc0_context *nvc0)
{
if (nvc0->vbo_user) {
@@ -265,7 +265,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0)
PUSH_DATAh(push, address[b] + ve->src_offset);
PUSH_DATA (push, address[b] + ve->src_offset);
}
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
}
static void
@@ -419,7 +419,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
uint32_t const_vbos;
unsigned i;
uint8_t vbo_mode;
- boolean update_vertex;
+ bool update_vertex;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
@@ -529,7 +529,7 @@ nvc0_idxbuf_validate(struct nvc0_context *nvc0)
#define NVC0_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
-static INLINE unsigned
+static inline unsigned
nvc0_prim_gl(unsigned prim)
{
switch (prim) {
@@ -547,8 +547,7 @@ nvc0_prim_gl(unsigned prim)
NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
- /*
- NVC0_PRIM_GL_CASE(PATCHES); */
+ NVC0_PRIM_GL_CASE(PATCHES);
default:
return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
}
@@ -559,7 +558,7 @@ nvc0_draw_vbo_kick_notify(struct nouveau_pushbuf *push)
{
struct nvc0_screen *screen = push->user_priv;
- nouveau_fence_update(&screen->base, TRUE);
+ nouveau_fence_update(&screen->base, true);
NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
}
@@ -695,7 +694,7 @@ nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
}
static void
-nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+nvc0_draw_elements(struct nvc0_context *nvc0, bool shorten,
unsigned mode, unsigned start, unsigned count,
unsigned instance_count, int32_t index_bias)
{
@@ -835,8 +834,8 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | size);
}
-static INLINE void
-nvc0_update_prim_restart(struct nvc0_context *nvc0, boolean en, uint32_t index)
+static inline void
+nvc0_update_prim_restart(struct nvc0_context *nvc0, bool en, uint32_t index)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -889,6 +888,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
}
}
+ if (info->mode == PIPE_PRIM_PATCHES &&
+ nvc0->state.patch_vertices != info->vertices_per_patch) {
+ nvc0->state.patch_vertices = info->vertices_per_patch;
+ IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
+ }
+
/* 8 as minimum to avoid immediate double validation of new buffers */
nvc0_state_validate(nvc0, ~0, 8);
@@ -910,13 +915,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
continue;
if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nvc0->cb_dirty = TRUE;
+ nvc0->cb_dirty = true;
}
}
if (nvc0->cb_dirty) {
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
- nvc0->cb_dirty = FALSE;
+ nvc0->cb_dirty = false;
}
if (nvc0->state.vbo_mode) {
@@ -940,19 +945,19 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (!nvc0->vtxbuf[i].buffer)
continue;
if (nvc0->vtxbuf[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
}
if (!nvc0->base.vbo_dirty && nvc0->idxbuf.buffer &&
nvc0->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
- nvc0->base.vbo_dirty = TRUE;
+ nvc0->base.vbo_dirty = true;
nvc0_update_prim_restart(nvc0, info->primitive_restart, info->restart_index);
if (nvc0->base.vbo_dirty) {
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
- nvc0->base.vbo_dirty = FALSE;
+ nvc0->base.vbo_dirty = false;
}
if (unlikely(info->indirect)) {
@@ -962,10 +967,10 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0_draw_stream_output(nvc0, info);
} else
if (info->indexed) {
- boolean shorten = info->max_index <= 65535;
+ bool shorten = info->max_index <= 65535;
if (info->primitive_restart && info->restart_index > 65535)
- shorten = FALSE;
+ shorten = false;
nvc0_draw_elements(nvc0, shorten,
info->mode, info->start, info->count,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index f180087161d..8b23a4887da 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -21,12 +21,12 @@ struct push_context {
uint32_t restart_index;
uint32_t instance_id;
- boolean prim_restart;
- boolean need_vertex_id;
+ bool prim_restart;
+ bool need_vertex_id;
struct {
- boolean enabled;
- boolean value;
+ bool enabled;
+ bool value;
unsigned stride;
const uint8_t *data;
} edgeflag;
@@ -47,7 +47,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
ctx->need_vertex_id =
nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32);
- ctx->edgeflag.value = TRUE;
+ ctx->edgeflag.value = true;
ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;
/* silence warnings */
@@ -55,7 +55,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
ctx->edgeflag.stride = 0;
}
-static INLINE void
+static inline void
nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
{
struct translate *translate = nvc0->vertex->translate;
@@ -78,7 +78,7 @@ nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
}
}
-static INLINE void
+static inline void
nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
{
if (nvc0->idxbuf.buffer) {
@@ -90,7 +90,7 @@ nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
}
}
-static INLINE void
+static inline void
nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
int32_t index_bias)
{
@@ -112,7 +112,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
{
unsigned i;
@@ -120,7 +120,7 @@ prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
{
unsigned i;
@@ -128,7 +128,7 @@ prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
return i;
}
-static INLINE unsigned
+static inline unsigned
prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
{
unsigned i;
@@ -136,21 +136,21 @@ prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
return i;
}
-static INLINE boolean
+static inline bool
ef_value(const struct push_context *ctx, uint32_t index)
{
float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
- return *pf ? TRUE : FALSE;
+ return *pf ? true : false;
}
-static INLINE boolean
+static inline bool
ef_toggle(struct push_context *ctx)
{
ctx->edgeflag.value = !ctx->edgeflag.value;
return ctx->edgeflag.value;
}
-static INLINE unsigned
+static inline unsigned
ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
{
unsigned i;
@@ -158,7 +158,7 @@ ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
return i;
}
-static INLINE unsigned
+static inline unsigned
ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
{
unsigned i;
@@ -166,7 +166,7 @@ ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
return i;
}
-static INLINE unsigned
+static inline unsigned
ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
{
unsigned i;
@@ -174,7 +174,7 @@ ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
return i;
}
-static INLINE unsigned
+static inline unsigned
ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
{
unsigned i;
@@ -182,7 +182,7 @@ ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
return i;
}
-static INLINE void *
+static inline void *
nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
{
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
@@ -409,7 +409,7 @@ disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
#define NVC0_PRIM_GL_CASE(n) \
case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
-static INLINE unsigned
+static inline unsigned
nvc0_prim_gl(unsigned prim)
{
switch (prim) {
@@ -427,8 +427,7 @@ nvc0_prim_gl(unsigned prim)
NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
- /*
- NVC0_PRIM_GL_CASE(PATCHES); */
+ NVC0_PRIM_GL_CASE(PATCHES);
default:
return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
}
@@ -483,7 +482,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
struct pipe_context *pipe = &nvc0->base.pipe;
struct nvc0_so_target *targ;
targ = nvc0_so_target(info->count_from_stream_output);
- pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ pipe->get_query_result(pipe, targ->pq, true, (void *)&vert_count);
vert_count /= targ->stride;
}
ctx.idxbuf = NULL; /* shut up warnings */
@@ -560,7 +559,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
}
-static INLINE void
+static inline void
copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
{
unsigned i;
@@ -568,7 +567,7 @@ copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
dst[i] = elts[i] + bias;
}
-static INLINE void
+static inline void
copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
{
unsigned i;
@@ -576,7 +575,7 @@ copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
dst[i] = elts[i] + bias;
}
-static INLINE void
+static inline void
copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
{
unsigned i;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
index 725e889683f..4ea8ca3cfa2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
@@ -15,14 +15,14 @@
#endif
-static INLINE void
+static inline void
nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
unsigned flags, struct nouveau_bo *bo)
{
nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
}
-static INLINE void
+static inline void
nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin,
struct nv04_resource *res, unsigned flags)
{
@@ -38,7 +38,7 @@ nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin,
#define BCTX_REFN(bctx, bin, res, acc) \
nvc0_add_resident(bctx, NVC0_BIND_##bin, res, NOUVEAU_BO_##acc)
-static INLINE void
+static inline void
PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
{
struct nouveau_pushbuf_refn ref = { bo, flags };
@@ -69,46 +69,46 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define NVC0_3D_SERIALIZE NV50_GRAPH_SERIALIZE
-static INLINE uint32_t
+static inline uint32_t
NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size)
{
return 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2);
}
-static INLINE uint32_t
+static inline uint32_t
NVC0_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
{
return 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2);
}
-static INLINE uint32_t
+static inline uint32_t
NVC0_FIFO_PKHDR_IL(int subc, int mthd, uint16_t data)
{
assert(data < 0x2000);
return 0x80000000 | (data << 16) | (subc << 13) | (mthd >> 2);
}
-static INLINE uint32_t
+static inline uint32_t
NVC0_FIFO_PKHDR_1I(int subc, int mthd, unsigned size)
{
return 0xa0000000 | (size << 16) | (subc << 13) | (mthd >> 2);
}
-static INLINE uint8_t
+static inline uint8_t
nouveau_bo_memtype(const struct nouveau_bo *bo)
{
return bo->config.nvc0.memtype;
}
-static INLINE void
+static inline void
PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
{
*push->cur++ = (uint32_t)(data >> 32);
}
-static INLINE void
+static inline void
BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -117,7 +117,7 @@ BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(subc, mthd, size));
}
-static INLINE void
+static inline void
BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -126,7 +126,7 @@ BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
PUSH_DATA (push, NVC0_FIFO_PKHDR_NI(subc, mthd, size));
}
-static INLINE void
+static inline void
BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
{
#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
@@ -135,7 +135,7 @@ BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
PUSH_DATA (push, NVC0_FIFO_PKHDR_1I(subc, mthd, size));
}
-static INLINE void
+static inline void
IMMED_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, uint16_t data)
{
#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index fce02a7cc57..d3e5676873e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -250,7 +250,7 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
static void
nve4_compute_validate_samplers(struct nvc0_context *nvc0)
{
- boolean need_flush = nve4_validate_tsc(nvc0, 5);
+ bool need_flush = nve4_validate_tsc(nvc0, 5);
if (need_flush) {
BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, 0);
@@ -299,11 +299,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
}
-static boolean
+static bool
nve4_compute_state_validate(struct nvc0_context *nvc0)
{
if (!nvc0_compute_validate_program(nvc0))
- return FALSE;
+ return false;
if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
nve4_compute_validate_textures(nvc0);
if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
@@ -316,15 +316,15 @@ nve4_compute_state_validate(struct nvc0_context *nvc0)
nvc0_validate_global_residents(nvc0,
nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
- return FALSE;
+ return false;
if (unlikely(nvc0->state.flushed))
- nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, true);
- return TRUE;
+ return true;
}
@@ -364,7 +364,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
-static INLINE uint8_t
+static inline uint8_t
nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
{
if (shared_size > (32 << 10))
@@ -413,7 +413,7 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
}
-static INLINE struct nve4_cp_launch_desc *
+static inline struct nve4_cp_launch_desc *
nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
struct nouveau_bo **pbo, uint64_t *pgpuaddr)
{
@@ -505,7 +505,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
for (i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
struct nv04_resource *res;
- const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+ const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
if (!tic) {
nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
@@ -575,18 +575,18 @@ nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
{
const uint32_t *data = (const uint32_t *)desc;
unsigned i;
- boolean zero = FALSE;
+ bool zero = false;
debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
for (i = 0; i < sizeof(*desc); i += 4) {
if (data[i / 4]) {
debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
- zero = FALSE;
+ zero = false;
} else
if (!zero) {
debug_printf("...\n");
- zero = TRUE;
+ zero = true;
}
}
@@ -606,7 +606,7 @@ nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
for (i = 0; i < 8; ++i) {
uint64_t address;
uint32_t size = desc->cb[i].size;
- boolean valid = !!(desc->cb_mask & (1 << i));
+ bool valid = !!(desc->cb_mask & (1 << i));
address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
index 4d7af54d860..7364a68a579 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -68,7 +68,7 @@ struct nve4_cp_launch_desc
u32 unk48[16];
};
-static INLINE void
+static inline void
nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
{
memset(desc, 0, sizeof(*desc));
@@ -78,7 +78,7 @@ nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
desc->unk47_20 = 0x300;
}
-static INLINE void
+static inline void
nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
unsigned index,
struct nouveau_bo *bo,
@@ -96,7 +96,7 @@ nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
desc->cb_mask |= 1 << index;
}
-static INLINE void
+static inline void
nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
unsigned index,
const struct nvc0_constbuf *cb)
diff --git a/src/gallium/drivers/r300/Makefile.am b/src/gallium/drivers/r300/Makefile.am
index dd1a5ede19b..081f332683e 100644
--- a/src/gallium/drivers/r300/Makefile.am
+++ b/src/gallium/drivers/r300/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
index baf05cea965..6ea8f24cc14 100644
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -382,7 +382,7 @@ static void r300_clear(struct pipe_context* pipe,
r300_get_num_cs_end_dwords(r300);
/* Reserve CS space. */
- if (dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
+ if (dwords > (r300->cs->max_dw - r300->cs->cdw)) {
r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
}
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index c35aa3b24aa..8c24ad6d98a 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -94,6 +94,8 @@ static void r300_destroy_context(struct pipe_context* context)
if (r300->cs)
r300->rws->cs_destroy(r300->cs);
+ if (r300->ctx)
+ r300->rws->ctx_destroy(r300->ctx);
rc_destroy_regalloc_state(&r300->fs_regalloc_state);
@@ -382,7 +384,11 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
sizeof(struct pipe_transfer), 64,
UTIL_SLAB_SINGLETHREADED);
- r300->cs = rws->cs_create(rws, RING_GFX, r300_flush_callback, r300, NULL);
+ r300->ctx = rws->ctx_create(rws);
+ if (!r300->ctx)
+ goto fail;
+
+ r300->cs = rws->cs_create(r300->ctx, RING_GFX, r300_flush_callback, r300, NULL);
if (r300->cs == NULL)
goto fail;
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 3873c9a31c1..18ae11a3a24 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -449,6 +449,8 @@ struct r300_context {
/* The interface to the windowing system, etc. */
struct radeon_winsys *rws;
+ /* The submission context. */
+ struct radeon_winsys_ctx *ctx;
/* The command stream. */
struct radeon_winsys_cs *cs;
/* Screen. */
@@ -647,32 +649,32 @@ struct r300_context {
for (atom = r300->first_dirty; atom != r300->last_dirty; atom++)
/* Convenience cast wrappers. */
-static INLINE struct r300_query* r300_query(struct pipe_query* q)
+static inline struct r300_query* r300_query(struct pipe_query* q)
{
return (struct r300_query*)q;
}
-static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
+static inline struct r300_surface* r300_surface(struct pipe_surface* surf)
{
return (struct r300_surface*)surf;
}
-static INLINE struct r300_resource* r300_resource(struct pipe_resource* tex)
+static inline struct r300_resource* r300_resource(struct pipe_resource* tex)
{
return (struct r300_resource*)tex;
}
-static INLINE struct r300_context* r300_context(struct pipe_context* context)
+static inline struct r300_context* r300_context(struct pipe_context* context)
{
return (struct r300_context*)context;
}
-static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
+static inline struct r300_fragment_shader *r300_fs(struct r300_context *r300)
{
return (struct r300_fragment_shader*)r300->fs.state;
}
-static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
+static inline void r300_mark_atom_dirty(struct r300_context *r300,
struct r300_atom *atom)
{
atom->dirty = TRUE;
@@ -688,7 +690,7 @@ static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
}
}
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
r300_get_nonnull_cb(struct pipe_framebuffer_state *fb, unsigned i)
{
if (fb->cbufs[i])
@@ -777,12 +779,12 @@ void r300_update_derived_state(struct r300_context* r300);
void r500_dump_rs_block(struct r300_rs_block *rs);
-static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
+static inline boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
{
return SCREEN_DBG_ON(ctx->screen, flags);
}
-static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags,
+static inline void CTX_DBG(struct r300_context * ctx, unsigned flags,
const char * fmt, ...)
{
if (CTX_DBG_ON(ctx, flags)) {
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 37f9641ab3e..fc150542d4b 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -46,7 +46,7 @@
#ifdef DEBUG
#define BEGIN_CS(size) do { \
- assert(size <= (RADEON_MAX_CMDBUF_DWORDS - cs_copy->cdw)); \
+ assert(size <= (cs_copy->max_dw - cs_copy->cdw)); \
cs_count = size; \
} while (0)
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 39eb73da65d..b39624dad5f 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -77,14 +77,14 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
/* Return TRUE if the shader was switched and should be re-emitted. */
boolean r300_pick_fragment_shader(struct r300_context* r300);
-static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
+static inline boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
{
if (!fs)
return FALSE;
return (fs->shader->code.writes_depth) ? TRUE : FALSE;
}
-static INLINE boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
+static inline boolean r300_fragment_shader_writes_all(struct r300_fragment_shader *fs)
{
if (!fs)
return FALSE;
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 01b83b87fcf..4dd8156f616 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -146,10 +146,11 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
if (q->type == PIPE_QUERY_GPU_FINISHED) {
if (wait) {
- r300->rws->buffer_wait(q->buf, RADEON_USAGE_READWRITE);
+ r300->rws->buffer_wait(q->buf, PIPE_TIMEOUT_INFINITE,
+ RADEON_USAGE_READWRITE);
vresult->b = TRUE;
} else {
- vresult->b = !r300->rws->buffer_is_busy(q->buf, RADEON_USAGE_READWRITE);
+ vresult->b = r300->rws->buffer_wait(q->buf, 0, RADEON_USAGE_READWRITE);
}
return vresult->b;
}
@@ -168,8 +169,6 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
map++;
}
- r300->rws->buffer_unmap(q->cs_buf);
-
if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
vresult->b = temp != 0;
} else {
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 4c951d14f10..0487b11e775 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -215,7 +215,7 @@ static boolean r300_reserve_cs_dwords(struct r300_context *r300,
cs_dwords += r300_get_num_cs_end_dwords(r300);
/* Reserve requested CS space. */
- if (cs_dwords > (RADEON_MAX_CMDBUF_DWORDS - r300->cs->cdw)) {
+ if (cs_dwords > (r300->cs->max_dw - r300->cs->cdw)) {
r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
flushed = TRUE;
}
@@ -871,7 +871,7 @@ struct r300_render {
uint8_t *vbo_ptr;
};
-static INLINE struct r300_render*
+static inline struct r300_render*
r300_render(struct vbuf_render* render)
{
return (struct r300_render*)render;
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a7bca915f57..4ca0b268bde 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -191,6 +191,10 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
/* SWTCL-only features. */
@@ -427,7 +431,7 @@ static int r300_get_video_param(struct pipe_screen *screen,
* Whether the format matches:
* PIPE_FORMAT_?10?10?10?2_UNORM
*/
-static INLINE boolean
+static inline boolean
util_format_is_rgba1010102_variant(const struct util_format_description *desc)
{
static const unsigned size[4] = {10, 10, 10, 2};
@@ -660,14 +664,6 @@ static void r300_fence_reference(struct pipe_screen *screen,
rws->fence_reference(ptr, fence);
}
-static boolean r300_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct radeon_winsys *rws = r300_screen(screen)->rws;
-
- return rws->fence_wait(rws, fence, 0);
-}
-
static boolean r300_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
@@ -712,7 +708,6 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
r300screen->screen.context_create = r300_create_context;
r300screen->screen.fence_reference = r300_fence_reference;
- r300screen->screen.fence_signalled = r300_fence_signalled;
r300screen->screen.fence_finish = r300_fence_finish;
r300_init_screen_resource_functions(r300screen);
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 7bba39bf12b..e15c3c7de0c 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -51,11 +51,11 @@ struct r300_screen {
/* Convenience cast wrappers. */
-static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
+static inline struct r300_screen* r300_screen(struct pipe_screen* screen) {
return (struct r300_screen*)screen;
}
-static INLINE struct radeon_winsys *
+static inline struct radeon_winsys *
radeon_winsys(struct pipe_screen *screen) {
return r300_screen(screen)->rws;
}
@@ -102,12 +102,12 @@ radeon_winsys(struct pipe_screen *screen) {
#define DBG_P_STAT (1 << 25)
/*@}*/
-static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
+static inline boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
{
return (screen->debug & flags) ? TRUE : FALSE;
}
-static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
+static inline void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
const char * fmt, ...)
{
if (SCREEN_DBG_ON(screen, flags)) {
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
index de557b57776..6451a2c8df2 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -96,7 +96,7 @@ r300_buffer_transfer_map( struct pipe_context *context,
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r300->rws->cs_is_buffer_referenced(r300->cs, rbuf->cs_buf, RADEON_USAGE_READWRITE) ||
- r300->rws->buffer_is_busy(rbuf->buf, RADEON_USAGE_READWRITE)) {
+ !r300->rws->buffer_wait(rbuf->buf, 0, RADEON_USAGE_READWRITE)) {
unsigned i;
struct pb_buffer *new_buf;
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h
index b4c8520039b..14b849c8c93 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.h
+++ b/src/gallium/drivers/r300/r300_screen_buffer.h
@@ -46,7 +46,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
/* Inline functions. */
-static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
+static inline struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
{
return (struct r300_buffer *)buffer;
}
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
index b756048c6c7..93bbc9d4a96 100644
--- a/src/gallium/drivers/r300/r300_shader_semantics.h
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -46,7 +46,7 @@ struct r300_shader_semantics {
int num_generic;
};
-static INLINE void r300_shader_semantics_reset(
+static inline void r300_shader_semantics_reset(
struct r300_shader_semantics* info)
{
int i;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index e886df87a60..d99d5ae0152 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -844,7 +844,7 @@ static void r300_tex_set_tiling_flags(struct r300_context *r300,
tex->tex.macrotile[level]) {
r300->rws->buffer_set_tiling(tex->buf, r300->cs,
tex->tex.microtile, tex->tex.macrotile[level],
- 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
tex->tex.stride_in_bytes[0], false);
tex->surface_level = level;
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index feec494c4dc..fbd91cda9fe 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -32,13 +32,13 @@
/* Some maths. These should probably find their way to u_math, if needed. */
-static INLINE int pack_float_16_6x(float f) {
+static inline int pack_float_16_6x(float f) {
return ((int)(f * 6.0) & 0xffff);
}
/* Blend state. */
-static INLINE uint32_t r300_translate_blend_function(int blend_func,
+static inline uint32_t r300_translate_blend_function(int blend_func,
boolean clamp)
{
switch (blend_func) {
@@ -60,7 +60,7 @@ static INLINE uint32_t r300_translate_blend_function(int blend_func,
return 0;
}
-static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
+static inline uint32_t r300_translate_blend_factor(int blend_fact)
{
switch (blend_fact) {
case PIPE_BLENDFACTOR_ONE:
@@ -113,7 +113,7 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
/* DSA state. */
-static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
+static inline uint32_t r300_translate_depth_stencil_function(int zs_func)
{
switch (zs_func) {
case PIPE_FUNC_NEVER:
@@ -141,7 +141,7 @@ static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
return 0;
}
-static INLINE uint32_t r300_translate_stencil_op(int s_op)
+static inline uint32_t r300_translate_stencil_op(int s_op)
{
switch (s_op) {
case PIPE_STENCIL_OP_KEEP:
@@ -168,7 +168,7 @@ static INLINE uint32_t r300_translate_stencil_op(int s_op)
return 0;
}
-static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
+static inline uint32_t r300_translate_alpha_function(int alpha_func)
{
switch (alpha_func) {
case PIPE_FUNC_NEVER:
@@ -195,7 +195,7 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
return 0;
}
-static INLINE uint32_t
+static inline uint32_t
r300_translate_polygon_mode_front(unsigned mode) {
switch (mode)
{
@@ -213,7 +213,7 @@ r300_translate_polygon_mode_front(unsigned mode) {
}
}
-static INLINE uint32_t
+static inline uint32_t
r300_translate_polygon_mode_back(unsigned mode) {
switch (mode)
{
@@ -233,7 +233,7 @@ r300_translate_polygon_mode_back(unsigned mode) {
/* Texture sampler state. */
-static INLINE uint32_t r300_translate_wrap(int wrap)
+static inline uint32_t r300_translate_wrap(int wrap)
{
switch (wrap) {
case PIPE_TEX_WRAP_REPEAT:
@@ -259,7 +259,7 @@ static INLINE uint32_t r300_translate_wrap(int wrap)
}
}
-static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
+static inline uint32_t r300_translate_tex_filters(int min, int mag, int mip,
boolean is_anisotropic)
{
uint32_t retval = 0;
@@ -308,7 +308,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
return retval;
}
-static INLINE uint32_t r300_anisotropy(unsigned max_aniso)
+static inline uint32_t r300_anisotropy(unsigned max_aniso)
{
if (max_aniso >= 16) {
return R300_TX_MAX_ANISO_16_TO_1;
@@ -323,7 +323,7 @@ static INLINE uint32_t r300_anisotropy(unsigned max_aniso)
}
}
-static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
+static inline uint32_t r500_anisotropy(unsigned max_aniso)
{
if (!max_aniso) {
return 0;
@@ -336,7 +336,7 @@ static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
}
/* Translate pipe_formats into PSC vertex types. */
-static INLINE uint16_t
+static inline uint16_t
r300_translate_vertex_data_type(enum pipe_format format) {
uint32_t result = 0;
const struct util_format_description *desc;
@@ -410,7 +410,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
return result;
}
-static INLINE uint16_t
+static inline uint16_t
r300_translate_vertex_data_swizzle(enum pipe_format format) {
const struct util_format_description *desc;
unsigned i, swizzle = 0;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 6c01c0d21e4..5e4d50df27d 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -1063,7 +1063,7 @@ r300_texture_create_object(struct r300_screen *rscreen,
rws->buffer_set_tiling(tex->buf, NULL,
tex->tex.microtile, tex->tex.macrotile[0],
- 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
tex->tex.stride_in_bytes[0], false);
return tex;
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
index b87164ba836..44303792f51 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -41,7 +41,7 @@ struct r300_transfer {
};
/* Convenience cast wrapper. */
-static INLINE struct r300_transfer*
+static inline struct r300_transfer*
r300_transfer(struct pipe_transfer* transfer)
{
return (struct r300_transfer*)transfer;
@@ -120,7 +120,7 @@ r300_texture_transfer_map(struct pipe_context *ctx,
referenced_hw = TRUE;
} else {
referenced_hw =
- r300->rws->buffer_is_busy(tex->buf, RADEON_USAGE_READWRITE);
+ !r300->rws->buffer_wait(tex->buf, 0, RADEON_USAGE_READWRITE);
}
trans = CALLOC_STRUCT(r300_transfer);
@@ -251,16 +251,12 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx,
struct r300_resource *tex = r300_resource(transfer->resource);
if (trans->linear_texture) {
- rws->buffer_unmap(trans->linear_texture->cs_buf);
-
if (transfer->usage & PIPE_TRANSFER_WRITE) {
r300_copy_into_tiled_texture(ctx, trans);
}
pipe_resource_reference(
(struct pipe_resource**)&trans->linear_texture, NULL);
- } else {
- rws->buffer_unmap(tex->cs_buf);
}
FREE(transfer);
}
diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am
index dc0d90d759b..8317da727a2 100644
--- a/src/gallium/drivers/r600/Makefile.am
+++ b/src/gallium/drivers/r600/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 295cb4d80b7..42e8b0b1761 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -160,6 +160,9 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
alu.op = ALU_OP1_MOVA_INT;
alu.src[0].sel = bc->index_reg[id];
alu.src[0].chan = 0;
+ if (bc->chip_class == CAYMAN)
+ alu.dst.sel = id == 0 ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
+
alu.last = 1;
r = r600_bytecode_add_alu(bc, &alu);
if (r)
@@ -167,12 +170,14 @@ int egcm_load_index_reg(struct r600_bytecode *bc, unsigned id, bool inside_alu_c
bc->ar_loaded = 0; /* clobbered */
- memset(&alu, 0, sizeof(alu));
- alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
- alu.last = 1;
- r = r600_bytecode_add_alu(bc, &alu);
- if (r)
- return r;
+ if (bc->chip_class == EVERGREEN) {
+ memset(&alu, 0, sizeof(alu));
+ alu.op = id == 0 ? ALU_OP0_SET_CF_IDX0 : ALU_OP0_SET_CF_IDX1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(bc, &alu);
+ if (r)
+ return r;
+ }
/* Must split ALU group as index only applies to following group */
if (inside_alu_clause) {
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index b534872f062..97e230f56c7 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -521,4 +521,11 @@
#define V_SQ_REL_ABSOLUTE 0
#define V_SQ_REL_RELATIVE 1
+
+/* CAYMAN has special encoding for MOVA_INT destination */
+#define CM_V_SQ_MOVA_DST_AR_X 0
+#define CM_V_SQ_MOVA_DST_CF_PC 1
+#define CM_V_SQ_MOVA_DST_CF_IDX0 2
+#define CM_V_SQ_MOVA_DST_CF_IDX1 3
+
#endif
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 4c3c34cd664..c52e43e9c2a 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -163,7 +163,7 @@ static void evergreen_cs_set_vertex_buffer(
rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
state->enabled_mask |= 1 << vb_index;
state->dirty_mask |= 1 << vb_index;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
static void evergreen_cs_set_constant_buffer(
@@ -226,7 +226,7 @@ void *evergreen_create_compute_state(
}
#else
memset(&shader->binary, 0, sizeof(shader->binary));
- radeon_elf_read(code, header->num_bytes, &shader->binary, true);
+ radeon_elf_read(code, header->num_bytes, &shader->binary);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
@@ -487,6 +487,12 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
/* Emit constant buffer state */
r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);
+ /* Emit sampler state */
+ r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom);
+
+ /* Emit sampler view (texture resource) state */
+ r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom);
+
/* Emit compute shader state */
r600_emit_atom(ctx, &ctx->cs_shader_state.atom);
@@ -655,25 +661,6 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
}
}
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
- unsigned start_slot, unsigned count,
- struct pipe_sampler_view **views)
-{
- struct r600_pipe_sampler_view **resource =
- (struct r600_pipe_sampler_view **)views;
-
- for (unsigned i = 0; i < count; i++) {
- if (resource[i]) {
- assert(i+1 < 12);
- /* XXX: Implement */
- assert(!"Compute samplers not implemented.");
- ///FETCH0 = VTX0 (param buffer),
- //FETCH1 = VTX1 (global buffer pool), FETCH2... = TEX
- }
- }
-}
-
-
static void evergreen_set_global_binding(
struct pipe_context *ctx_, unsigned first, unsigned n,
struct pipe_resource **resources,
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 4ddbc0beba5..6a91d4709f4 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -32,7 +32,7 @@
#include "evergreen_compute.h"
#include "util/u_math.h"
-static INLINE unsigned evergreen_array_mode(unsigned mode)
+static inline unsigned evergreen_array_mode(unsigned mode)
{
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_028C70_ARRAY_LINEAR_ALIGNED;
@@ -485,7 +485,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
/* offset */
rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
+ rs->offset_scale = state->offset_scale * 16.0f;
rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;
if (state->point_size_per_vertex) {
@@ -896,7 +896,7 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx,
for (i = start_slot; i < start_slot + num_scissors; i++) {
rctx->scissor[i].scissor = state[i - start_slot];
- rctx->scissor[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
}
}
@@ -1028,7 +1028,10 @@ void evergreen_init_color_surface(struct r600_context *rctx,
macro_aspect = rtex->surface.mtilea;
bankw = rtex->surface.bankw;
bankh = rtex->surface.bankh;
- fmask_bankh = rtex->fmask.bank_height;
+ if (rtex->fmask.size)
+ fmask_bankh = rtex->fmask.bank_height;
+ else
+ fmask_bankh = rtex->surface.bankh;
tile_split = eg_tile_split(tile_split);
macro_aspect = eg_macro_tile_aspect(macro_aspect);
bankw = eg_bank_wh(bankw);
@@ -1149,10 +1152,11 @@ void evergreen_init_color_surface(struct r600_context *rctx,
surf->cb_color_attrib = color_attrib;
if (rtex->fmask.size) {
surf->cb_color_fmask = (base_offset + rtex->fmask.offset) >> 8;
+ surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
} else {
surf->cb_color_fmask = surf->cb_color_base;
+ surf->cb_color_fmask_slice = S_028C88_TILE_MAX(slice);
}
- surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
surf->color_initialized = true;
}
@@ -1342,11 +1346,11 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
if (rctx->alphatest_state.bypass != alphatest_bypass) {
rctx->alphatest_state.bypass = alphatest_bypass;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
if (rctx->alphatest_state.cb0_export_16bpc != export_16bpc) {
rctx->alphatest_state.cb0_export_16bpc = export_16bpc;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -1362,28 +1366,28 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
rctx->poly_offset_state.zs_format = state->zsbuf->format;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
if (rctx->db_state.rsurf != surf) {
rctx->db_state.rsurf = surf;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
} else if (rctx->db_state.rsurf) {
rctx->db_state.rsurf = NULL;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
rctx->alphatest_state.bypass = false;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
log_samples = util_logbase2(rctx->framebuffer.nr_samples);
@@ -1392,7 +1396,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
rctx->b.family == CHIP_RV770) &&
rctx->db_misc_state.log_samples != log_samples) {
rctx->db_misc_state.log_samples = log_samples;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
@@ -1420,7 +1424,7 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
rctx->framebuffer.atom.num_dw += 4;
}
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
r600_set_sample_locations_constant_buffer(rctx);
}
@@ -1434,7 +1438,7 @@ static void evergreen_set_min_samples(struct pipe_context *ctx, unsigned min_sam
rctx->ps_iter_samples = min_samples;
if (rctx->framebuffer.nr_samples > 1) {
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
}
}
@@ -1732,10 +1736,10 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
- /* Always enable the first colorbuffer in CB_SHADER_MASK. This
- * will assure that the alpha-test will work even if there is
- * no colorbuffer bound. */
- radeon_emit(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */
+ /* This must match the used export instructions exactly.
+ * Other values may lead to undefined behavior and hangs.
+ */
+ radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */
}
static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -1980,7 +1984,7 @@ static void evergreen_emit_cs_constant_buffers(struct r600_context *rctx, struct
static void evergreen_emit_sampler_views(struct r600_context *rctx,
struct r600_samplerview_state *state,
- unsigned resource_id_base)
+ unsigned resource_id_base, unsigned pkt_flags)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint32_t dirty_mask = state->dirty_mask;
@@ -1993,7 +1997,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
rview = state->views[resource_index];
assert(rview);
- radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0));
+ radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
radeon_emit(cs, (resource_id_base + resource_index) * 8);
radeon_emit_array(cs, rview->tex_resource_words, 8);
@@ -2002,11 +2006,11 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
rview->tex_resource->b.b.nr_samples > 1 ?
RADEON_PRIO_SHADER_TEXTURE_MSAA :
RADEON_PRIO_SHADER_TEXTURE_RO);
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, reloc);
if (!rview->skip_mip_address_reloc) {
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
radeon_emit(cs, reloc);
}
}
@@ -2015,23 +2019,33 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
static void evergreen_emit_vs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views, 176 + R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views,
+ 176 + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_gs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views, 336 + R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views,
+ 336 + R600_MAX_CONST_BUFFERS, 0);
}
static void evergreen_emit_ps_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views, R600_MAX_CONST_BUFFERS);
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views,
+ R600_MAX_CONST_BUFFERS, 0);
+}
+
+static void evergreen_emit_cs_sampler_views(struct r600_context *rctx, struct r600_atom *atom)
+{
+ evergreen_emit_sampler_views(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views,
+ 816 + 2, RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_sampler_states(struct r600_context *rctx,
struct r600_textures_info *texinfo,
unsigned resource_id_base,
- unsigned border_index_reg)
+ unsigned border_index_reg,
+ unsigned pkt_flags)
{
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint32_t dirty_mask = texinfo->states.dirty_mask;
@@ -2043,7 +2057,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
rstate = texinfo->states.states[i];
assert(rstate);
- radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0));
+ radeon_emit(cs, PKT3(PKT3_SET_SAMPLER, 3, 0) | pkt_flags);
radeon_emit(cs, (resource_id_base + i) * 3);
radeon_emit_array(cs, rstate->tex_sampler_words, 3);
@@ -2058,17 +2072,27 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
static void evergreen_emit_vs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18, R_00A414_TD_VS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_VERTEX], 18,
+ R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, 0);
}
static void evergreen_emit_gs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36, R_00A428_TD_GS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY], 36,
+ R_00A428_TD_GS_SAMPLER0_BORDER_INDEX, 0);
}
static void evergreen_emit_ps_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
{
- evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0, R_00A400_TD_PS_SAMPLER0_BORDER_INDEX);
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT], 0,
+ R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0);
+}
+
+static void evergreen_emit_cs_sampler_states(struct r600_context *rctx, struct r600_atom *atom)
+{
+ evergreen_emit_sampler_states(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE], 90,
+ R_00A464_TD_CS_SAMPLER0_BORDER_INDEX,
+ RADEON_CP_PACKET3_COMPUTE_MODE);
}
static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
@@ -3176,7 +3200,7 @@ void evergreen_update_db_shader_control(struct r600_context * rctx)
if (db_shader_control != rctx->db_misc_state.db_shader_control) {
rctx->db_misc_state.db_shader_control = db_shader_control;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -3431,12 +3455,14 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].states.atom, id++, evergreen_emit_vs_sampler_states, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].states.atom, id++, evergreen_emit_gs_sampler_states, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].states.atom, id++, evergreen_emit_ps_sampler_states, 0);
+ r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom, id++, evergreen_emit_cs_sampler_states, 0);
/* resources */
r600_init_atom(rctx, &rctx->vertex_buffer_state.atom, id++, evergreen_fs_emit_vertex_buffers, 0);
r600_init_atom(rctx, &rctx->cs_vertex_buffer_state.atom, id++, evergreen_cs_emit_vertex_buffers, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_VERTEX].views.atom, id++, evergreen_emit_vs_sampler_views, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_GEOMETRY].views.atom, id++, evergreen_emit_gs_sampler_views, 0);
r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_FRAGMENT].views.atom, id++, evergreen_emit_ps_sampler_views, 0);
+ r600_init_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom, id++, evergreen_emit_cs_sampler_views, 0);
r600_init_atom(rctx, &rctx->vgt_state.atom, id++, r600_emit_vgt_state, 10);
@@ -3466,8 +3492,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
}
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
- rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
- rctx->atoms[id++] = &rctx->b.streamout.enable_atom;
+ r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
+ r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index cd4ff46b103..ad6ad434b78 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1253,6 +1253,11 @@
#define R_00A430_TD_GS_SAMPLER0_BORDER_GREEN 0x00A430
#define R_00A434_TD_GS_SAMPLER0_BORDER_BLUE 0x00A434
#define R_00A438_TD_GS_SAMPLER0_BORDER_ALPHA 0x00A438
+#define R_00A464_TD_CS_SAMPLER0_BORDER_INDEX 0x00A464
+#define R_00A468_TD_CS_SAMPLER0_BORDER_RED 0x00A468
+#define R_00A46C_TD_CS_SAMPLER0_BORDER_GREEN 0x00A46C
+#define R_00A470_TD_CS_SAMPLER0_BORDER_BLUE 0x00A470
+#define R_00A474_TD_CS_SAMPLER0_BORDER_ALPHA 0x00A474
#define R_03C000_SQ_TEX_SAMPLER_WORD0_0 0x03C000
#define S_03C000_CLAMP_X(x) (((x) & 0x7) << 0)
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 01262a59e90..b0002c3b50f 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -145,7 +145,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
rctx->db_misc_state.copy_depth = util_format_has_depth(desc);
rctx->db_misc_state.copy_stencil = util_format_has_stencil(desc);
rctx->db_misc_state.copy_sample = first_sample;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
for (level = first_level; level <= last_level; level++) {
if (!staging && !(texture->dirty_level_mask & (1 << level)))
@@ -162,7 +162,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
if (sample != rctx->db_misc_state.copy_sample) {
rctx->db_misc_state.copy_sample = sample;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
surf_tmpl.format = texture->resource.b.b.format;
@@ -197,7 +197,7 @@ static void r600_blit_decompress_depth(struct pipe_context *ctx,
/* reenable compression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_through_cb = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
@@ -210,7 +210,7 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
/* Enable decompression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_in_place = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
surf_tmpl.format = texture->resource.b.b.format;
@@ -248,7 +248,7 @@ static void r600_blit_decompress_depth_in_place(struct r600_context *rctx,
/* Disable decompression in DB_RENDER_CONTROL */
rctx->db_misc_state.flush_depthstencil_in_place = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
void r600_decompress_depth_textures(struct r600_context *rctx,
@@ -396,6 +396,8 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
&buffers, color);
+ if (!buffers)
+ return; /* all buffers have been fast cleared */
}
if (buffers & PIPE_CLEAR_COLOR) {
@@ -435,10 +437,10 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
fb->zsbuf->u.tex.last_layer == util_max_layer(&rtex->resource.b.b, level)) {
if (rtex->depth_clear_value != depth) {
rtex->depth_clear_value = depth;
- rctx->db_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
}
rctx->db_misc_state.htile_clear = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -451,7 +453,7 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
/* disable fast clear */
if (rctx->db_misc_state.htile_clear) {
rctx->db_misc_state.htile_clear = false;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
diff --git a/src/gallium/drivers/r600/r600_formats.h b/src/gallium/drivers/r600/r600_formats.h
index fa374d92e6f..9533aaa1378 100644
--- a/src/gallium/drivers/r600/r600_formats.h
+++ b/src/gallium/drivers/r600/r600_formats.h
@@ -64,7 +64,7 @@
#define ENDIAN_8IN32 2
#define ENDIAN_8IN64 3
-static INLINE unsigned r600_endian_swap(unsigned size)
+static inline unsigned r600_endian_swap(unsigned size)
{
if (R600_BIG_ENDIAN) {
switch (size) {
@@ -82,7 +82,7 @@ static INLINE unsigned r600_endian_swap(unsigned size)
}
}
-static INLINE bool r600_is_vertex_format_supported(enum pipe_format format)
+static inline bool r600_is_vertex_format_supported(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
unsigned i;
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 8eb0c6806b9..64451516c23 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -51,13 +51,13 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
unsigned i;
/* The number of dwords all the dirty states would take. */
- for (i = 0; i < R600_NUM_ATOMS; i++) {
- if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
- num_dw += ctx->atoms[i]->num_dw;
- if (ctx->screen->b.trace_bo) {
- num_dw += R600_TRACE_CS_DWORDS;
- }
+ i = r600_next_dirty_atom(ctx, 0);
+ while (i < R600_NUM_ATOMS) {
+ num_dw += ctx->atoms[i]->num_dw;
+ if (ctx->screen->b.trace_bo) {
+ num_dw += R600_TRACE_CS_DWORDS;
}
+ i = r600_next_dirty_atom(ctx, i + 1);
}
/* The upper-bound of how much space a draw command would take. */
@@ -68,7 +68,8 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
}
/* Count in queries_suspend. */
- num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend;
+ num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
+ ctx->b.num_cs_dw_timer_queries_suspend;
/* Count in streamout_end at the end of CS. */
if (ctx->b.streamout.begin_emitted) {
@@ -92,7 +93,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
num_dw += 10;
/* Flush if there's not enough space. */
- if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+ if (num_dw > ctx->b.rings.gfx.cs->max_dw) {
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
}
@@ -295,43 +296,45 @@ void r600_begin_new_cs(struct r600_context *ctx)
r600_emit_command_buffer(ctx->b.rings.gfx.cs, &ctx->start_cs_cmd);
/* Re-emit states. */
- ctx->alphatest_state.atom.dirty = true;
- ctx->blend_color.atom.dirty = true;
- ctx->cb_misc_state.atom.dirty = true;
- ctx->clip_misc_state.atom.dirty = true;
- ctx->clip_state.atom.dirty = true;
- ctx->db_misc_state.atom.dirty = true;
- ctx->db_state.atom.dirty = true;
- ctx->framebuffer.atom.dirty = true;
- ctx->pixel_shader.atom.dirty = true;
- ctx->poly_offset_state.atom.dirty = true;
- ctx->vgt_state.atom.dirty = true;
- ctx->sample_mask.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->blend_color.atom);
+ r600_mark_atom_dirty(ctx, &ctx->cb_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->clip_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->clip_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+ r600_mark_atom_dirty(ctx, &ctx->pixel_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
+ r600_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
for (i = 0; i < R600_MAX_VIEWPORTS; i++) {
- ctx->scissor[i].atom.dirty = true;
- ctx->viewport[i].atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->scissor[i].atom);
+ r600_mark_atom_dirty(ctx, &ctx->viewport[i].atom);
}
- ctx->config_state.atom.dirty = true;
- ctx->stencil_ref.atom.dirty = true;
- ctx->vertex_fetch_shader.atom.dirty = true;
- ctx->export_shader.atom.dirty = true;
- ctx->shader_stages.atom.dirty = true;
+ if (ctx->b.chip_class < EVERGREEN) {
+ r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
+ }
+ r600_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
+ r600_mark_atom_dirty(ctx, &ctx->vertex_fetch_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->export_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->shader_stages.atom);
if (ctx->gs_shader) {
- ctx->geometry_shader.atom.dirty = true;
- ctx->gs_rings.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->geometry_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->gs_rings.atom);
}
- ctx->vertex_shader.atom.dirty = true;
- ctx->b.streamout.enable_atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->vertex_shader.atom);
+ r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
if (ctx->blend_state.cso)
- ctx->blend_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
if (ctx->dsa_state.cso)
- ctx->dsa_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->dsa_state.atom);
if (ctx->rasterizer_state.cso)
- ctx->rasterizer_state.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->rasterizer_state.atom);
if (ctx->b.chip_class <= R700) {
- ctx->seamless_cube_map.atom.dirty = true;
+ r600_mark_atom_dirty(ctx, &ctx->seamless_cube_map.atom);
}
ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c
index 72e2dc42f7e..faf538ccbb5 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -84,7 +84,7 @@ static void llvm_load_system_value(
#else
LLVMValueRef reg = lp_build_const_int32(
ctx->soa.bld_base.base.gallivm, chan);
- ctx->system_values[index] = build_intrinsic(
+ ctx->system_values[index] = lp_build_intrinsic(
ctx->soa.bld_base.base.gallivm->builder,
"llvm.R600.load.input",
ctx->soa.bld_base.base.elem_type, &reg, 1,
@@ -111,9 +111,9 @@ llvm_load_input_vector(
Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");
LLVMValueRef HalfVec[2] = {
- build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
+ lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
VecType, Args, ArgCount, LLVMReadNoneAttribute),
- build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
+ lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
VecType, Args, ArgCount, LLVMReadNoneAttribute)
};
LLVMValueRef MaskInputs[4] = {
@@ -127,7 +127,7 @@ llvm_load_input_vector(
Mask, "");
} else {
VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
- return build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
+ return lp_build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
VecType, Args, ArgCount, LLVMReadNoneAttribute);
}
}
@@ -153,7 +153,7 @@ llvm_load_input_helper(
arg_count = 1;
}
- return build_intrinsic(bb->gallivm->builder, intrinsic,
+ return lp_build_intrinsic(bb->gallivm->builder, intrinsic,
bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute);
}
#endif
@@ -332,7 +332,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer);
args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component);
lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output",
- LLVMVoidTypeInContext(base->gallivm->context), args, 4);
+ LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0);
}
}
@@ -356,7 +356,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -373,7 +373,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);
args[0] = output;
args[1] = base_vector;
- adjusted_elements[chan] = build_intrinsic(base->gallivm->builder,
+ adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder,
"llvm.AMDGPU.dp4", bld_base->base.elem_type,
args, 2, LLVMReadNoneAttribute);
}
@@ -381,7 +381,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
adjusted_elements, 4);
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -394,14 +394,14 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_pos++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
args, 3, 0);
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -418,7 +418,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = lp_build_gather_values(base->gallivm, elements, 4);
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -430,7 +430,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
args[0] = output;
args[1] = lp_build_const_int32(base->gallivm, next_param++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -449,7 +449,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
args[1] = lp_build_const_int32(base->gallivm, j);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -458,7 +458,7 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
} else {
args[1] = lp_build_const_int32(base->gallivm, color_count++);
args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
- build_intrinsic(
+ lp_build_intrinsic(
base->gallivm->builder,
"llvm.R600.store.swizzle",
LLVMVoidTypeInContext(base->gallivm->context),
@@ -543,7 +543,7 @@ static void llvm_emit_tex(
case TGSI_OPCODE_TXF: {
args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
- emit_data->output[0] = build_intrinsic(gallivm->builder,
+ emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
"llvm.R600.load.texbuf",
emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
if (ctx->chip_class >= EVERGREEN)
@@ -658,7 +658,7 @@ static void llvm_emit_tex(
lp_build_const_int32(gallivm, 1),
lp_build_const_int32(gallivm, 1)
};
- LLVMValueRef ptr = build_intrinsic(gallivm->builder,
+ LLVMValueRef ptr = lp_build_intrinsic(gallivm->builder,
"llvm.R600.ldptr",
emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
@@ -679,7 +679,7 @@ static void llvm_emit_tex(
}
}
- emit_data->output[0] = build_intrinsic(gallivm->builder,
+ emit_data->output[0] = lp_build_intrinsic(gallivm->builder,
action->intr_name,
emit_data->dst_type, args, c, LLVMReadNoneAttribute);
@@ -754,7 +754,131 @@ static struct lp_build_tgsi_action dot_action = {
.intr_name = "llvm.AMDGPU.dp4"
};
+static void txd_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ LLVMValueRef coords[4];
+ unsigned chan, src;
+ for (src = 0; src < 3; src++) {
+ for (chan = 0; chan < 4; chan++)
+ coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
+
+ emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ }
+ emit_data->arg_count = 3;
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
+
+static void txp_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ LLVMValueRef src_w;
+ unsigned chan;
+ LLVMValueRef coords[5];
+
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+ src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+
+ for (chan = 0; chan < 3; chan++ ) {
+ LLVMValueRef arg = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 0, chan);
+ coords[chan] = lp_build_emit_llvm_binary(bld_base,
+ TGSI_OPCODE_DIV, arg, src_w);
+ }
+ coords[3] = bld_base->base.one;
+
+ if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
+ }
+
+ emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ emit_data->arg_count = 1;
+}
+
+static void tex_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+
+ LLVMValueRef coords[5];
+ unsigned chan;
+ for (chan = 0; chan < 4; chan++) {
+ coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
+ inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
+ /* These instructions have additional operand that should be packed
+ * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
+ * That operand should be passed as a float value in the args array
+ * right after the coord vector. After packing it's not used anymore,
+ * that's why arg_count is not increased */
+ coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
+ }
+
+ if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, NULL);
+ }
+
+ emit_data->arg_count = 1;
+ emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
+ coords, 4);
+ emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+}
+
+static void txf_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ const struct tgsi_texture_offset * off = inst->TexOffsets;
+ LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
+
+ /* fetch tex coords */
+ tex_fetch_args(bld_base, emit_data);
+
+ /* fetch tex offsets */
+ if (inst->Texture.NumOffsets) {
+ assert(inst->Texture.NumOffsets == 1);
+
+ emit_data->args[1] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleX],
+ offset_type);
+ emit_data->args[2] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleY],
+ offset_type);
+ emit_data->args[3] = LLVMConstBitCast(
+ bld->immediates[off->Index][off->SwizzleZ],
+ offset_type);
+ } else {
+ emit_data->args[1] = bld_base->int_bld.zero;
+ emit_data->args[2] = bld_base->int_bld.zero;
+ emit_data->args[3] = bld_base->int_bld.zero;
+ }
+
+ emit_data->arg_count = 4;
+}
LLVMModuleRef r600_tgsi_llvm(
struct radeon_llvm_context * ctx,
@@ -783,7 +907,6 @@ LLVMModuleRef r600_tgsi_llvm(
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const;
bld_base->emit_prologue = llvm_emit_prologue;
bld_base->emit_epilogue = llvm_emit_epilogue;
- ctx->userdata = ctx;
ctx->load_input = llvm_load_input;
ctx->load_system_value = llvm_load_system_value;
@@ -791,18 +914,42 @@ LLVMModuleRef r600_tgsi_llvm(
bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action;
bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action;
bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action;
+ bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
+ bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
+ bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TEX2].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXB2].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex;
- bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
+ bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
+ bld_base->op_actions[TGSI_OPCODE_TXL2].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex;
+ bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
+ bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex;
bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cndlt;
lp_build_tgsi_llvm(bld_base, tokens);
@@ -881,7 +1028,7 @@ unsigned r600_llvm_compile(
const char * gpu_family = r600_get_llvm_processor_name(family);
memset(&binary, 0, sizeof(struct radeon_shader_binary));
- r = radeon_llvm_compile(mod, &binary, gpu_family, dump, NULL);
+ r = radeon_llvm_compile(mod, &binary, gpu_family, dump, dump, NULL);
r = r600_create_shader(bc, &binary, use_kill);
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index e122b607b86..6ffe5615fbf 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -120,6 +120,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
rctx->b.b.screen = screen;
rctx->b.b.priv = priv;
rctx->b.b.destroy = r600_destroy_context;
+ rctx->b.set_atom_dirty = (void *)r600_set_atom_dirty;
if (!r600_common_context_init(&rctx->b, &rscreen->b))
goto fail;
@@ -176,7 +177,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
goto fail;
}
- rctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX,
+ rctx->b.rings.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
r600_context_gfx_flush, rctx,
rscreen->b.trace_bo ?
rscreen->b.trace_bo->cs_buf : NULL);
@@ -268,8 +269,14 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ return rscreen->b.info.drm_major == 2 && rscreen->b.info.drm_minor >= 43;
+
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !R600_BIG_ENDIAN && rscreen->b.info.has_userptr;
@@ -329,10 +336,10 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
- case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
/* Stream output. */
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 4ea270d3839..9b66105641a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -36,7 +36,7 @@
#include "util/list.h"
#include "util/u_transfer.h"
-#define R600_NUM_ATOMS 73
+#define R600_NUM_ATOMS 75
#define R600_MAX_VIEWPORTS 16
@@ -85,6 +85,9 @@
#define R600_BIG_ENDIAN 0
#endif
+#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
+#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
+
struct r600_context;
struct r600_bytecode;
struct r600_shader_key;
@@ -426,6 +429,8 @@ struct r600_context {
/* State binding slots are here. */
struct r600_atom *atoms[R600_NUM_ATOMS];
+ /* Dirty atom bitmask for fast tests */
+ unsigned long dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
/* States for CS initialization. */
struct r600_command_buffer start_cs_cmd; /* invariant state mostly */
/** Compute specific registers initializations. The start_cs_cmd atom
@@ -490,37 +495,92 @@ struct r600_context {
struct r600_isa *isa;
};
-static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
+static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
struct r600_command_buffer *cb)
{
- assert(cs->cdw + cb->num_dw <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw + cb->num_dw <= cs->max_dw);
memcpy(cs->buf + cs->cdw, cb->buf, 4 * cb->num_dw);
cs->cdw += cb->num_dw;
}
+static inline void r600_set_atom_dirty(struct r600_context *rctx,
+ struct r600_atom *atom,
+ bool dirty)
+{
+ unsigned long mask;
+ unsigned int w;
+
+ atom->dirty = dirty;
+
+ assert(atom->id != 0);
+ w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
+ mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
+ if (dirty)
+ rctx->dirty_atoms[w] |= mask;
+ else
+ rctx->dirty_atoms[w] &= ~mask;
+}
+
+static inline void r600_mark_atom_dirty(struct r600_context *rctx,
+ struct r600_atom *atom)
+{
+ r600_set_atom_dirty(rctx, atom, true);
+}
+
+static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
+ unsigned int id)
+{
+#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
+ unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
+ unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
+ unsigned long bits, mask = (1ul << bit) - 1;
+
+ for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
+ bits = rctx->dirty_atoms[w] & ~mask;
+ if (bits == 0)
+ continue;
+ return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
+ }
+
+ return R600_NUM_ATOMS;
+#else
+ for (; id < R600_NUM_ATOMS; id++) {
+ bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
+ (1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
+ assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
+ if (dirty)
+ break;
+ }
+
+ return id;
+#endif
+}
+
void r600_trace_emit(struct r600_context *rctx);
-static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
+static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
{
atom->emit(&rctx->b, atom);
- atom->dirty = false;
+ r600_set_atom_dirty(rctx, atom, false);
if (rctx->screen->b.trace_bo) {
r600_trace_emit(rctx);
}
}
-static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
+static inline void r600_set_cso_state(struct r600_context *rctx,
+ struct r600_cso_state *state, void *cso)
{
state->cso = cso;
- state->atom.dirty = cso != NULL;
+ r600_set_atom_dirty(rctx, &state->atom, cso != NULL);
}
-static INLINE void r600_set_cso_state_with_cb(struct r600_cso_state *state, void *cso,
+static inline void r600_set_cso_state_with_cb(struct r600_context *rctx,
+ struct r600_cso_state *state, void *cso,
struct r600_command_buffer *cb)
{
state->cb = cb;
state->atom.num_dw = cb ? cb->num_dw : 0;
- r600_set_cso_state(state, cso);
+ r600_set_cso_state(rctx, state, cso);
}
/* compute_memory_pool.c */
@@ -529,11 +589,6 @@ void compute_memory_pool_delete(struct compute_memory_pool* pool);
struct compute_memory_pool* compute_memory_pool_new(
struct r600_screen *rscreen);
-/* evergreen_compute.c */
-void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
- unsigned start_slot, unsigned count,
- struct pipe_sampler_view **views);
-
/* evergreen_state.c */
struct pipe_sampler_view *
evergreen_create_sampler_view_custom(struct pipe_context *ctx,
@@ -656,6 +711,7 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom);
void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a);
+void r600_add_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id);
void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
unsigned num_dw);
@@ -719,19 +775,19 @@ struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
/*Evergreen Compute packet3*/
#define PKT3C(op, count, predicate) (PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op) | PKT_COUNT_S(count) | PKT3_PREDICATE(predicate) | RADEON_CP_PACKET3_COMPUTE_MODE)
-static INLINE void r600_store_value(struct r600_command_buffer *cb, unsigned value)
+static inline void r600_store_value(struct r600_command_buffer *cb, unsigned value)
{
cb->buf[cb->num_dw++] = value;
}
-static INLINE void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
+static inline void r600_store_array(struct r600_command_buffer *cb, unsigned num, unsigned *ptr)
{
assert(cb->num_dw+num <= cb->max_num_dw);
memcpy(&cb->buf[cb->num_dw], ptr, num * sizeof(ptr[0]));
cb->num_dw += num;
}
-static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_config_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -743,7 +799,7 @@ static INLINE void r600_store_config_reg_seq(struct r600_command_buffer *cb, uns
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_context_reg_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -755,7 +811,7 @@ static INLINE void r600_store_context_reg_seq(struct r600_command_buffer *cb, un
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -763,7 +819,7 @@ static INLINE void r600_store_ctl_const_seq(struct r600_command_buffer *cb, unsi
cb->buf[cb->num_dw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
-static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void r600_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= R600_LOOP_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -775,7 +831,7 @@ static INLINE void r600_store_loop_const_seq(struct r600_command_buffer *cb, uns
* Needs cb->pkt_flags set to RADEON_CP_PACKET3_COMPUTE_MODE for compute
* shaders.
*/
-static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
+static inline void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsigned reg, unsigned num)
{
assert(reg >= EG_LOOP_CONST_OFFSET);
assert(cb->num_dw+2+num <= cb->max_num_dw);
@@ -783,31 +839,31 @@ static INLINE void eg_store_loop_const_seq(struct r600_command_buffer *cb, unsig
cb->buf[cb->num_dw++] = (reg - EG_LOOP_CONST_OFFSET) >> 2;
}
-static INLINE void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_config_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_config_reg_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_context_reg(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_context_reg_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_ctl_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_ctl_const_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void r600_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
r600_store_loop_const_seq(cb, reg, 1);
r600_store_value(cb, value);
}
-static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
+static inline void eg_store_loop_const(struct r600_command_buffer *cb, unsigned reg, unsigned value)
{
eg_store_loop_const_seq(cb, reg, 1);
r600_store_value(cb, value);
@@ -816,28 +872,28 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
void r600_release_command_buffer(struct r600_command_buffer *cb);
-static INLINE void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
r600_write_context_reg_seq(cs, reg, num);
/* Set the compute bit on the packet header */
cs->buf[cs->cdw - 2] |= RADEON_CP_PACKET3_COMPUTE_MODE;
}
-static INLINE void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CTL_CONST_OFFSET);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
cs->buf[cs->cdw++] = PKT3(PKT3_SET_CTL_CONST, num, 0);
cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
}
-static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_compute_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
+static inline void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
{
if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
r600_write_compute_context_reg(cs, reg, value);
@@ -846,7 +902,7 @@ static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsi
}
}
-static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_ctl_const_seq(cs, reg, 1);
radeon_emit(cs, value);
@@ -855,21 +911,21 @@ static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned re
/*
* common helpers
*/
-static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
+static inline uint32_t S_FIXED(float value, uint32_t frac_bits)
{
return value * (1 << frac_bits);
}
#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
/* 12.4 fixed-point */
-static INLINE unsigned r600_pack_float_12p4(float x)
+static inline unsigned r600_pack_float_12p4(float x)
{
return x <= 0 ? 0 :
x >= 4096 ? 0xffff : x * 16;
}
/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
-static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+static inline bool r600_can_read_depth(struct r600_texture *rtex)
{
return rtex->resource.b.b.nr_samples <= 1 &&
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
@@ -880,7 +936,7 @@ static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
#define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1
#define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2
-static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode)
+static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
{
static const int prim_conv[] = {
V_028A6C_OUTPRIM_TYPE_POINTLIST,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index af7622e9b34..8d1f95abddc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -310,6 +310,7 @@ struct r600_shader_ctx {
int gs_next_vertex;
struct r600_shader *gs_for_vs;
int gs_export_gpr_treg;
+ unsigned enabled_stream_buffers_mask;
};
struct r600_shader_tgsi_instruction {
@@ -1402,6 +1403,9 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
* with MEM_STREAM instructions */
output.array_size = 0xFFF;
output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component;
+
+ ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer);
+
if (ctx->bc->chip_class >= EVERGREEN) {
switch (so->output[i].output_buffer) {
case 0:
@@ -1718,6 +1722,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
gs->gs_copy_shader = cshader;
ctx.bc->nstack = 1;
+
+ cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
cshader->shader.ring_item_size = ocnt * 16;
return r600_bytecode_build(ctx.bc);
@@ -1931,15 +1937,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+ ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1;
+ ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;
+
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
- ctx.temp_reg = ctx.bc->ar_reg + 2;
- ctx.bc->index_reg[0] = ctx.bc->ar_reg + 3;
- ctx.bc->index_reg[1] = ctx.bc->ar_reg + 4;
+ ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 3;
+ ctx.temp_reg = ctx.bc->ar_reg + 4;
} else {
- ctx.temp_reg = ctx.bc->ar_reg + 1;
- ctx.bc->index_reg[0] = ctx.bc->ar_reg + 2;
- ctx.bc->index_reg[1] = ctx.bc->ar_reg + 3;
+ ctx.temp_reg = ctx.bc->ar_reg + 3;
}
shader->max_arrays = 0;
@@ -2086,7 +2091,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
radeon_llvm_ctx.stream_outputs = &so;
- radeon_llvm_ctx.clip_vertex = ctx.cv_output;
radeon_llvm_ctx.alpha_to_one = key.alpha_to_one;
radeon_llvm_ctx.has_compressed_msaa_texturing =
ctx.bc->has_compressed_msaa_texturing;
@@ -2262,6 +2266,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
so.num_outputs && !use_llvm)
emit_streamout(&ctx, &so);
+ pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
convert_edgeflag_to_int(&ctx);
if (ring_outputs) {
@@ -2485,6 +2490,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
j++;
+ shader->nr_ps_color_exports++;
}
noutput = j;
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index dd359d7e959..5d05c8153d7 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -125,6 +125,7 @@ struct r600_pipe_shader {
struct r600_shader_key key;
unsigned db_shader_control;
unsigned ps_depth_export;
+ unsigned enabled_stream_buffers_mask;
};
/* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 960dfcedfef..5cc2283792d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -473,7 +473,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
/* offset */
rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
+ rs->offset_scale = state->offset_scale * 16.0f;
rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;
if (state->point_size_per_vertex) {
@@ -802,7 +802,7 @@ static void r600_set_scissor_states(struct pipe_context *ctx,
return;
for (i = start_slot ; i < start_slot + num_scissors; i++) {
- rctx->scissor[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[i].atom);
}
}
@@ -1193,7 +1193,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
if (rctx->alphatest_state.bypass != alphatest_bypass) {
rctx->alphatest_state.bypass = alphatest_bypass;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -1209,28 +1209,28 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
if (state->zsbuf->format != rctx->poly_offset_state.zs_format) {
rctx->poly_offset_state.zs_format = state->zsbuf->format;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
if (rctx->db_state.rsurf != surf) {
rctx->db_state.rsurf = surf;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
} else if (rctx->db_state.rsurf) {
rctx->db_state.rsurf = NULL;
- rctx->db_state.atom.dirty = true;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
if (rctx->cb_misc_state.nr_cbufs != state->nr_cbufs) {
rctx->cb_misc_state.nr_cbufs = state->nr_cbufs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (state->nr_cbufs == 0 && rctx->alphatest_state.bypass) {
rctx->alphatest_state.bypass = false;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
/* Calculate the CS size. */
@@ -1250,7 +1250,7 @@ static void r600_set_framebuffer_state(struct pipe_context *ctx,
rctx->framebuffer.atom.num_dw += 2;
}
- rctx->framebuffer.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
r600_set_sample_locations_constant_buffer(rctx);
}
@@ -1541,9 +1541,9 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
rctx->ps_iter_samples = min_samples;
if (rctx->framebuffer.nr_samples > 1) {
- rctx->rasterizer_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->rasterizer_state.atom);
if (rctx->b.chip_class == R600)
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -2089,7 +2089,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
if (rctx->config_state.sq_gpr_resource_mgmt_1 != tmp || rctx->config_state.sq_gpr_resource_mgmt_2 != tmp2) {
rctx->config_state.sq_gpr_resource_mgmt_1 = tmp;
rctx->config_state.sq_gpr_resource_mgmt_2 = tmp2;
- rctx->config_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->config_state.atom);
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
}
return true;
@@ -2796,11 +2796,11 @@ void r600_update_db_shader_control(struct r600_context * rctx)
if (db_shader_control != rctx->db_misc_state.db_shader_control) {
rctx->db_misc_state.db_shader_control = db_shader_control;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
-static INLINE unsigned r600_array_mode(unsigned mode)
+static inline unsigned r600_array_mode(unsigned mode)
{
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED: return V_0280A0_ARRAY_LINEAR_ALIGNED;
@@ -3074,8 +3074,8 @@ void r600_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
- rctx->atoms[id++] = &rctx->b.streamout.begin_atom;
- rctx->atoms[id++] = &rctx->b.streamout.enable_atom;
+ r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
+ r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
r600_init_atom(rctx, &rctx->pixel_shader.atom, id++, r600_emit_shader, 0);
r600_init_atom(rctx, &rctx->geometry_shader.atom, id++, r600_emit_shader, 0);
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 13dc9ee8c10..aa4a8d0240f 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -47,18 +47,26 @@ void r600_release_command_buffer(struct r600_command_buffer *cb)
FREE(cb->buf);
}
+void r600_add_atom(struct r600_context *rctx,
+ struct r600_atom *atom,
+ unsigned id)
+{
+ assert(id < R600_NUM_ATOMS);
+ assert(rctx->atoms[id] == NULL);
+ rctx->atoms[id] = atom;
+ atom->id = id;
+ atom->dirty = false;
+}
+
void r600_init_atom(struct r600_context *rctx,
struct r600_atom *atom,
unsigned id,
void (*emit)(struct r600_context *ctx, struct r600_atom *state),
unsigned num_dw)
{
- assert(id < R600_NUM_ATOMS);
- assert(rctx->atoms[id] == NULL);
- rctx->atoms[id] = atom;
atom->emit = (void*)emit;
atom->num_dw = num_dw;
- atom->dirty = false;
+ r600_add_atom(rctx, atom, id);
}
void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -127,11 +135,11 @@ static void r600_bind_blend_state_internal(struct r600_context *rctx,
rctx->dual_src_blend = blend->dual_src_blend;
if (!blend_disable) {
- r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer);
color_control = blend->cb_color_control;
} else {
/* Blending is disabled. */
- r600_set_cso_state_with_cb(&rctx->blend_state, blend, &blend->buffer_no_blend);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, blend, &blend->buffer_no_blend);
color_control = blend->cb_color_control_no_blend;
}
@@ -150,7 +158,7 @@ static void r600_bind_blend_state_internal(struct r600_context *rctx,
update_cb = true;
}
if (update_cb) {
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
}
@@ -160,7 +168,7 @@ static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
struct r600_blend_state *blend = (struct r600_blend_state *)state;
if (blend == NULL) {
- r600_set_cso_state_with_cb(&rctx->blend_state, NULL, NULL);
+ r600_set_cso_state_with_cb(rctx, &rctx->blend_state, NULL, NULL);
return;
}
@@ -173,7 +181,7 @@ static void r600_set_blend_color(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->blend_color.state = *state;
- rctx->blend_color.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->blend_color.atom);
}
void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
@@ -210,7 +218,7 @@ static void r600_set_clip_state(struct pipe_context *ctx,
struct pipe_constant_buffer cb;
rctx->clip_state.state = *state;
- rctx->clip_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_state.atom);
cb.buffer = NULL;
cb.user_buffer = state->ucp;
@@ -226,7 +234,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
rctx->stencil_ref.state = *state;
- rctx->stencil_ref.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->stencil_ref.atom);
}
void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
@@ -274,11 +282,11 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
struct r600_stencil_ref ref;
if (state == NULL) {
- r600_set_cso_state_with_cb(&rctx->dsa_state, NULL, NULL);
+ r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, NULL, NULL);
return;
}
- r600_set_cso_state_with_cb(&rctx->dsa_state, dsa, &dsa->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->dsa_state, dsa, &dsa->buffer);
ref.ref_value[0] = rctx->stencil_ref.pipe_state.ref_value[0];
ref.ref_value[1] = rctx->stencil_ref.pipe_state.ref_value[1];
@@ -293,7 +301,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
* we are having lockup on evergreen so do not enable
* hyperz when not writing zbuffer
*/
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
@@ -304,7 +312,7 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, void *state)
rctx->alphatest_state.sx_alpha_ref != dsa->alpha_ref) {
rctx->alphatest_state.sx_alpha_test_control = dsa->sx_alpha_test_control;
rctx->alphatest_state.sx_alpha_ref = dsa->alpha_ref;
- rctx->alphatest_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->alphatest_state.atom);
}
}
@@ -318,14 +326,14 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
rctx->rasterizer = rs;
- r600_set_cso_state_with_cb(&rctx->rasterizer_state, rs, &rs->buffer);
+ r600_set_cso_state_with_cb(rctx, &rctx->rasterizer_state, rs, &rs->buffer);
if (rs->offset_enable &&
(rs->offset_units != rctx->poly_offset_state.offset_units ||
rs->offset_scale != rctx->poly_offset_state.offset_scale)) {
rctx->poly_offset_state.offset_units = rs->offset_units;
rctx->poly_offset_state.offset_scale = rs->offset_scale;
- rctx->poly_offset_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->poly_offset_state.atom);
}
/* Update clip_misc_state. */
@@ -333,14 +341,14 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
rctx->clip_misc_state.clip_plane_enable != rs->clip_plane_enable) {
rctx->clip_misc_state.pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
rctx->clip_misc_state.clip_plane_enable = rs->clip_plane_enable;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
/* Workaround for a missing scissor enable on r600. */
if (rctx->b.chip_class == R600 &&
rs->scissor_enable != rctx->scissor[0].enable) {
rctx->scissor[0].enable = rs->scissor_enable;
- rctx->scissor[0].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->scissor[0].atom);
}
/* Re-emit PA_SC_LINE_STIPPLE. */
@@ -378,7 +386,7 @@ void r600_sampler_states_dirty(struct r600_context *rctx,
state->atom.num_dw =
util_bitcount(state->dirty_mask & state->has_bordercolor_mask) * 11 +
util_bitcount(state->dirty_mask & ~state->has_bordercolor_mask) * 5;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -399,9 +407,9 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
assert(start == 0); /* XXX fix below */
- if (shader != PIPE_SHADER_VERTEX &&
- shader != PIPE_SHADER_FRAGMENT) {
- return;
+ if (!states) {
+ disable_mask = ~0u;
+ count = 0;
}
for (i = 0; i < count; i++) {
@@ -443,7 +451,7 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,
/* change in TA_CNTL_AUX need a pipeline flush */
rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
rctx->seamless_cube_map.enabled = seamless_cube_map;
- rctx->seamless_cube_map.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->seamless_cube_map.atom);
}
}
@@ -483,7 +491,7 @@ static void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- r600_set_cso_state(&rctx->vertex_fetch_shader, state);
+ r600_set_cso_state(rctx, &rctx->vertex_fetch_shader, state);
}
static void r600_delete_vertex_elements(struct pipe_context *ctx, void *state)
@@ -513,7 +521,7 @@ void r600_vertex_buffers_dirty(struct r600_context *rctx)
rctx->b.flags |= R600_CONTEXT_INV_VERTEX_CACHE;
rctx->vertex_buffer_state.atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 12 : 11) *
util_bitcount(rctx->vertex_buffer_state.dirty_mask);
- rctx->vertex_buffer_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
}
}
@@ -570,7 +578,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE;
state->atom.num_dw = (rctx->b.chip_class >= EVERGREEN ? 14 : 13) *
util_bitcount(state->dirty_mask);
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -593,9 +601,9 @@ static void r600_set_sampler_views(struct pipe_context *pipe, unsigned shader,
assert(start == 0); /* XXX fix below */
- if (shader == PIPE_SHADER_COMPUTE) {
- evergreen_set_cs_sampler_view(pipe, start, count, views);
- return;
+ if (!views) {
+ disable_mask = ~0u;
+ count = 0;
}
remaining_mask = dst->views.enabled_mask & disable_mask;
@@ -673,7 +681,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
for (i = start_slot; i < start_slot + num_viewports; i++) {
rctx->viewport[i].state = state[i - start_slot];
- rctx->viewport[i].atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->viewport[i].atom);
}
}
@@ -694,7 +702,7 @@ void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
}
/* Compute the key for the hw shader variant */
-static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
+static inline struct r600_shader_key r600_shader_selector_key(struct pipe_context * ctx,
struct r600_pipe_shader_selector * sel)
{
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -913,7 +921,7 @@ void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf
rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE;
state->atom.num_dw = rctx->b.chip_class >= EVERGREEN ? util_bitcount(state->dirty_mask)*20
: util_bitcount(state->dirty_mask)*19;
- state->atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &state->atom);
}
}
@@ -982,7 +990,7 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask
return;
rctx->sample_mask.sample_mask = sample_mask;
- rctx->sample_mask.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom);
}
/*
@@ -1107,27 +1115,28 @@ static void update_shader_atom(struct pipe_context *ctx,
struct r600_shader_state *state,
struct r600_pipe_shader *shader)
{
+ struct r600_context *rctx = (struct r600_context *)ctx;
+
state->shader = shader;
if (shader) {
state->atom.num_dw = shader->command_buffer.num_dw;
- state->atom.dirty = true;
r600_context_add_resource_size(ctx, (struct pipe_resource *)shader->bo);
} else {
state->atom.num_dw = 0;
- state->atom.dirty = false;
}
+ r600_mark_atom_dirty(rctx, &state->atom);
}
static void update_gs_block_state(struct r600_context *rctx, unsigned enable)
{
if (rctx->shader_stages.geom_enable != enable) {
rctx->shader_stages.geom_enable = enable;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
if (rctx->gs_rings.enable != enable) {
rctx->gs_rings.enable = enable;
- rctx->gs_rings.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->gs_rings.atom);
if (enable && !rctx->gs_rings.esgs_ring.buffer) {
unsigned size = 0x1C000;
@@ -1192,7 +1201,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (!rctx->shader_stages.geom_enable) {
rctx->shader_stages.geom_enable = true;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
/* gs_shader provides GS and VS (copy shader) */
@@ -1206,8 +1215,9 @@ static bool r600_update_derived_state(struct r600_context *rctx)
rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->gs_shader->current->gs_copy_shader->pa_cl_vs_out_cntl;
rctx->clip_misc_state.clip_dist_write = rctx->gs_shader->current->gs_copy_shader->shader.clip_dist_write;
rctx->clip_misc_state.clip_disable = rctx->gs_shader->current->shader.vs_position_window_space;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
+ rctx->b.streamout.enabled_stream_buffers_mask = rctx->gs_shader->current->gs_copy_shader->enabled_stream_buffers_mask;
}
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
@@ -1223,7 +1233,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
update_shader_atom(ctx, &rctx->geometry_shader, NULL);
update_shader_atom(ctx, &rctx->export_shader, NULL);
rctx->shader_stages.geom_enable = false;
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
}
r600_shader_select(ctx, rctx->vs_shader, &vs_dirty);
@@ -1240,8 +1250,9 @@ static bool r600_update_derived_state(struct r600_context *rctx)
rctx->clip_misc_state.pa_cl_vs_out_cntl = rctx->vs_shader->current->pa_cl_vs_out_cntl;
rctx->clip_misc_state.clip_dist_write = rctx->vs_shader->current->shader.clip_dist_write;
rctx->clip_misc_state.clip_disable = rctx->vs_shader->current->shader.vs_position_window_space;
- rctx->clip_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
+ rctx->b.streamout.enabled_stream_buffers_mask = rctx->vs_shader->current->enabled_stream_buffers_mask;
}
}
@@ -1252,7 +1263,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (rctx->cb_misc_state.nr_ps_color_outputs != rctx->ps_shader->current->nr_ps_color_outputs) {
rctx->cb_misc_state.nr_ps_color_outputs = rctx->ps_shader->current->nr_ps_color_outputs;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (rctx->b.chip_class <= R700) {
@@ -1260,7 +1271,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
if (rctx->cb_misc_state.multiwrite != multiwrite) {
rctx->cb_misc_state.multiwrite = multiwrite;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
}
@@ -1274,7 +1285,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
r600_update_ps_state(ctx, rctx->ps_shader->current);
}
- rctx->shader_stages.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->shader_stages.atom);
update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current);
}
@@ -1409,7 +1420,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
data += info.indirect_offset / sizeof(unsigned);
start = data[2] * ib.index_size;
count = data[0];
- rctx->b.ws->buffer_unmap(indirect_resource->cs_buf);
}
else {
start = 0;
@@ -1454,24 +1464,23 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
rctx->vgt_state.vgt_multi_prim_ib_reset_en = info.primitive_restart;
rctx->vgt_state.vgt_multi_prim_ib_reset_indx = info.restart_index;
rctx->vgt_state.vgt_indx_offset = info.index_bias;
- rctx->vgt_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->vgt_state.atom);
}
/* Workaround for hardware deadlock on certain R600 ASICs: write into a CB register. */
if (rctx->b.chip_class == R600) {
rctx->b.flags |= R600_CONTEXT_PS_PARTIAL_FLUSH;
- rctx->cb_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
/* Emit states. */
r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
r600_flush_emit(rctx);
- for (i = 0; i < R600_NUM_ATOMS; i++) {
- if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) {
- continue;
- }
+ i = r600_next_dirty_atom(rctx, 0);
+ while (i < R600_NUM_ATOMS) {
r600_emit_atom(rctx, rctx->atoms[i]);
+ i = r600_next_dirty_atom(rctx, i + 1);
}
if (rctx->b.chip_class == CAYMAN) {
@@ -2490,7 +2499,7 @@ static void r600_set_occlusion_query_state(struct pipe_context *ctx, bool enable
if (rctx->db_misc_state.occlusion_query_enabled != enable) {
rctx->db_misc_state.occlusion_query_enabled = enable;
- rctx->db_misc_state.atom.dirty = true;
+ r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
}
}
diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 2e38a62c05a..62680788c5e 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -489,7 +489,7 @@ bool alu_group_tracker::try_reserve(alu_node* n) {
n->bc.bank_swizzle = 0;
- if (!trans & fbs)
+ if (!trans && fbs)
n->bc.bank_swizzle = VEC_210;
if (gpr.try_reserve(n)) {
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index fc5f6c29870..cb9809f2449 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -84,7 +84,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
}
}
- if (busy || ctx->ws->buffer_is_busy(resource->buf, rusage)) {
+ if (busy || !ctx->ws->buffer_wait(resource->buf, 0, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
return NULL;
} else {
@@ -121,7 +121,8 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
/* Older kernels didn't always flush the HDP cache before
* CS execution
*/
- if (rscreen->info.drm_minor < 40) {
+ if (rscreen->info.drm_major == 2 &&
+ rscreen->info.drm_minor < 40) {
res->domains = RADEON_DOMAIN_GTT;
flags |= RADEON_FLAG_GTT_WC;
break;
@@ -147,7 +148,8 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
* Write-combined CPU mappings are fine, the kernel ensures all CPU
* writes finish before the GPU executes a command stream.
*/
- if (rscreen->info.drm_minor < 40)
+ if (rscreen->info.drm_major == 2 &&
+ rscreen->info.drm_minor < 40)
res->domains = RADEON_DOMAIN_GTT;
else if (res->domains & RADEON_DOMAIN_VRAM)
flags |= RADEON_FLAG_CPU_ACCESS;
@@ -161,6 +163,9 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
flags |= RADEON_FLAG_NO_CPU_ACCESS;
}
+ if (rscreen->debug_flags & DBG_NO_WC)
+ flags &= ~RADEON_FLAG_GTT_WC;
+
/* Allocate a new resource. */
new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
use_reusable_pool,
@@ -274,7 +279,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
}
/* At this point, the buffer is always idle. */
@@ -288,7 +293,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
struct r600_resource *staging = NULL;
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index b51eebbc68e..03a04b754d6 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -33,7 +33,7 @@
#include "r600_pipe_common.h"
#include "r600d_common.h"
-static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
+static inline unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
struct r600_ring *ring,
struct r600_resource *rbo,
enum radeon_bo_usage usage,
@@ -59,7 +59,7 @@ static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
rbo->domains, priority) * 4;
}
-static INLINE void r600_emit_reloc(struct r600_common_context *rctx,
+static inline void r600_emit_reloc(struct r600_common_context *rctx,
struct r600_ring *ring, struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
@@ -74,57 +74,57 @@ static INLINE void r600_emit_reloc(struct r600_common_context *rctx,
}
}
-static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}
-static INLINE void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_config_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}
-static INLINE void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
-static INLINE void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
si_write_sh_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
-static INLINE void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
+static inline void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
+ assert(cs->cdw+2+num <= cs->max_dw);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
-static INLINE void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static inline void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
cik_write_uconfig_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 3def4446882..ed5d1dabdc3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -108,9 +108,9 @@ void r600_draw_rectangle(struct blitter_context *blitter,
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
{
/* Flush if there's not enough space. */
- if ((num_dw + ctx->rings.dma.cs->cdw) > RADEON_MAX_CMDBUF_DWORDS) {
+ if ((num_dw + ctx->rings.dma.cs->cdw) > ctx->rings.dma.cs->max_dw) {
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
- assert((num_dw + ctx->rings.dma.cs->cdw) <= RADEON_MAX_CMDBUF_DWORDS);
+ assert((num_dw + ctx->rings.dma.cs->cdw) <= ctx->rings.dma.cs->max_dw);
}
}
@@ -132,10 +132,11 @@ void r600_preflush_suspend_features(struct r600_common_context *ctx)
}
/* suspend queries */
- ctx->nontimer_queries_suspended = false;
+ ctx->queries_suspended_for_flush = false;
if (ctx->num_cs_dw_nontimer_queries_suspend) {
r600_suspend_nontimer_queries(ctx);
- ctx->nontimer_queries_suspended = true;
+ r600_suspend_timer_queries(ctx);
+ ctx->queries_suspended_for_flush = true;
}
ctx->streamout.suspended = false;
@@ -153,8 +154,9 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
}
/* resume queries */
- if (ctx->nontimer_queries_suspended) {
+ if (ctx->queries_suspended_for_flush) {
r600_resume_nontimer_queries(ctx);
+ r600_resume_timer_queries(ctx);
}
/* Re-enable render condition. */
@@ -196,6 +198,19 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
rctx->rings.dma.flushing = false;
}
+static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
+{
+ struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+ unsigned latest = rctx->ws->query_value(rctx->ws,
+ RADEON_GPU_RESET_COUNTER);
+
+ if (rctx->gpu_reset_counter == latest)
+ return PIPE_NO_RESET;
+
+ rctx->gpu_reset_counter = latest;
+ return PIPE_UNKNOWN_CONTEXT_RESET;
+}
+
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen)
{
@@ -222,6 +237,13 @@ bool r600_common_context_init(struct r600_common_context *rctx,
rctx->b.memory_barrier = r600_memory_barrier;
rctx->b.flush = r600_flush_from_st;
+ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) {
+ rctx->b.get_device_reset_status = r600_get_reset_status;
+ rctx->gpu_reset_counter =
+ rctx->ws->query_value(rctx->ws,
+ RADEON_GPU_RESET_COUNTER);
+ }
+
LIST_INITHEAD(&rctx->texture_buffers);
r600_init_context_texture_functions(rctx);
@@ -240,8 +262,12 @@ bool r600_common_context_init(struct r600_common_context *rctx,
if (!rctx->uploader)
return false;
+ rctx->ctx = rctx->ws->ctx_create(rctx->ws);
+ if (!rctx->ctx)
+ return false;
+
if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
- rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA,
+ rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
rctx, NULL);
rctx->rings.dma.flush = r600_flush_dma_ring;
@@ -252,12 +278,12 @@ bool r600_common_context_init(struct r600_common_context *rctx,
void r600_common_context_cleanup(struct r600_common_context *rctx)
{
- if (rctx->rings.gfx.cs) {
+ if (rctx->rings.gfx.cs)
rctx->ws->cs_destroy(rctx->rings.gfx.cs);
- }
- if (rctx->rings.dma.cs) {
+ if (rctx->rings.dma.cs)
rctx->ws->cs_destroy(rctx->rings.dma.cs);
- }
+ if (rctx->ctx)
+ rctx->ws->ctx_destroy(rctx->ctx);
if (rctx->uploader) {
u_upload_destroy(rctx->uploader);
@@ -313,6 +339,11 @@ static const struct debug_named_value common_debug_options[] = {
{ "gs", DBG_GS, "Print geometry shaders" },
{ "ps", DBG_PS, "Print pixel shaders" },
{ "cs", DBG_CS, "Print compute shaders" },
+ { "tcs", DBG_TCS, "Print tessellation control shaders" },
+ { "tes", DBG_TES, "Print tessellation evaluation shaders" },
+ { "noir", DBG_NO_IR, "Don't print the LLVM IR"},
+ { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
+ { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
@@ -324,6 +355,7 @@ static const struct debug_named_value common_debug_options[] = {
{ "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
+ { "nowc", DBG_NO_WC, "Disable GTT write combining" },
DEBUG_NAMED_VALUE_END /* must be last */
};
@@ -338,11 +370,9 @@ static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
return "AMD";
}
-static const char* r600_get_name(struct pipe_screen* pscreen)
+static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
-
- switch (rscreen->family) {
+ switch (rscreen->info.family) {
case CHIP_R600: return "AMD R600";
case CHIP_RV610: return "AMD RV610";
case CHIP_RV630: return "AMD RV630";
@@ -378,10 +408,21 @@ static const char* r600_get_name(struct pipe_screen* pscreen)
case CHIP_KABINI: return "AMD KABINI";
case CHIP_HAWAII: return "AMD HAWAII";
case CHIP_MULLINS: return "AMD MULLINS";
+ case CHIP_TONGA: return "AMD TONGA";
+ case CHIP_ICELAND: return "AMD ICELAND";
+ case CHIP_CARRIZO: return "AMD CARRIZO";
+ case CHIP_FIJI: return "AMD FIJI";
default: return "AMD unknown";
}
}
+static const char* r600_get_name(struct pipe_screen* pscreen)
+{
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
+
+ return rscreen->renderer_string;
+}
+
static float r600_get_paramf(struct pipe_screen* pscreen,
enum pipe_capf param)
{
@@ -495,6 +536,10 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
#else
return "kabini";
#endif
+ case CHIP_TONGA: return "tonga";
+ case CHIP_ICELAND: return "iceland";
+ case CHIP_CARRIZO: return "carrizo";
+ case CHIP_FIJI: return "fiji";
default: return "";
}
}
@@ -636,6 +681,12 @@ static int r600_get_compute_param(struct pipe_screen *screen,
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
break; /* unused */
+ case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+ if (ret) {
+ uint32_t *subgroup_size = ret;
+ *subgroup_size = r600_wavefront_size(rscreen->family);
+ }
+ return sizeof(uint32_t);
}
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
@@ -656,25 +707,33 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pipe_driver_query_info list[] = {
+ {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
+ {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, PIPE_DRIVER_QUERY_TYPE_UINT64,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
{"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
- {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}},
+ {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
- {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+ {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
+ {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
{"temperature", R600_QUERY_GPU_TEMPERATURE, {100}},
- {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}},
- {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}},
- {"GPU-load", R600_QUERY_GPU_LOAD, {100}}
+ {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
+ {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, PIPE_DRIVER_QUERY_TYPE_HZ},
};
unsigned num_queries;
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
num_queries = Elements(list);
+ else if (rscreen->info.drm_major == 3)
+ num_queries = Elements(list) - 3;
else
- num_queries = 8;
+ num_queries = Elements(list) - 4;
if (!info)
return num_queries;
@@ -695,14 +754,6 @@ static void r600_fence_reference(struct pipe_screen *screen,
rws->fence_reference(ptr, fence);
}
-static boolean r600_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
-
- return rws->fence_wait(rws, fence, 0);
-}
-
static boolean r600_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
@@ -837,8 +888,22 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws)
{
+ char llvm_string[32] = {};
+
ws->query_info(ws, &rscreen->info);
+#if HAVE_LLVM
+ snprintf(llvm_string, sizeof(llvm_string),
+ ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+ HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+#endif
+
+ snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
+ "%s (DRM %i.%i.%i%s)",
+ r600_get_chip_name(rscreen), rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
+ llvm_string);
+
rscreen->b.get_name = r600_get_name;
rscreen->b.get_vendor = r600_get_vendor;
rscreen->b.get_device_vendor = r600_get_device_vendor;
@@ -848,7 +913,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
- rscreen->b.fence_signalled = r600_fence_signalled;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
@@ -874,7 +938,9 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
- if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) {
+ if (((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 28) ||
+ rscreen->info.drm_major == 3) &&
+ (rscreen->debug_flags & DBG_TRACE_CS)) {
rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING,
@@ -922,10 +988,8 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen)
pipe_mutex_destroy(rscreen->aux_context_lock);
rscreen->aux_context->destroy(rscreen->aux_context);
- if (rscreen->trace_bo) {
- rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
+ if (rscreen->trace_bo)
pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
- }
rscreen->ws->destroy(rscreen->ws);
FREE(rscreen);
@@ -941,6 +1005,10 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
switch (tgsi_get_processor_type(tokens)) {
case TGSI_PROCESSOR_VERTEX:
return (rscreen->debug_flags & DBG_VS) != 0;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ return (rscreen->debug_flags & DBG_TCS) != 0;
+ case TGSI_PROCESSOR_TESS_EVAL:
+ return (rscreen->debug_flags & DBG_TES) != 0;
case TGSI_PROCESSOR_GEOMETRY:
return (rscreen->debug_flags & DBG_GS) != 0;
case TGSI_PROCESSOR_FRAGMENT:
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 6ce81d33ddd..29db1cc4e07 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -59,6 +59,8 @@
#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9)
#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10)
#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
#define R600_CONTEXT_PRIVATE_FLAG (1u << 1)
@@ -79,17 +81,23 @@
#define DBG_GS (1 << 7)
#define DBG_PS (1 << 8)
#define DBG_CS (1 << 9)
+#define DBG_TCS (1 << 10)
+#define DBG_TES (1 << 11)
+#define DBG_NO_IR (1 << 12)
+#define DBG_NO_TGSI (1 << 13)
+#define DBG_NO_ASM (1 << 14)
+/* Bits 21-31 are reserved for the r600g driver. */
/* features */
-#define DBG_NO_ASYNC_DMA (1 << 10)
-#define DBG_NO_HYPERZ (1 << 11)
-#define DBG_NO_DISCARD_RANGE (1 << 12)
-#define DBG_NO_2D_TILING (1 << 13)
-#define DBG_NO_TILING (1 << 14)
-#define DBG_SWITCH_ON_EOP (1 << 15)
-#define DBG_FORCE_DMA (1 << 16)
-#define DBG_PRECOMPILE (1 << 17)
-#define DBG_INFO (1 << 18)
-/* The maximum allowed bit is 20. */
+#define DBG_NO_ASYNC_DMA (1llu << 32)
+#define DBG_NO_HYPERZ (1llu << 33)
+#define DBG_NO_DISCARD_RANGE (1llu << 34)
+#define DBG_NO_2D_TILING (1llu << 35)
+#define DBG_NO_TILING (1llu << 36)
+#define DBG_SWITCH_ON_EOP (1llu << 37)
+#define DBG_FORCE_DMA (1llu << 38)
+#define DBG_PRECOMPILE (1llu << 39)
+#define DBG_INFO (1llu << 40)
+#define DBG_NO_WC (1llu << 41)
#define R600_MAP_BUFFER_ALIGNMENT 64
@@ -127,9 +135,8 @@ struct radeon_shader_binary {
struct radeon_shader_reloc *relocs;
unsigned reloc_count;
- /** Set to 1 if the disassembly for this binary has been dumped to
- * stderr. */
- int disassembled;
+ /** Disassembled shader in a string. */
+ char *disasm_string;
};
struct r600_resource {
@@ -214,7 +221,6 @@ struct r600_texture {
float depth_clear_value;
bool non_disp_tiling; /* R600-Cayman only */
- unsigned mipmap_shift;
};
struct r600_surface {
@@ -236,6 +242,7 @@ struct r600_surface {
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
unsigned cb_color_attrib; /* EG and later */
+ unsigned cb_dcc_control; /* VI and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
unsigned cb_color_fmask_slice; /* EG and later */
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
@@ -272,7 +279,7 @@ struct r600_common_screen {
enum chip_class chip_class;
struct radeon_info info;
struct r600_tiling_info tiling_info;
- unsigned debug_flags;
+ uint64_t debug_flags;
bool has_cp_dma;
bool has_streamout;
@@ -285,12 +292,23 @@ struct r600_common_screen {
uint32_t *trace_ptr;
unsigned cs_count;
+ /* This must be in the screen, because UE4 uses one context for
+ * compilation and another one for rendering.
+ */
+ unsigned num_compilations;
+ /* Along with ST_DEBUG=precompile, this should show if applications
+ * are loading shaders on demand. This is a monotonic counter.
+ */
+ unsigned num_shaders_created;
+
/* GPU load thread. */
pipe_mutex gpu_load_mutex;
pipe_thread gpu_load_thread;
unsigned gpu_load_counter_busy;
unsigned gpu_load_counter_idle;
- unsigned gpu_load_stop_thread; /* bool */
+ volatile unsigned gpu_load_stop_thread; /* bool */
+
+ char renderer_string[64];
};
/* This encapsulates a state or an operation which can emitted into the GPU
@@ -298,6 +316,7 @@ struct r600_common_screen {
struct r600_atom {
void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
unsigned num_dw;
+ unsigned short id; /* used by r600 only */
bool dirty;
};
@@ -327,6 +346,10 @@ struct r600_streamout {
/* External state which comes from the vertex shader,
* it must be set explicitly when binding a shader. */
unsigned *stride_in_dw;
+ unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
+
+ /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
+ unsigned hw_enabled_mask;
/* The state of VGT_STRMOUT_(CONFIG|EN). */
struct r600_atom enable_atom;
@@ -352,10 +375,12 @@ struct r600_common_context {
struct r600_common_screen *screen;
struct radeon_winsys *ws;
+ struct radeon_winsys_ctx *ctx;
enum radeon_family family;
enum chip_class chip_class;
struct r600_rings rings;
unsigned initial_gfx_cs_size;
+ unsigned gpu_reset_counter;
struct u_upload_mgr *uploader;
struct u_suballocator *allocator_so_filled_size;
@@ -376,11 +401,14 @@ struct r600_common_context {
int num_occlusion_queries;
/* Keep track of non-timer queries, because they should be suspended
* during context flushing.
- * The timer queries (TIME_ELAPSED) shouldn't be suspended. */
+ * The timer queries (TIME_ELAPSED) shouldn't be suspended for blits,
+ * but they should be suspended between IBs. */
struct list_head active_nontimer_queries;
+ struct list_head active_timer_queries;
unsigned num_cs_dw_nontimer_queries_suspend;
+ unsigned num_cs_dw_timer_queries_suspend;
/* If queries have been suspended. */
- bool nontimer_queries_suspended;
+ bool queries_suspended_for_flush;
/* Additional hardware info. */
unsigned backend_mask;
unsigned max_db; /* for OQ */
@@ -441,6 +469,9 @@ struct r600_common_context {
/* This ensures there is enough space in the command stream. */
void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo);
+
+ void (*set_atom_dirty)(struct r600_common_context *ctx,
+ struct r600_atom *atom, bool dirty);
};
/* r600_buffer.c */
@@ -495,6 +526,8 @@ unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
void r600_resume_nontimer_queries(struct r600_common_context *ctx);
+void r600_suspend_timer_queries(struct r600_common_context *ctx);
+void r600_resume_timer_queries(struct r600_common_context *ctx);
void r600_query_init_backend_mask(struct r600_common_context *ctx);
/* r600_streamout.c */
@@ -549,12 +582,12 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
/* Inline helpers. */
-static INLINE struct r600_resource *r600_resource(struct pipe_resource *r)
+static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
return (struct r600_resource*)r;
}
-static INLINE void
+static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
pipe_resource_reference((struct pipe_resource **)ptr,
@@ -570,6 +603,26 @@ static inline unsigned r600_tex_aniso_filter(unsigned filter)
/* else */ return 4;
}
+static inline unsigned r600_wavefront_size(enum radeon_family family)
+{
+ switch (family) {
+ case CHIP_RV610:
+ case CHIP_RS780:
+ case CHIP_RV620:
+ case CHIP_RS880:
+ return 16;
+ case CHIP_RV630:
+ case CHIP_RV635:
+ case CHIP_RV730:
+ case CHIP_RV710:
+ case CHIP_PALM:
+ case CHIP_CEDAR:
+ return 32;
+ default:
+ return 64;
+ }
+}
+
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 71f4a1522f9..7057aa19a7c 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -54,6 +54,8 @@ struct r600_query {
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
+ /* For transform feedback: which stream the query is for */
+ unsigned stream;
};
@@ -90,6 +92,8 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_GPU_LOAD:
+ case R600_QUERY_NUM_COMPILATIONS:
+ case R600_QUERY_NUM_SHADERS_CREATED:
return NULL;
}
@@ -118,7 +122,6 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
}
results += 4 * ctx->max_db;
}
- ctx->ws->buffer_unmap(buf->cs_buf);
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
@@ -130,7 +133,6 @@ static struct r600_resource *r600_new_query_buffer(struct r600_common_context *c
case PIPE_QUERY_PIPELINE_STATISTICS:
results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
memset(results, 0, buf_size);
- ctx->ws->buffer_unmap(buf->cs_buf);
break;
default:
assert(0);
@@ -157,6 +159,17 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
}
}
+static unsigned event_type_for_stream(struct r600_query *query)
+{
+ switch (query->stream) {
+ default:
+ case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
+ case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
+ case 2: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS2;
+ case 3: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS3;
+ }
+}
+
static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
@@ -191,7 +204,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
@@ -215,9 +228,10 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
- if (!r600_is_timer_query(query->type)) {
+ if (r600_is_timer_query(query->type))
+ ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
+ else
ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
- }
}
static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
@@ -248,7 +262,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
va += query->buffer.results_end + query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
@@ -279,9 +293,10 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
query->buffer.results_end += query->result_size;
if (r600_query_needs_begin(query->type)) {
- if (!r600_is_timer_query(query->type)) {
+ if (r600_is_timer_query(query->type))
+ ctx->num_cs_dw_timer_queries_suspend -= query->num_cs_dw;
+ else
ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
- }
}
r600_update_occlusion_query_state(ctx, query->type, -1);
@@ -292,6 +307,13 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
int operation, bool flag_wait)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+ uint32_t op = PRED_OP(operation);
+
+ /* if true then invert, see GL_ARB_conditional_render_inverted */
+ if (ctx->current_render_cond_cond)
+ op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visible/overflow */
+ else
+ op |= PREDICATION_DRAW_VISIBLE; /* Draw if visible/overflow */
if (operation == PREDICATION_OP_CLEAR) {
ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);
@@ -302,24 +324,21 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
} else {
struct r600_query_buffer *qbuf;
unsigned count;
- uint32_t op;
-
/* Find how many results there are. */
count = 0;
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
count += qbuf->results_end / query->result_size;
}
-
+
ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
-
- op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
- (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
-
+
+ op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
+
/* emit predicate packets for all data blocks */
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
unsigned results_base = 0;
uint64_t va = qbuf->buf->gpu_address;
-
+
while (results_base < qbuf->results_end) {
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
@@ -327,7 +346,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
results_base += query->result_size;
-
+
/* set CONTINUE bit for all packets except the first */
op |= PREDICATION_CONTINUE;
}
@@ -369,6 +388,7 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
query->num_cs_dw = 6;
+ query->stream = index;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
@@ -390,6 +410,8 @@ static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned q
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_GPU_LOAD:
+ case R600_QUERY_NUM_COMPILATIONS:
+ case R600_QUERY_NUM_SHADERS_CREATED:
skip_allocation = true;
break;
default:
@@ -454,7 +476,7 @@ static boolean r600_begin_query(struct pipe_context *ctx,
rquery->begin_result = 0;
return true;
case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
+ rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
return true;
case R600_QUERY_NUM_CS_FLUSHES:
rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
@@ -465,6 +487,12 @@ static boolean r600_begin_query(struct pipe_context *ctx,
case R600_QUERY_GPU_LOAD:
rquery->begin_result = r600_gpu_load_begin(rctx->screen);
return true;
+ case R600_QUERY_NUM_COMPILATIONS:
+ rquery->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ return true;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ rquery->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ return true;
}
/* Discard the old query buffers. */
@@ -477,7 +505,7 @@ static boolean r600_begin_query(struct pipe_context *ctx,
/* Obtain a new buffer if the current one can't be mapped without a stall. */
if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
+ !rctx->ws->buffer_wait(rquery->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
}
@@ -487,9 +515,10 @@ static boolean r600_begin_query(struct pipe_context *ctx,
r600_emit_query_begin(rctx, rquery);
- if (!r600_is_timer_query(rquery->type)) {
+ if (r600_is_timer_query(rquery->type))
+ LIST_ADDTAIL(&rquery->list, &rctx->active_timer_queries);
+ else
LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
- }
return true;
}
@@ -515,7 +544,7 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
return;
case R600_QUERY_BUFFER_WAIT_TIME:
- rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
+ rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS) / 1000;
return;
case R600_QUERY_NUM_CS_FLUSHES:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
@@ -541,13 +570,18 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
case R600_QUERY_GPU_LOAD:
rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
return;
+ case R600_QUERY_NUM_COMPILATIONS:
+ rquery->end_result = p_atomic_read(&rctx->screen->num_compilations);
+ return;
+ case R600_QUERY_NUM_SHADERS_CREATED:
+ rquery->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ return;
}
r600_emit_query_end(rctx, rquery);
- if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
+ if (r600_query_needs_begin(rquery->type))
LIST_DELINIT(&rquery->list);
- }
}
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
@@ -601,6 +635,8 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
+ case R600_QUERY_NUM_COMPILATIONS:
+ case R600_QUERY_NUM_SHADERS_CREATED:
result->u64 = query->end_result - query->begin_result;
return TRUE;
case R600_QUERY_GPU_LOAD:
@@ -751,7 +787,6 @@ static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
assert(0);
}
- ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
return TRUE;
}
@@ -823,22 +858,37 @@ static void r600_render_condition(struct pipe_context *ctx,
}
}
-void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
+static void r600_suspend_queries(struct r600_common_context *ctx,
+ struct list_head *query_list,
+ unsigned *num_cs_dw_queries_suspend)
{
struct r600_query *query;
- LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+ LIST_FOR_EACH_ENTRY(query, query_list, list) {
r600_emit_query_end(ctx, query);
}
- assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
+ assert(*num_cs_dw_queries_suspend == 0);
}
-static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx)
+void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
+{
+ r600_suspend_queries(ctx, &ctx->active_nontimer_queries,
+ &ctx->num_cs_dw_nontimer_queries_suspend);
+}
+
+void r600_suspend_timer_queries(struct r600_common_context *ctx)
+{
+ r600_suspend_queries(ctx, &ctx->active_timer_queries,
+ &ctx->num_cs_dw_timer_queries_suspend);
+}
+
+static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx,
+ struct list_head *query_list)
{
struct r600_query *query;
unsigned num_dw = 0;
- LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+ LIST_FOR_EACH_ENTRY(query, query_list, list) {
/* begin + end */
num_dw += query->num_cs_dw * 2;
@@ -857,21 +907,35 @@ static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
return num_dw;
}
-void r600_resume_nontimer_queries(struct r600_common_context *ctx)
+static void r600_resume_queries(struct r600_common_context *ctx,
+ struct list_head *query_list,
+ unsigned *num_cs_dw_queries_suspend)
{
struct r600_query *query;
+ unsigned num_cs_dw = r600_queries_num_cs_dw_for_resuming(ctx, query_list);
- assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
+ assert(*num_cs_dw_queries_suspend == 0);
/* Check CS space here. Resuming must not be interrupted by flushes. */
- ctx->need_gfx_cs_space(&ctx->b,
- r600_queries_num_cs_dw_for_resuming(ctx), TRUE);
+ ctx->need_gfx_cs_space(&ctx->b, num_cs_dw, TRUE);
- LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
+ LIST_FOR_EACH_ENTRY(query, query_list, list) {
r600_emit_query_begin(ctx, query);
}
}
+void r600_resume_nontimer_queries(struct r600_common_context *ctx)
+{
+ r600_resume_queries(ctx, &ctx->active_nontimer_queries,
+ &ctx->num_cs_dw_nontimer_queries_suspend);
+}
+
+void r600_resume_timer_queries(struct r600_common_context *ctx)
+{
+ r600_resume_queries(ctx, &ctx->active_timer_queries,
+ &ctx->num_cs_dw_timer_queries_suspend);
+}
+
/* Get backends mask */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
@@ -919,7 +983,6 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
if (results) {
memset(results, 0, ctx->max_db * 4 * 4);
- ctx->ws->buffer_unmap(buffer->cs_buf);
/* emit EVENT_WRITE for ZPASS_DONE */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
@@ -937,7 +1000,6 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
if (results[i*4 + 1])
mask |= (1<<i);
}
- ctx->ws->buffer_unmap(buffer->cs_buf);
}
}
@@ -966,4 +1028,5 @@ void r600_query_init(struct r600_common_context *rctx)
rctx->b.render_condition = r600_render_condition;
LIST_INITHEAD(&rctx->active_nontimer_queries);
+ LIST_INITHEAD(&rctx->active_timer_queries);
}
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c
index bc8bf97ef89..0853f636a27 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -88,8 +88,7 @@ void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
12 + /* flush_vgt_streamout */
num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
- begin->num_dw = 12 + /* flush_vgt_streamout */
- 3; /* VGT_STRMOUT_BUFFER_CONFIG */
+ begin->num_dw = 12; /* flush_vgt_streamout */
if (rctx->chip_class >= SI) {
begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
@@ -105,7 +104,7 @@ void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */
- begin->dirty = true;
+ rctx->set_atom_dirty(rctx, begin, true);
r600_set_streamout_enable(rctx, true);
}
@@ -146,7 +145,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
if (num_targets) {
r600_streamout_buffers_dirty(rctx);
} else {
- rctx->streamout.begin_atom.dirty = false;
+ rctx->set_atom_dirty(rctx, &rctx->streamout.begin_atom, false);
r600_set_streamout_enable(rctx, false);
}
}
@@ -192,11 +191,6 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
r600_flush_vgt_streamout(rctx);
- r600_write_context_reg(cs, rctx->chip_class >= EVERGREEN ?
- R_028B98_VGT_STRMOUT_BUFFER_CONFIG :
- R_028B20_VGT_STRMOUT_BUFFER_EN,
- rctx->streamout.enabled_mask);
-
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
continue;
@@ -326,20 +320,42 @@ static bool r600_get_strmout_en(struct r600_common_context *rctx)
static void r600_emit_streamout_enable(struct r600_common_context *rctx,
struct r600_atom *atom)
{
- r600_write_context_reg(rctx->rings.gfx.cs,
- rctx->chip_class >= EVERGREEN ?
- R_028B94_VGT_STRMOUT_CONFIG :
- R_028AB0_VGT_STRMOUT_EN,
- S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx)));
+ unsigned strmout_config_reg = R_028AB0_VGT_STRMOUT_EN;
+ unsigned strmout_config_val = S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx));
+ unsigned strmout_buffer_reg = R_028B20_VGT_STRMOUT_BUFFER_EN;
+ unsigned strmout_buffer_val = rctx->streamout.hw_enabled_mask &
+ rctx->streamout.enabled_stream_buffers_mask;
+
+ if (rctx->chip_class >= EVERGREEN) {
+ strmout_buffer_reg = R_028B98_VGT_STRMOUT_BUFFER_CONFIG;
+
+ strmout_config_reg = R_028B94_VGT_STRMOUT_CONFIG;
+ strmout_config_val |=
+ S_028B94_RAST_STREAM(0) |
+ S_028B94_STREAMOUT_1_EN(r600_get_strmout_en(rctx)) |
+ S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
+ S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
+ }
+ r600_write_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+ r600_write_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
}
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
{
bool old_strmout_en = r600_get_strmout_en(rctx);
+ unsigned old_hw_enabled_mask = rctx->streamout.hw_enabled_mask;
rctx->streamout.streamout_enabled = enable;
- if (old_strmout_en != r600_get_strmout_en(rctx))
- rctx->streamout.enable_atom.dirty = true;
+
+ rctx->streamout.hw_enabled_mask = rctx->streamout.enabled_mask |
+ (rctx->streamout.enabled_mask << 4) |
+ (rctx->streamout.enabled_mask << 8) |
+ (rctx->streamout.enabled_mask << 12);
+
+ if ((old_strmout_en != r600_get_strmout_en(rctx)) ||
+ (old_hw_enabled_mask != rctx->streamout.hw_enabled_mask)) {
+ rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+ }
}
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
@@ -354,8 +370,9 @@ void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
rctx->streamout.prims_gen_query_enabled =
rctx->streamout.num_prims_gen_queries != 0;
- if (old_strmout_en != r600_get_strmout_en(rctx))
- rctx->streamout.enable_atom.dirty = true;
+ if (old_strmout_en != r600_get_strmout_en(rctx)) {
+ rctx->set_atom_dirty(rctx, &rctx->streamout.enable_atom, true);
+ }
}
}
@@ -365,5 +382,5 @@ void r600_streamout_init(struct r600_common_context *rctx)
rctx->b.stream_output_target_destroy = r600_so_target_destroy;
rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
- rctx->streamout.enable_atom.num_dw = 3;
+ rctx->streamout.enable_atom.num_dw = 6;
}
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index dc510c99749..54696910e43 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -243,10 +243,11 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
surface->level[0].mode >= RADEON_SURF_MODE_2D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
+ surface->pipe_config,
surface->bankw, surface->bankh,
surface->tile_split,
surface->stencil_tile_split,
- surface->mtilea,
+ surface->mtilea, surface->num_banks,
surface->level[0].pitch_bytes,
(surface->flags & RADEON_SURF_SCANOUT) != 0);
@@ -489,7 +490,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
unsigned num_pipes = rscreen->tiling_info.num_channels;
if (rscreen->chip_class <= EVERGREEN &&
- rscreen->info.drm_minor < 26)
+ rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
return 0;
/* HW bug on R6xx. */
@@ -501,7 +502,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
/* HTILE is broken with 1D tiling on old kernels and CIK. */
if (rscreen->chip_class >= CIK &&
rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
- rscreen->info.drm_minor < 38)
+ rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
return 0;
switch (num_pipes) {
@@ -706,6 +707,7 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
const struct pipe_resource *templ)
{
const struct util_format_description *desc = util_format_description(templ->format);
+ bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
/* MSAA resources must be 2D tiled. */
if (templ->nr_samples > 1)
@@ -715,10 +717,16 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
+ /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
+ if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
+ (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
+ (templ->target == PIPE_TEXTURE_2D ||
+ templ->target == PIPE_TEXTURE_3D))
+ force_tiling = true;
+
/* Handle common candidates for the linear mode.
* Compressed textures must always be tiled. */
- if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) &&
- !util_format_is_compressed(templ->format)) {
+ if (!force_tiling && !util_format_is_compressed(templ->format)) {
/* Not everything can be linear, so we cannot enforce it
* for all textures. */
if ((rscreen->debug_flags & DBG_NO_TILING) &&
@@ -934,7 +942,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
use_staging_texture = TRUE;
} else if (!(usage & PIPE_TRANSFER_READ) &&
(r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
- rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
+ !rctx->ws->buffer_wait(rtex->resource.buf, 0, RADEON_USAGE_READWRITE))) {
/* Use a staging texture for uploads if the underlying BO is busy. */
use_staging_texture = TRUE;
}
@@ -1059,18 +1067,9 @@ static void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
- struct r600_common_context *rctx = (struct r600_common_context*)ctx;
- struct radeon_winsys_cs_handle *buf;
struct pipe_resource *texture = transfer->resource;
struct r600_texture *rtex = (struct r600_texture*)texture;
- if (rtransfer->staging) {
- buf = rtransfer->staging->cs_buf;
- } else {
- buf = r600_resource(transfer->resource)->cs_buf;
- }
- rctx->ws->buffer_unmap(buf);
-
if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
ctx->resource_copy_region(ctx, texture, transfer->level,
@@ -1262,7 +1261,9 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
- rctx->chip_class >= CIK && rctx->screen->info.drm_minor < 38) {
+ rctx->chip_class >= CIK &&
+ rctx->screen->info.drm_major == 2 &&
+ rctx->screen->info.drm_minor < 38) {
continue;
}
@@ -1278,7 +1279,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
tex->cmask.offset, tex->cmask.size, 0, true);
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
- fb_state->dirty = true;
+ rctx->set_atom_dirty(rctx, fb_state, true);
*buffers &= ~clear_bit;
}
}
diff --git a/src/gallium/drivers/radeon/r600d_common.h b/src/gallium/drivers/radeon/r600d_common.h
index 74c8d8782a6..115042d153e 100644
--- a/src/gallium/drivers/radeon/r600d_common.h
+++ b/src/gallium/drivers/radeon/r600d_common.h
@@ -66,6 +66,9 @@
#define PKT3_SET_SH_REG 0x76 /* SI and later */
#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x1 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x2 /* EG and later */
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS3 0x3 /* EG and later */
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE 0x15
@@ -177,7 +180,7 @@
#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19)
#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20)
#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21)
-#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x7) << 24)
+#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x07) << 24)
#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) & 0x1) << 27)
#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc
#define S_028BDC_EXPAND_LINE_WIDTH(x) (((x) & 0x1) << 9)
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c b/src/gallium/drivers/radeon/radeon_elf_util.c
index 9b508227fd4..2e45d439e7a 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -103,8 +103,7 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
}
void radeon_elf_read(const char *elf_data, unsigned elf_size,
- struct radeon_shader_binary *binary,
- unsigned debug)
+ struct radeon_shader_binary *binary)
{
char *elf_buffer;
Elf *elf;
@@ -124,7 +123,6 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size,
elf = elf_memory(elf_buffer, elf_size);
elf_getshdrstrndx(elf, &section_str_index);
- binary->disassembled = 0;
while ((section = elf_nextscn(elf, section))) {
const char *name;
@@ -145,12 +143,11 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size,
binary->config_size = section_data->d_size;
binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
memcpy(binary->config, section_data->d_buf, binary->config_size);
- } else if (debug && !strcmp(name, ".AMDGPU.disasm")) {
- binary->disassembled = 1;
+ } else if (!strcmp(name, ".AMDGPU.disasm")) {
+ /* Always read disassembly if it's available. */
section_data = elf_getdata(section, section_data);
- fprintf(stderr, "\nShader Disassembly:\n\n");
- fprintf(stderr, "%.*s\n", (int)section_data->d_size,
- (char *)section_data->d_buf);
+ binary->disasm_string = strndup(section_data->d_buf,
+ section_data->d_size);
} else if (!strncmp(name, ".rodata", 7)) {
section_data = elf_getdata(section, section_data);
binary->rodata_size = section_data->d_size;
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.h b/src/gallium/drivers/radeon/radeon_elf_util.h
index ab83f98ea69..ea4ab2f14b2 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.h
+++ b/src/gallium/drivers/radeon/radeon_elf_util.h
@@ -37,7 +37,7 @@ struct radeon_shader_reloc;
* radeon_shader_binary object.
*/
void radeon_elf_read(const char *elf_data, unsigned elf_size,
- struct radeon_shader_binary *binary, unsigned debug);
+ struct radeon_shader_binary *binary);
/**
* @returns A pointer to the start of the configuration information for
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index 6a9557b0b73..e967ad2214e 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -58,7 +58,6 @@ struct radeon_llvm_context {
unsigned type;
unsigned face_gpr;
unsigned two_side;
- unsigned clip_vertex;
unsigned inputs_count;
struct r600_shader_io * r600_inputs;
struct r600_shader_io * r600_outputs;
@@ -72,21 +71,6 @@ struct radeon_llvm_context {
/*=== Front end configuration ===*/
- /* Special Intrinsics */
-
- /** Write to an output register: float store_output(float, i32) */
- const char * store_output_intr;
-
- /** Swizzle a vector value: <4 x float> swizzle(<4 x float>, i32)
- * The swizzle is an unsigned integer that encodes a TGSI_SWIZZLE_* value
- * in 2-bits.
- * Swizzle{0-1} = X Channel
- * Swizzle{2-3} = Y Channel
- * Swizzle{4-5} = Z Channel
- * Swizzle{6-7} = W Channel
- */
- const char * swizzle_intr;
-
/* Instructions that are not described by any of the TGSI opcodes. */
/** This function is responsible for initilizing the inputs array and will be
@@ -100,9 +84,6 @@ struct radeon_llvm_context {
unsigned index,
const struct tgsi_full_declaration *decl);
- /** User data to use with the callbacks */
- void * userdata;
-
/** This array contains the input values for the shader. Typically these
* values will be in the form of a target intrinsic that will inform the
* backend how to load the actual inputs to the shader.
@@ -146,6 +127,8 @@ static inline LLVMTypeRef tgsi2llvmtype(
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
return LLVMInt32TypeInContext(ctx);
+ case TGSI_TYPE_DOUBLE:
+ return LLVMDoubleTypeInContext(ctx);
case TGSI_TYPE_UNTYPED:
case TGSI_TYPE_FLOAT:
return LLVMFloatTypeInContext(ctx);
@@ -171,8 +154,9 @@ static inline LLVMValueRef bitcast(
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data,
- LLVMValueRef *coords_arg);
+ struct lp_build_emit_data * emit_data,
+ LLVMValueRef *coords_arg,
+ LLVMValueRef *derivs_arg);
void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
@@ -191,20 +175,29 @@ unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan);
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx);
-LLVMValueRef
-build_intrinsic(LLVMBuilderRef builder,
- const char *name,
- LLVMTypeRef ret_type,
- LLVMValueRef *args,
- unsigned num_args,
- LLVMAttribute attr);
-
void
build_tgsi_intrinsic_nomem(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data);
+LLVMValueRef
+radeon_llvm_emit_fetch_double(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef ptr,
+ LLVMValueRef ptr2);
+LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef value);
+
+LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ enum tgsi_opcode_type type,
+ unsigned swizzle);
+
+void radeon_llvm_emit_store(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction * inst,
+ const struct tgsi_opcode_info * info,
+ LLVMValueRef dst[4]);
#endif /* RADEON_LLVM_H */
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 25580b6bd4c..00025590137 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -62,6 +62,8 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
switch (type) {
case TGSI_PROCESSOR_VERTEX:
+ case TGSI_PROCESSOR_TESS_CTRL:
+ case TGSI_PROCESSOR_TESS_EVAL:
llvm_type = RADEON_LLVM_SHADER_VS;
break;
case TGSI_PROCESSOR_GEOMETRY:
@@ -142,7 +144,8 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
* @returns 0 for success, 1 for failure
*/
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
- const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
+ const char *gpu_family, bool dump_ir, bool dump_asm,
+ LLVMTargetMachineRef tm)
{
char cpu[CPU_STRING_LEN];
@@ -165,17 +168,15 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
}
strncpy(cpu, gpu_family, CPU_STRING_LEN);
memset(fs, 0, sizeof(fs));
- if (dump) {
+ if (dump_asm)
strncpy(fs, "+DumpCode", FS_STRING_LEN);
- }
tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
LLVMCodeGenLevelDefault, LLVMRelocDefault,
LLVMCodeModelDefault);
dispose_tm = true;
}
- if (dump) {
+ if (dump_ir)
LLVMDumpModule(M);
- }
/* Setup Diagnostic Handler*/
llvm_ctx = LLVMGetModuleContext(M);
@@ -204,7 +205,7 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
buffer_size = LLVMGetBufferSize(out_buffer);
buffer_data = LLVMGetBufferStart(out_buffer);
- radeon_elf_read(buffer_data, buffer_size, binary, dump);
+ radeon_elf_read(buffer_data, buffer_size, binary);
/* Clean up */
LLVMDisposeMemoryBuffer(out_buffer);
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.h b/src/gallium/drivers/radeon/radeon_llvm_emit.h
index 3ccef78e36d..e20aed94c6b 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.h
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.h
@@ -29,6 +29,7 @@
#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
+#include <stdbool.h>
struct radeon_shader_binary;
@@ -36,11 +37,8 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
-unsigned radeon_llvm_compile(
- LLVMModuleRef M,
- struct radeon_shader_binary *binary,
- const char * gpu_family,
- unsigned dump,
- LLVMTargetMachineRef tm);
+unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
+ const char *gpu_family, bool dump_ir, bool dump_asm,
+ LLVMTargetMachineRef tm);
#endif /* RADEON_LLVM_EMIT_H */
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index c8c980d9d32..56694700a47 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -109,12 +109,27 @@ emit_array_index(
return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
}
-static LLVMValueRef
-emit_fetch(
+LLVMValueRef
+radeon_llvm_emit_fetch_double(
struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle);
+ LLVMValueRef ptr,
+ LLVMValueRef ptr2)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef result;
+
+ result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+
+ result = LLVMBuildInsertElement(builder,
+ result,
+ bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
+ bld_base->int_bld.zero, "");
+ result = LLVMBuildInsertElement(builder,
+ result,
+ bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
+ bld_base->int_bld.one, "");
+ return bitcast(bld_base, TGSI_TYPE_DOUBLE, result);
+}
static LLVMValueRef
emit_array_fetch(
@@ -136,7 +151,7 @@ emit_array_fetch(
for (i = 0; i < size; ++i) {
tmp_reg.Register.Index = i + range.First;
- LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle);
+ LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
result = LLVMBuildInsertElement(builder, result, temp,
lp_build_const_int32(gallivm, i), "");
}
@@ -150,23 +165,21 @@ static bool uses_temp_indirect_addressing(
return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
}
-static LLVMValueRef
-emit_fetch(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle)
+LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ enum tgsi_opcode_type type,
+ unsigned swizzle)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMValueRef result = NULL, ptr;
+ LLVMValueRef result = NULL, ptr, ptr2;
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- values[chan] = emit_fetch(bld_base, reg, type, chan);
+ values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
}
return lp_build_gather_values(bld_base->base.gallivm, values,
TGSI_NUM_CHANNELS);
@@ -184,11 +197,27 @@ emit_fetch(
switch(reg->Register.File) {
case TGSI_FILE_IMMEDIATE: {
LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
- return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+ if (type == TGSI_TYPE_DOUBLE) {
+ result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+ result = LLVMConstInsertElement(result,
+ bld->immediates[reg->Register.Index][swizzle],
+ bld_base->int_bld.zero);
+ result = LLVMConstInsertElement(result,
+ bld->immediates[reg->Register.Index][swizzle + 1],
+ bld_base->int_bld.one);
+ return LLVMConstBitCast(result, ctype);
+ } else {
+ return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+ }
}
case TGSI_FILE_INPUT:
result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr = result;
+ ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
+ return radeon_llvm_emit_fetch_double(bld_base, ptr, ptr2);
+ }
break;
case TGSI_FILE_TEMPORARY:
@@ -199,11 +228,23 @@ emit_fetch(
break;
}
ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
+ return radeon_llvm_emit_fetch_double(bld_base,
+ LLVMBuildLoad(builder, ptr, ""),
+ LLVMBuildLoad(builder, ptr2, ""));
+ }
result = LLVMBuildLoad(builder, ptr, "");
break;
case TGSI_FILE_OUTPUT:
ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
+ return radeon_llvm_emit_fetch_double(bld_base,
+ LLVMBuildLoad(builder, ptr, ""),
+ LLVMBuildLoad(builder, ptr2, ""));
+ }
result = LLVMBuildLoad(builder, ptr, "");
break;
@@ -321,8 +362,8 @@ static void emit_declaration(
}
}
-static LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef value)
+LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef value)
{
struct lp_build_emit_data clamp_emit_data;
@@ -336,8 +377,7 @@ static LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
&clamp_emit_data);
}
-static void
-emit_store(
+void radeon_llvm_emit_store(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_instruction * inst,
const struct tgsi_opcode_info * info,
@@ -348,9 +388,10 @@ emit_store(
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- LLVMValueRef temp_ptr;
+ LLVMValueRef temp_ptr, temp_ptr2 = NULL;
unsigned chan, chan_index;
boolean is_vec_store = FALSE;
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
if (dst[0]) {
LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
@@ -371,6 +412,8 @@ emit_store(
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
LLVMValueRef value = dst[chan_index];
+ if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
+ continue;
if (inst->Instruction.Saturate)
value = radeon_llvm_saturate(bld_base, value);
@@ -379,8 +422,9 @@ emit_store(
LLVMBuildStore(builder, value, temp_ptr);
continue;
}
-
- value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+
+ if (dtype != TGSI_TYPE_DOUBLE)
+ value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
if (reg->Register.Indirect) {
struct tgsi_declaration_range range = get_array_range(bld_base,
@@ -418,6 +462,8 @@ emit_store(
switch(reg->Register.File) {
case TGSI_FILE_OUTPUT:
temp_ptr = bld->outputs[reg->Register.Index][chan_index];
+ if (dtype == TGSI_TYPE_DOUBLE)
+ temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
break;
case TGSI_FILE_TEMPORARY:
@@ -428,12 +474,28 @@ emit_store(
break;
}
temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
+ if (dtype == TGSI_TYPE_DOUBLE)
+ temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
+
break;
default:
return;
}
- LLVMBuildStore(builder, value, temp_ptr);
+ if (dtype != TGSI_TYPE_DOUBLE)
+ LLVMBuildStore(builder, value, temp_ptr);
+ else {
+ LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
+ LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
+ LLVMValueRef val2;
+ value = LLVMBuildExtractElement(builder, ptr,
+ bld_base->uint_bld.zero, "");
+ val2 = LLVMBuildExtractElement(builder, ptr,
+ bld_base->uint_bld.one, "");
+
+ LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
+ LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
+ }
}
}
}
@@ -686,34 +748,26 @@ static void kil_emit(
}
}
-void radeon_llvm_emit_prepare_cube_coords(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data,
- LLVMValueRef *coords_arg)
+static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef *in, LLVMValueRef *out)
{
-
- unsigned target = emit_data->inst->Texture.Texture;
- unsigned opcode = emit_data->inst->Instruction.Opcode;
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef type = bld_base->base.elem_type;
LLVMValueRef coords[4];
LLVMValueRef mad_args[3];
- LLVMValueRef idx;
- struct LLVMOpaqueValue *cube_vec;
- LLVMValueRef v;
+ LLVMValueRef v, cube_vec;
unsigned i;
- cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
- v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
+ cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
+ v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
&cube_vec, 1, LLVMReadNoneAttribute);
- for (i = 0; i < 4; ++i) {
- idx = lp_build_const_int32(gallivm, i);
- coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
- }
+ for (i = 0; i < 4; ++i)
+ coords[i] = LLVMBuildExtractElement(builder, v,
+ lp_build_const_int32(gallivm, i), "");
- coords[2] = build_intrinsic(builder, "fabs",
+ coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
type, &coords[2], 1, LLVMReadNoneAttribute);
coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
@@ -729,10 +783,60 @@ void radeon_llvm_emit_prepare_cube_coords(
mad_args[0], mad_args[1], mad_args[2]);
/* apply xyz = yxw swizzle to cooords */
- coords[2] = coords[3];
- coords[3] = coords[1];
- coords[1] = coords[0];
- coords[0] = coords[3];
+ out[0] = coords[1];
+ out[1] = coords[0];
+ out[2] = coords[3];
+}
+
+void radeon_llvm_emit_prepare_cube_coords(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data,
+ LLVMValueRef *coords_arg,
+ LLVMValueRef *derivs_arg)
+{
+
+ unsigned target = emit_data->inst->Texture.Texture;
+ unsigned opcode = emit_data->inst->Instruction.Opcode;
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef coords[4];
+ unsigned i;
+
+ radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
+
+ if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
+ LLVMValueRef derivs[4];
+ int axis;
+
+ /* Convert cube derivatives to 2D derivatives. */
+ for (axis = 0; axis < 2; axis++) {
+ LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
+
+ /* Shift the cube coordinates by the derivatives to get
+ * the cube coordinates of the "neighboring pixel".
+ */
+ for (i = 0; i < 3; i++)
+ shifted_cube_coords[i] =
+ LLVMBuildFAdd(builder, coords_arg[i],
+ derivs_arg[axis*3+i], "");
+ shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
+
+ /* Project the shifted cube coordinates onto the face. */
+ radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
+ shifted_coords);
+
+ /* Subtract both sets of 2D coordinates to get 2D derivatives.
+ * This won't work if the shifted coordinates ended up
+ * in a different face.
+ */
+ for (i = 0; i < 2; i++)
+ derivs[axis * 2 + i] =
+ LLVMBuildFSub(builder, shifted_coords[i],
+ coords[i], "");
+ }
+
+ memcpy(derivs_arg, derivs, sizeof(derivs));
+ }
if (target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
@@ -756,140 +860,6 @@ void radeon_llvm_emit_prepare_cube_coords(
memcpy(coords_arg, coords, sizeof(coords));
}
-static void txd_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
-
- LLVMValueRef coords[4];
- unsigned chan, src;
- for (src = 0; src < 3; src++) {
- for (chan = 0; chan < 4; chan++)
- coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
-
- emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- }
- emit_data->arg_count = 3;
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-
-static void txp_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
- LLVMValueRef src_w;
- unsigned chan;
- LLVMValueRef coords[5];
-
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
- src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
-
- for (chan = 0; chan < 3; chan++ ) {
- LLVMValueRef arg = lp_build_emit_fetch(bld_base,
- emit_data->inst, 0, chan);
- coords[chan] = lp_build_emit_llvm_binary(bld_base,
- TGSI_OPCODE_DIV, arg, src_w);
- }
- coords[3] = bld_base->base.one;
-
- if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
- }
-
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- emit_data->arg_count = 1;
-}
-
-static void tex_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- /* XXX: lp_build_swizzle_aos() was failing with wrong arg types,
- * when we used CHAN_ALL. We should be able to get this to work,
- * but for now we will swizzle it ourselves
- emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
- 0, CHAN_ALL);
-
- */
-
- const struct tgsi_full_instruction * inst = emit_data->inst;
-
- LLVMValueRef coords[5];
- unsigned chan;
- for (chan = 0; chan < 4; chan++) {
- coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
- }
-
- if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
- inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
- /* These instructions have additional operand that should be packed
- * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
- * That operand should be passed as a float value in the args array
- * right after the coord vector. After packing it's not used anymore,
- * that's why arg_count is not increased */
- coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
- }
-
- if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
- inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
- }
-
- emit_data->arg_count = 1;
- emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
- coords, 4);
- emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
-}
-
-static void txf_fetch_args(
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- const struct tgsi_full_instruction * inst = emit_data->inst;
- struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
- const struct tgsi_texture_offset * off = inst->TexOffsets;
- LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
-
- /* fetch tex coords */
- tex_fetch_args(bld_base, emit_data);
-
- /* fetch tex offsets */
- if (inst->Texture.NumOffsets) {
- assert(inst->Texture.NumOffsets == 1);
-
- emit_data->args[1] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleX],
- offset_type);
- emit_data->args[2] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleY],
- offset_type);
- emit_data->args[3] = LLVMConstBitCast(
- bld->immediates[off->Index][off->SwizzleZ],
- offset_type);
- } else {
- emit_data->args[1] = bld_base->int_bld.zero;
- emit_data->args[2] = bld_base->int_bld.zero;
- emit_data->args[3] = bld_base->int_bld.zero;
- }
-
- emit_data->arg_count = 4;
-}
-
static void emit_icmp(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -996,6 +966,35 @@ static void emit_fcmp(
emit_data->output[emit_data->chan] = v;
}
+static void emit_dcmp(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+ LLVMRealPredicate pred;
+
+ /* Use ordered for everything but NE (which is usual for
+ * float comparisons)
+ */
+ switch (emit_data->inst->Instruction.Opcode) {
+ case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
+ case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
+ case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
+ case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
+ default: assert(!"unknown instruction"); pred = 0; break;
+ }
+
+ LLVMValueRef v = LLVMBuildFCmp(builder, pred,
+ emit_data->args[0], emit_data->args[1],"");
+
+ v = LLVMBuildSExtOrBitCast(builder, v,
+ LLVMInt32TypeInContext(context), "");
+
+ emit_data->output[emit_data->chan] = v;
+}
+
static void emit_not(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -1161,6 +1160,40 @@ static void emit_ineg(
emit_data->args[0], "");
}
+static void emit_dneg(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
+ emit_data->args[0], "");
+}
+
+static void emit_frac(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ char *intr;
+
+ if (emit_data->info->opcode == TGSI_OPCODE_FRC)
+ intr = "llvm.floor.f32";
+ else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
+ intr = "llvm.floor.f64";
+ else {
+ assert(0);
+ return;
+ }
+
+ LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
+ &emit_data->args[0], 1,
+ LLVMReadNoneAttribute);
+ emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
+ emit_data->args[0], floor, "");
+}
+
static void emit_f2i(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -1215,58 +1248,16 @@ static void emit_immediate(struct lp_build_tgsi_context * bld_base,
ctx->soa.num_immediates++;
}
-LLVMValueRef
-build_intrinsic(LLVMBuilderRef builder,
- const char *name,
- LLVMTypeRef ret_type,
- LLVMValueRef *args,
- unsigned num_args,
- LLVMAttribute attr)
-{
- LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
- LLVMValueRef function;
-
- function = LLVMGetNamedFunction(module, name);
- if(!function) {
- LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
- unsigned i;
-
- assert(num_args <= LP_MAX_FUNC_ARGS);
-
- for(i = 0; i < num_args; ++i) {
- assert(args[i]);
- arg_types[i] = LLVMTypeOf(args[i]);
- }
-
- function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
-
- if (attr)
- LLVMAddFunctionAttr(function, attr);
- }
-
- return LLVMBuildCall(builder, function, args, num_args, "");
-}
-
-static void build_tgsi_intrinsic(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data,
- LLVMAttribute attr)
-{
- struct lp_build_context * base = &bld_base->base;
- emit_data->output[emit_data->chan] = build_intrinsic(
- base->gallivm->builder, action->intr_name,
- emit_data->dst_type, emit_data->args,
- emit_data->arg_count, attr);
-}
-
void
-build_tgsi_intrinsic_nomem(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
+build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
{
- build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
+ struct lp_build_context * base = &bld_base->base;
+ emit_data->output[emit_data->chan] =
+ lp_build_intrinsic(base->gallivm->builder, action->intr_name,
+ emit_data->dst_type, emit_data->args,
+ emit_data->arg_count, LLVMReadNoneAttribute);
}
static void emit_bfi(const struct lp_build_tgsi_action * action,
@@ -1322,7 +1313,7 @@ static void emit_lsb(const struct lp_build_tgsi_action * action,
};
emit_data->output[emit_data->chan] =
- build_intrinsic(gallivm->builder, "llvm.cttz.i32",
+ lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
emit_data->dst_type, args, Elements(args),
LLVMReadNoneAttribute);
}
@@ -1341,7 +1332,7 @@ static void emit_umsb(const struct lp_build_tgsi_action * action,
};
LLVMValueRef msb =
- build_intrinsic(builder, "llvm.ctlz.i32",
+ lp_build_intrinsic(builder, "llvm.ctlz.i32",
emit_data->dst_type, args, Elements(args),
LLVMReadNoneAttribute);
@@ -1368,7 +1359,7 @@ static void emit_imsb(const struct lp_build_tgsi_action * action,
LLVMValueRef arg = emit_data->args[0];
LLVMValueRef msb =
- build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
+ lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
emit_data->dst_type, &arg, 1,
LLVMReadNoneAttribute);
@@ -1407,12 +1398,8 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
ctx->gallivm.context);
ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
- ctx->store_output_intr = "llvm.AMDGPU.store.output.";
- ctx->swizzle_intr = "llvm.AMDGPU.swizzle";
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
- /* XXX: We need to revisit this.I think the correct way to do this is
- * to use length = 4 here and use the elem_bld for everything. */
type.floating = TRUE;
type.fixed = FALSE;
type.sign = TRUE;
@@ -1423,28 +1410,32 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
+ {
+ struct lp_type dbl_type;
+ dbl_type = type;
+ dbl_type.width *= 2;
+ lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
+ }
bld_base->soa = 1;
- bld_base->emit_store = emit_store;
+ bld_base->emit_store = radeon_llvm_emit_store;
bld_base->emit_swizzle = emit_swizzle;
bld_base->emit_declaration = emit_declaration;
bld_base->emit_immediate = emit_immediate;
- bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch;
- bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch;
- bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch;
- bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch;
+ bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
+ bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
+ bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
/* Allocate outputs */
ctx->soa.outputs = ctx->outputs;
- /* XXX: Is there a better way to initialize all this ? */
-
lp_set_default_actions(bld_base);
bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
+ bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
@@ -1453,7 +1444,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
+ bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
@@ -1461,21 +1452,30 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
- bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
- bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
- bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
+ bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
+ bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
+ bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
+ bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64";
+ bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
+ bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
- bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
- bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
+ bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
@@ -1520,6 +1520,9 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
+ bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name =
+ HAVE_LLVM >= 0x0305 ? "llvm.AMDGPU.rsq.clamped.f32" : "llvm.AMDGPU.rsq";
+ bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
@@ -1532,26 +1535,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
- bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
- bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
- bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
- bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
- bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
- bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
- bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
- bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
- bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
@@ -1571,13 +1554,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
-
- bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
-#if HAVE_LLVM >= 0x0305
- bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq.clamped.f32";
-#else
- bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
-#endif
}
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index be58d0b9ce3..16ee5410273 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -57,6 +57,7 @@
#define FB_BUFFER_OFFSET 0x1000
#define FB_BUFFER_SIZE 2048
+#define IT_SCALING_TABLE_SIZE 992
/* UVD decoder representation */
struct ruvd_decoder {
@@ -65,6 +66,7 @@ struct ruvd_decoder {
ruvd_set_dtb set_dtb;
unsigned stream_handle;
+ unsigned stream_type;
unsigned frame_number;
struct pipe_screen *screen;
@@ -73,15 +75,18 @@ struct ruvd_decoder {
unsigned cur_buffer;
- struct rvid_buffer msg_fb_buffers[NUM_BUFFERS];
+ struct rvid_buffer msg_fb_it_buffers[NUM_BUFFERS];
struct ruvd_msg *msg;
uint32_t *fb;
+ uint8_t *it;
struct rvid_buffer bs_buffers[NUM_BUFFERS];
void* bs_ptr;
unsigned bs_size;
struct rvid_buffer dpb;
+ bool use_legacy;
+ struct rvid_buffer ctx;
};
/* flush IB to the hardware */
@@ -107,19 +112,34 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
RADEON_PRIO_MIN);
- set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
- set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
+ if (!dec->use_legacy) {
+ uint64_t addr;
+ addr = dec->ws->buffer_get_virtual_address(cs_buf);
+ addr = addr + off;
+ set_reg(dec, RUVD_GPCOM_VCPU_DATA0, addr);
+ set_reg(dec, RUVD_GPCOM_VCPU_DATA1, addr >> 32);
+ } else {
+ set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
+ set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
+ }
set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
}
-/* map the next available message/feedback buffer */
-static void map_msg_fb_buf(struct ruvd_decoder *dec)
+/* does the codec need an IT buffer? */
+static bool have_it(struct ruvd_decoder *dec)
+{
+ return dec->stream_type == RUVD_CODEC_H264_PERF ||
+ dec->stream_type == RUVD_CODEC_H265;
+}
+
+/* map the next available message/feedback/itscaling buffer */
+static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
{
struct rvid_buffer* buf;
uint8_t *ptr;
/* grab the current message/feedback buffer */
- buf = &dec->msg_fb_buffers[dec->cur_buffer];
+ buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* and map it for CPU access */
ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs, PIPE_TRANSFER_WRITE);
@@ -127,6 +147,8 @@ static void map_msg_fb_buf(struct ruvd_decoder *dec)
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
+ if (have_it(dec))
+ dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
}
/* unmap and send a message command to the VCPU */
@@ -139,12 +161,13 @@ static void send_msg_buf(struct ruvd_decoder *dec)
return;
/* grab the current message buffer */
- buf = &dec->msg_fb_buffers[dec->cur_buffer];
+ buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
/* unmap the buffer */
dec->ws->buffer_unmap(buf->res->cs_buf);
dec->msg = NULL;
dec->fb = NULL;
+ dec->it = NULL;
/* and send it to the hardware */
send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
@@ -159,11 +182,12 @@ static void next_buffer(struct ruvd_decoder *dec)
}
/* convert the profile into something UVD understands */
-static uint32_t profile2stream_type(enum pipe_video_profile profile)
+static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
{
- switch (u_reduce_video_profile(profile)) {
+ switch (u_reduce_video_profile(dec->base.profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- return RUVD_CODEC_H264;
+ return (family >= CHIP_TONGA) ?
+ RUVD_CODEC_H264_PERF : RUVD_CODEC_H264;
case PIPE_VIDEO_FORMAT_VC1:
return RUVD_CODEC_VC1;
@@ -174,23 +198,46 @@ static uint32_t profile2stream_type(enum pipe_video_profile profile)
case PIPE_VIDEO_FORMAT_MPEG4:
return RUVD_CODEC_MPEG4;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ return RUVD_CODEC_H265;
+
default:
assert(0);
return 0;
}
}
+static unsigned calc_ctx_size(struct ruvd_decoder *dec)
+{
+ unsigned width_in_mb, height_in_mb, ctx_size;
+
+ unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+ unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
+
+ unsigned max_references = dec->base.max_references + 1;
+
+ if (dec->base.width * dec->base.height >= 4096*2000)
+ max_references = MAX2(max_references, 8);
+ else
+ max_references = MAX2(max_references, 17);
+
+ width = align (width, 16);
+ height = align (height, 16);
+ ctx_size = ((width + 255) / 16)*((height + 255) / 16) * 16 * max_references + 52 * 1024;
+ return ctx_size;
+}
+
/* calculate size of reference picture buffer */
-static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
+static unsigned calc_dpb_size(struct ruvd_decoder *dec)
{
unsigned width_in_mb, height_in_mb, image_size, dpb_size;
// always align them to MB size for dpb calculation
- unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH);
- unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT);
+ unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
+ unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
// always one more for currently decoded picture
- unsigned max_references = templ->max_references + 1;
+ unsigned max_references = dec->base.max_references + 1;
// aligned size of a single frame
image_size = width * height;
@@ -201,19 +248,67 @@ static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
width_in_mb = width / VL_MACROBLOCK_WIDTH;
height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
- switch (u_reduce_video_profile(templ->profile)) {
- case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- // the firmware seems to allways assume a minimum of ref frames
- max_references = MAX2(NUM_H264_REFS, max_references);
+ switch (u_reduce_video_profile(dec->base.profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+ if (!dec->use_legacy) {
+ unsigned fs_in_mb = width_in_mb * height_in_mb;
+ unsigned alignment = 64, num_dpb_buffer;
- // reference picture buffer
- dpb_size = image_size * max_references;
+ if (dec->stream_type == RUVD_CODEC_H264_PERF)
+ alignment = 256;
+ switch(dec->base.level) {
+ case 30:
+ num_dpb_buffer = 8100 / fs_in_mb;
+ break;
+ case 31:
+ num_dpb_buffer = 18000 / fs_in_mb;
+ break;
+ case 32:
+ num_dpb_buffer = 20480 / fs_in_mb;
+ break;
+ case 41:
+ num_dpb_buffer = 32768 / fs_in_mb;
+ break;
+ case 42:
+ num_dpb_buffer = 34816 / fs_in_mb;
+ break;
+ case 50:
+ num_dpb_buffer = 110400 / fs_in_mb;
+ break;
+ case 51:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ default:
+ num_dpb_buffer = 184320 / fs_in_mb;
+ break;
+ }
+ num_dpb_buffer++;
+ max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
+ dpb_size = image_size * max_references;
+ dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);
+ dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
+ } else {
+ // the firmware seems to always assume a minimum of ref frames
+ max_references = MAX2(NUM_H264_REFS, max_references);
+ // reference picture buffer
+ dpb_size = image_size * max_references;
+ // macroblock context buffer
+ dpb_size += width_in_mb * height_in_mb * max_references * 192;
+ // IT surface buffer
+ dpb_size += width_in_mb * height_in_mb * 32;
+ }
+ break;
+ }
- // macroblock context buffer
- dpb_size += width_in_mb * height_in_mb * max_references * 192;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ if (dec->base.width * dec->base.height >= 4096*2000)
+ max_references = MAX2(max_references, 8);
+ else
+ max_references = MAX2(max_references, 17);
- // IT surface buffer
- dpb_size += width_in_mb * height_in_mb * 32;
+ width = align (width, 16);
+ height = align (height, 16);
+ dpb_size = align((width * height * 3) / 2, 256) * max_references;
break;
case PIPE_VIDEO_FORMAT_VC1:
@@ -250,6 +345,8 @@ static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
// IT surface buffer
dpb_size += align(width_in_mb * height_in_mb * 32, 64);
+
+ dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
break;
default:
@@ -263,6 +360,12 @@ static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
return dpb_size;
}
+/* free associated data in the video buffer callback */
+static void ruvd_destroy_associated_data(void *data)
+{
+ /* NOOP, since we only use an intptr */
+}
+
/* get h264 specific message bits */
static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
{
@@ -286,10 +389,8 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
assert(0);
break;
}
- if (((dec->base.width * dec->base.height) >> 8) <= 1620)
- result.level = 30;
- else
- result.level = 41;
+
+ result.level = dec->base.level;
result.sps_info_flags = 0;
result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
@@ -338,6 +439,11 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
+ if (dec->stream_type == RUVD_CODEC_H264_PERF) {
+ memcpy(dec->it, result.scaling_list_4x4, 6*16);
+ memcpy((dec->it + 96), result.scaling_list_8x8, 2*64);
+ }
+
result.num_ref_frames = pic->num_ref_frames;
result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
@@ -354,6 +460,151 @@ static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_
return result;
}
+/* get h265 specific message bits: builds the ruvd_h265 message body from pic (SPS/PPS fields, reference lists), tags target with the current POC, and copies the scaling lists into dec->it */
+static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video_buffer *target,
+ struct pipe_h265_picture_desc *pic)
+{
+ struct ruvd_h265 result;
+ unsigned i;
+
+ memset(&result, 0, sizeof(result)); /* fields not assigned below (see the TODO list at the end) stay zero */
+
+ result.sps_info_flags = 0; /* one SPS flag per bit, packed from bit 0 upward */
+ result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
+ result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
+ result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
+ result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
+ result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
+ result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
+ result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
+ result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
+ result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
+ if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
+ result.sps_info_flags |= 1 << 9; /* bit 9 depends on the chip family, not on the SPS -- NOTE(review): its meaning is not visible here */
+
+ result.chroma_format = pic->pps->sps->chroma_format_idc; /* remaining SPS values are copied one-to-one */
+ result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
+ result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
+ result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
+ result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
+ result.log2_min_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
+ result.log2_min_transform_block_size_minus2 = pic->pps->sps->log2_min_transform_block_size_minus2;
+ result.log2_diff_max_min_transform_block_size = pic->pps->sps->log2_diff_max_min_transform_block_size;
+ result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
+ result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
+ result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
+ result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
+ result.log2_min_pcm_luma_coding_block_size_minus3 = pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
+ result.log2_diff_max_min_pcm_luma_coding_block_size = pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
+ result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
+
+ result.pps_info_flags = 0; /* one PPS flag per bit, packed from bit 0 upward */
+ result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
+ result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
+ result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
+ result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
+ result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
+ result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
+ result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
+ result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
+ result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
+ result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
+ result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
+ result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
+ result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
+ result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
+ result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
+ result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
+ result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
+ result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
+ result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
+ result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
+ //result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag; ???
+
+ result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
+ result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps; /* this one comes from the SPS, the neighbouring fields from the PPS */
+ result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
+ result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
+ result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
+ result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
+ result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
+ result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
+ result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
+ result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
+ result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
+ result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
+ result.init_qp_minus26 = pic->pps->init_qp_minus26;
+
+ for (i = 0; i < 19; ++i) /* 19 = length of ruvd_h265.column_width_minus1 */
+ result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
+
+ for (i = 0; i < 21; ++i) /* 21 = length of ruvd_h265.row_height_minus1 */
+ result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
+
+ result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
+ result.curr_idx = pic->CurrPicOrderCntVal; /* NOTE(review): curr_idx is a uint8_t in struct ruvd_h265, so a wider POC value is truncated here */
+ result.curr_poc = pic->CurrPicOrderCntVal;
+
+ vl_video_buffer_set_associated_data(target, &dec->base, /* tag the target with the current POC; the reference loop below reads this tag back from ref buffers */
+ (void *)(uintptr_t)pic->CurrPicOrderCntVal,
+ &ruvd_destroy_associated_data);
+
+ for (i = 0; i < 16; ++i) { /* 16 = length of ref_pic_list / poc_list in struct ruvd_h265 */
+ struct pipe_video_buffer *ref = pic->ref[i];
+ uintptr_t ref_pic = 0;
+
+ result.poc_list[i] = pic->PicOrderCntVal[i];
+
+ if (ref)
+ ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base); /* value stored earlier via vl_video_buffer_set_associated_data() */
+ else
+ ref_pic = 0x7F; /* value written for slots that have no reference buffer */
+ result.ref_pic_list[i] = ref_pic; /* stored into a uint8_t entry */
+ }
+
+ for (i = 0; i < 8; ++i) { /* pre-fill all three sets with 0xFF; only the leading entries are overwritten below */
+ result.ref_pic_set_st_curr_before[i] = 0xFF;
+ result.ref_pic_set_st_curr_after[i] = 0xFF;
+ result.ref_pic_set_lt_curr[i] = 0xFF;
+ }
+
+ for (i = 0; i < pic->NumPocStCurrBefore; ++i) /* NOTE(review): destination holds 8 entries and the count is not clamped here -- confirm callers bound it */
+ result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
+
+ for (i = 0; i < pic->NumPocStCurrAfter; ++i) /* same 8-entry destination, count not clamped here */
+ result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
+
+ for (i = 0; i < pic->NumPocLtCurr; ++i) /* same 8-entry destination, count not clamped here */
+ result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
+
+ for (i = 0; i < 6; ++i)
+ result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
+
+ for (i = 0; i < 2; ++i)
+ result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
+
+ memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16); /* scaling lists are packed back to back in dec->it, starting at offset 0 */
+ memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64); /* 96 = 6 * 16, end of the 4x4 lists */
+ memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64); /* 480 = 96 + 6 * 64 */
+ memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64); /* 864 = 480 + 6 * 64; data ends at 992 */
+
+ /* TODO
+ result.highestTid;
+ result.isNonRef;
+
+ IDRPicFlag;
+ RAPPicFlag;
+ NumPocTotalCurr;
+ NumShortTermPictureSliceHeaderBits;
+ NumLongTermPictureSliceHeaderBits;
+
+ IsLongTerm[16];
+ */
+
+ return result;
+}
+
/* get vc1 specific message bits */
static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
{
@@ -556,7 +807,7 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
assert(decoder);
- map_msg_fb_buf(dec);
+ map_msg_fb_it_buf(dec);
memset(dec->msg, 0, sizeof(*dec->msg));
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DESTROY;
@@ -568,21 +819,17 @@ static void ruvd_destroy(struct pipe_video_codec *decoder)
dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
+ rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
+ if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC)
+ rvid_destroy_buffer(&dec->ctx);
FREE(dec);
}
-/* free associated data in the video buffer callback */
-static void ruvd_destroy_associated_data(void *data)
-{
- /* NOOP, since we only use an intptr */
-}
-
/**
* start decoding of a new frame
*/
@@ -670,7 +917,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
struct radeon_winsys_cs_handle *dt;
- struct rvid_buffer *msg_fb_buf, *bs_buf;
+ struct rvid_buffer *msg_fb_it_buf, *bs_buf;
unsigned bs_size;
assert(decoder);
@@ -678,26 +925,27 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
if (!dec->bs_ptr)
return;
- msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
+ msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
bs_buf = &dec->bs_buffers[dec->cur_buffer];
bs_size = align(dec->bs_size, 128);
memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
dec->ws->buffer_unmap(bs_buf->res->cs_buf);
- map_msg_fb_buf(dec);
+ map_msg_fb_it_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DECODE;
dec->msg->stream_handle = dec->stream_handle;
dec->msg->status_report_feedback_number = dec->frame_number;
- dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile);
+ dec->msg->body.decode.stream_type = dec->stream_type;
dec->msg->body.decode.decode_flags = 0x1;
dec->msg->body.decode.width_in_samples = dec->base.width;
dec->msg->body.decode.height_in_samples = dec->base.height;
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
+ dec->msg->body.decode.db_pitch = dec->base.width;
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
@@ -706,6 +954,10 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
break;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ dec->msg->body.decode.codec.h265 = get_h265_msg(dec, target, (struct pipe_h265_picture_desc*)picture);
+ break;
+
case PIPE_VIDEO_FORMAT_VC1:
dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
break;
@@ -733,12 +985,19 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0,
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+ if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
+ send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->cs_buf, 0,
+ RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
+ }
send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf,
0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
- send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->res->cs_buf,
+ send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->cs_buf,
FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
+ if (have_it(dec))
+ send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->cs_buf,
+ FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
set_reg(dec, RUVD_ENGINE_CNTL, 1);
flush(dec);
@@ -760,7 +1019,8 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
ruvd_set_dtb set_dtb)
{
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
- unsigned dpb_size = calc_dpb_size(templ);
+ struct r600_common_context *rctx = (struct r600_common_context*)context;
+ unsigned dpb_size;
unsigned width = templ->width, height = templ->height;
unsigned bs_buf_size;
struct radeon_info info;
@@ -791,6 +1051,9 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
if (!dec)
return NULL;
+ if (info.drm_major < 3)
+ dec->use_legacy = TRUE;
+
dec->base = *templ;
dec->base.context = context;
dec->base.width = width;
@@ -803,11 +1066,12 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
dec->base.end_frame = ruvd_end_frame;
dec->base.flush = ruvd_flush;
+ dec->stream_type = profile2stream_type(dec, info.family);
dec->set_dtb = set_dtb;
dec->stream_handle = rvid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
- dec->cs = ws->cs_create(ws, RING_UVD, NULL, NULL, NULL);
+ dec->cs = ws->cs_create(rctx->ctx, RING_UVD, NULL, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
@@ -815,10 +1079,12 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
bs_buf_size = width * height * 512 / (16 * 16);
for (i = 0; i < NUM_BUFFERS; ++i) {
- unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
+ unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
- if (!rvid_create_buffer(dec->screen, &dec->msg_fb_buffers[i],
- msg_fb_size, PIPE_USAGE_STAGING)) {
+ if (have_it(dec))
+ msg_fb_it_size += IT_SCALING_TABLE_SIZE;
+ if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
+ msg_fb_it_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
@@ -829,10 +1095,12 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
goto error;
}
- rvid_clear_buffer(context, &dec->msg_fb_buffers[i]);
+ rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
rvid_clear_buffer(context, &dec->bs_buffers[i]);
}
+ dpb_size = calc_dpb_size(dec);
+
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated dpb.\n");
goto error;
@@ -840,14 +1108,23 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
rvid_clear_buffer(context, &dec->dpb);
- map_msg_fb_buf(dec);
+ if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC) {
+ unsigned ctx_size = calc_ctx_size(dec);
+ if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
+ RVID_ERR("Can't allocated context buffer.\n");
+ goto error;
+ }
+ rvid_clear_buffer(context, &dec->ctx);
+ }
+
+ map_msg_fb_it_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_CREATE;
dec->msg->stream_handle = dec->stream_handle;
- dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile);
+ dec->msg->body.create.stream_type = dec->stream_type;
dec->msg->body.create.width_in_samples = dec->base.width;
dec->msg->body.create.height_in_samples = dec->base.height;
- dec->msg->body.create.dpb_size = dec->dpb.res->buf->size;
+ dec->msg->body.create.dpb_size = dpb_size;
send_msg_buf(dec);
flush(dec);
next_buffer(dec);
@@ -858,11 +1135,13 @@ error:
if (dec->cs) dec->ws->cs_destroy(dec->cs);
for (i = 0; i < NUM_BUFFERS; ++i) {
- rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
+ rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
rvid_destroy_buffer(&dec->dpb);
+ if (u_reduce_video_profile(dec->base.profile) == PIPE_VIDEO_FORMAT_HEVC)
+ rvid_destroy_buffer(&dec->ctx);
FREE(dec);
diff --git a/src/gallium/drivers/radeon/radeon_uvd.h b/src/gallium/drivers/radeon/radeon_uvd.h
index 7442865c9ec..452fbd60880 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.h
+++ b/src/gallium/drivers/radeon/radeon_uvd.h
@@ -62,6 +62,8 @@
#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
+#define RUVD_CMD_ITSCALING_TABLE_BUFFER 0x00000204
+#define RUVD_CMD_CONTEXT_BUFFER 0x00000206
/* UVD message types */
#define RUVD_MSG_CREATE 0
@@ -73,6 +75,8 @@
#define RUVD_CODEC_VC1 0x00000001
#define RUVD_CODEC_MPEG2 0x00000003
#define RUVD_CODEC_MPEG4 0x00000004
+#define RUVD_CODEC_H264_PERF 0x00000007
+#define RUVD_CODEC_H265 0x00000010
/* UVD decode target buffer tiling mode */
#define RUVD_TILE_LINEAR 0x00000000
@@ -171,6 +175,66 @@ struct ruvd_h264 {
} mvc;
};
+struct ruvd_h265 { /* h265 member of the codec union in struct ruvd_msg; filled by get_h265_msg() in radeon_uvd.c */
+ uint32_t sps_info_flags; /* bit-packed SPS flags */
+ uint32_t pps_info_flags; /* bit-packed PPS flags */
+
+ uint8_t chroma_format;
+ uint8_t bit_depth_luma_minus8;
+ uint8_t bit_depth_chroma_minus8;
+ uint8_t log2_max_pic_order_cnt_lsb_minus4;
+
+ uint8_t sps_max_dec_pic_buffering_minus1;
+ uint8_t log2_min_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_luma_coding_block_size;
+ uint8_t log2_min_transform_block_size_minus2;
+
+ uint8_t log2_diff_max_min_transform_block_size;
+ uint8_t max_transform_hierarchy_depth_inter;
+ uint8_t max_transform_hierarchy_depth_intra;
+ uint8_t pcm_sample_bit_depth_luma_minus1;
+
+ uint8_t pcm_sample_bit_depth_chroma_minus1;
+ uint8_t log2_min_pcm_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
+ uint8_t num_extra_slice_header_bits;
+
+ uint8_t num_short_term_ref_pic_sets;
+ uint8_t num_long_term_ref_pic_sps;
+ uint8_t num_ref_idx_l0_default_active_minus1;
+ uint8_t num_ref_idx_l1_default_active_minus1;
+
+ int8_t pps_cb_qp_offset; /* the four offsets in this group are signed */
+ int8_t pps_cr_qp_offset;
+ int8_t pps_beta_offset_div2;
+ int8_t pps_tc_offset_div2;
+
+ uint8_t diff_cu_qp_delta_depth;
+ uint8_t num_tile_columns_minus1;
+ uint8_t num_tile_rows_minus1;
+ uint8_t log2_parallel_merge_level_minus2;
+
+ uint16_t column_width_minus1[19]; /* get_h265_msg() copies exactly 19 entries */
+ uint16_t row_height_minus1[21]; /* get_h265_msg() copies exactly 21 entries */
+
+ int8_t init_qp_minus26;
+ uint8_t num_delta_pocs_ref_rps_idx;
+ uint8_t curr_idx; /* get_h265_msg() assigns the current POC here; only 8 bits wide */
+ uint8_t reserved1;
+ int32_t curr_poc;
+ uint8_t ref_pic_list[16]; /* per-slot value read from the reference buffer's associated data, or 0x7F when the slot is empty */
+ int32_t poc_list[16];
+ uint8_t ref_pic_set_st_curr_before[8]; /* get_h265_msg() pre-fills these three sets with 0xFF */
+ uint8_t ref_pic_set_st_curr_after[8];
+ uint8_t ref_pic_set_lt_curr[8];
+
+ uint8_t ucScalingListDCCoefSizeID2[6]; /* from ScalingListDCCoeff16x16 */
+ uint8_t ucScalingListDCCoefSizeID3[2]; /* from ScalingListDCCoeff32x32 */
+
+ uint8_t highestTid; /* not assigned by get_h265_msg() yet (listed in its TODO) */
+ uint8_t isNonRef; /* not assigned by get_h265_msg() yet (listed in its TODO) */
+};
+
struct ruvd_vc1 {
uint32_t profile;
uint32_t level;
@@ -327,6 +391,7 @@ struct ruvd_msg {
union {
struct ruvd_h264 h264;
+ struct ruvd_h265 h265;
struct ruvd_vc1 vc1;
struct ruvd_mpeg2 mpeg2;
struct ruvd_mpeg4 mpeg4;
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index a6567379fe3..7eab974a3df 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -47,6 +47,8 @@
#define FW_40_2_2 ((40 << 24) | (2 << 16) | (2 << 8))
#define FW_50_0_1 ((50 << 24) | (0 << 16) | (1 << 8))
#define FW_50_1_2 ((50 << 24) | (1 << 16) | (2 << 8))
+#define FW_50_10_2 ((50 << 24) | (10 << 16) | (2 << 8))
+#define FW_50_17_3 ((50 << 24) | (17 << 16) | (3 << 8))
/**
* flush commands to the hardware
@@ -54,6 +56,8 @@
static void flush(struct rvce_encoder *enc)
{
enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0);
+ enc->task_info_idx = 0; /* new command stream: no earlier task-info slot to link from (task_info() tests this for non-zero) */
+ enc->bs_idx = 0; /* restart the per-submission counter that encode() post-increments */
}
#if 0
@@ -214,7 +218,7 @@ struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
* Calculate the offsets into the CPB
*/
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
- unsigned *luma_offset, unsigned *chroma_offset)
+ signed *luma_offset, signed *chroma_offset)
{
unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128);
unsigned vpitch = align(enc->luma->npix_y, 16);
@@ -278,24 +282,19 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder,
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
- enc->rate_control(enc);
- need_rate_control = false;
- enc->config_extension(enc);
- enc->motion_estimation(enc);
- enc->rdo(enc);
- if (enc->use_vui)
- enc->vui(enc);
- enc->pic_control(enc);
+ enc->config(enc);
enc->feedback(enc);
flush(enc);
//dump_feedback(enc, &fb);
rvid_destroy_buffer(&fb);
+ need_rate_control = false;
}
- enc->session(enc);
-
- if (need_rate_control)
- enc->rate_control(enc);
+ if (need_rate_control) {
+ enc->session(enc);
+ enc->config(enc);
+ flush(enc);
+ }
}
static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
@@ -312,6 +311,8 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
RVID_ERR("Can't create feedback buffer.\n");
return;
}
+ if (!enc->cs->cdw)
+ enc->session(enc);
enc->encode(enc);
enc->feedback(enc);
}
@@ -324,7 +325,8 @@ static void rvce_end_frame(struct pipe_video_codec *encoder,
struct rvce_cpb_slot *slot = LIST_ENTRY(
struct rvce_cpb_slot, enc->cpb_slots.prev, list);
- flush(enc);
+ if (!enc->dual_inst || enc->bs_idx > 1)
+ flush(enc);
/* update the CPB backtrack with the just encoded frame */
slot->picture_type = enc->pic.picture_type;
@@ -363,6 +365,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder,
*/
static void rvce_flush(struct pipe_video_codec *encoder)
{
+ struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
+
+ flush(enc); /* submit whatever is still queued; rvce_end_frame() skips its own flush when dual_inst is set and bs_idx <= 1 */
}
static void rvce_cs_flush(void *ctx, unsigned flags,
@@ -377,6 +382,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
rvce_get_buffer get_buffer)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
+ struct r600_common_context *rctx = (struct r600_common_context*)context;
struct rvce_encoder *enc;
struct pipe_video_buffer *tmp_buf, templat = {};
struct radeon_surf *tmp_surf;
@@ -395,8 +401,17 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
if (!enc)
return NULL;
+ if (rscreen->info.drm_major == 3)
+ enc->use_vm = true;
if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
enc->use_vui = true;
+ if (rscreen->info.family >= CHIP_TONGA)
+ enc->dual_pipe = true;
+ /* TODO enable B frame with dual instance */
+ if ((rscreen->info.family >= CHIP_TONGA) &&
+ (templ->max_references == 1) &&
+ (rscreen->info.vce_harvest_config == 0))
+ enc->dual_inst = true;
enc->base = *templ;
enc->base.context = context;
@@ -411,7 +426,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
enc->screen = context->screen;
enc->ws = ws;
- enc->cs = ws->cs_create(ws, RING_VCE, rvce_cs_flush, enc, NULL);
+ enc->cs = ws->cs_create(rctx->ctx, RING_VCE, rvce_cs_flush, enc, NULL);
if (!enc->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
@@ -436,6 +451,9 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
cpb_size = cpb_size * align(tmp_surf->npix_y, 16);
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
+ if (enc->dual_pipe)
+ cpb_size += RVCE_MAX_AUX_BUFFER_NUM *
+ RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
tmp_buf->destroy(tmp_buf);
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't create CPB buffer.\n");
@@ -455,6 +473,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
case FW_50_0_1:
case FW_50_1_2:
+ case FW_50_10_2:
+ case FW_50_17_3:
radeon_vce_50_init(enc);
break;
@@ -482,5 +502,29 @@ bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
return rscreen->info.vce_fw_version == FW_40_2_2 ||
rscreen->info.vce_fw_version == FW_50_0_1 ||
- rscreen->info.vce_fw_version == FW_50_1_2;
+ rscreen->info.vce_fw_version == FW_50_1_2 ||
+ rscreen->info.vce_fw_version == FW_50_10_2 ||
+ rscreen->info.vce_fw_version == FW_50_17_3;
+}
+
+/**
+ * Add the buffer as relocation to the current command submission and emit two dwords addressing it: (virtual address + offset) as hi/lo when enc->use_vm is set, otherwise (reloc index * 4) followed by offset
+ */
+void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset)
+{
+ int reloc_idx;
+
+ reloc_idx = enc->ws->cs_add_reloc(enc->cs, buf, usage, domain, RADEON_PRIO_MIN); /* done in both modes; the returned index is only emitted in the non-VM path */
+ if (enc->use_vm) {
+ uint64_t addr;
+ addr = enc->ws->buffer_get_virtual_address(buf);
+ addr = addr + offset; /* offset is signed and may be negative (encode() in radeon_vce_50.c passes a negative bitstream offset) */
+ RVCE_CS(addr >> 32); /* address, high dword */
+ RVCE_CS(addr); /* address, low dword */
+ } else {
+ RVCE_CS(reloc_idx * 4); /* same value the old RVCE_READ/WRITE macros emitted as the "Hi" dword */
+ RVCE_CS(offset); /* "Lo" dword: offset into the buffer */
+ }
}
diff --git a/src/gallium/drivers/radeon/radeon_vce.h b/src/gallium/drivers/radeon/radeon_vce.h
index 8319ef48cd5..624bda479f8 100644
--- a/src/gallium/drivers/radeon/radeon_vce.h
+++ b/src/gallium/drivers/radeon/radeon_vce.h
@@ -36,15 +36,16 @@
#include "util/list.h"
-#define RVCE_RELOC(buf, usage, domain) (enc->ws->cs_add_reloc(enc->cs, (buf), (usage), domain, RADEON_PRIO_MIN))
-
#define RVCE_CS(value) (enc->cs->buf[enc->cs->cdw++] = (value))
#define RVCE_BEGIN(cmd) { uint32_t *begin = &enc->cs->buf[enc->cs->cdw++]; RVCE_CS(cmd)
-#define RVCE_READ(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READ, domain) * 4)
-#define RVCE_WRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_WRITE, domain) * 4)
-#define RVCE_READWRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READWRITE, domain) * 4)
+#define RVCE_READ(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
+#define RVCE_WRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
+#define RVCE_READWRITE(buf, domain, off) rvce_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
#define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; }
+#define RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE (4096 * 16 * 2.5)
+#define RVCE_MAX_AUX_BUFFER_NUM 4
+
struct r600_common_screen;
/* driver dependent callback */
@@ -76,8 +77,12 @@ struct rvce_encoder {
void (*motion_estimation)(struct rvce_encoder *enc);
void (*rdo)(struct rvce_encoder *enc);
void (*vui)(struct rvce_encoder *enc);
+ void (*config)(struct rvce_encoder *enc);
void (*encode)(struct rvce_encoder *enc);
void (*destroy)(struct rvce_encoder *enc);
+ void (*task_info)(struct rvce_encoder *enc, uint32_t op,
+ uint32_t dep, uint32_t fb_idx,
+ uint32_t ring_idx);
unsigned stream_handle;
@@ -101,7 +106,14 @@ struct rvce_encoder {
struct rvid_buffer *fb;
struct rvid_buffer cpb;
struct pipe_h264_enc_picture_desc pic;
- bool use_vui;
+
+ unsigned task_info_idx;
+ unsigned bs_idx;
+
+ bool use_vm;
+ bool use_vui;
+ bool dual_pipe;
+ bool dual_inst;
};
/* CPB handling functions */
@@ -109,7 +121,7 @@ struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc);
struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc);
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
- unsigned *luma_offset, unsigned *chroma_offset);
+ signed *luma_offset, signed *chroma_offset);
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
@@ -118,6 +130,10 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
+void rvce_add_buffer(struct rvce_encoder *enc, struct radeon_winsys_cs_handle *buf,
+ enum radeon_bo_usage usage, enum radeon_bo_domain domain,
+ signed offset);
+
/* init vce fw 40.2.2 specific callbacks */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
diff --git a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
index 51b17b5f6a8..e64fbc7afb0 100644
--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -53,30 +53,38 @@ static void session(struct rvce_encoder *enc)
RVCE_END();
}
-static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
+static void task_info(struct rvce_encoder *enc, uint32_t op,
+ uint32_t dep, uint32_t fb_idx, uint32_t ring_idx)
{
RVCE_BEGIN(0x00000002); // task info
+ if (op == 0x3) { /* 0x3 is the op that encode() passes; only these task infos are chained together */
+ if (enc->task_info_idx) { /* non-zero: an earlier op-0x3 task info was emitted since the last flush() */
+ uint32_t offs = enc->cs->cdw - enc->task_info_idx + 3; /* dword distance from the previous link slot, plus 3 -- NOTE(review): confirm the bias and units the firmware expects */
+ // Update offsetOfNextTaskInfo
+ enc->cs->buf[enc->task_info_idx] = offs; /* overwrites the 0xffffffff placeholder written for the previous task info */
+ }
+ enc->task_info_idx = enc->cs->cdw; /* index of the offsetOfNextTaskInfo dword written just below */
+ }
RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
- RVCE_CS(taskOperation); // taskOperation
- RVCE_CS(0x00000000); // referencePictureDependency
+ RVCE_CS(op); // taskOperation
+ RVCE_CS(dep); // referencePictureDependency
RVCE_CS(0x00000000); // collocateFlagDependency
- RVCE_CS(0x00000000); // feedbackIndex
- RVCE_CS(0x00000000); // videoBitstreamRingIndex
+ RVCE_CS(fb_idx); // feedbackIndex
+ RVCE_CS(ring_idx); // videoBitstreamRingIndex
RVCE_END();
}
static void feedback(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x05000005); // feedback buffer
- RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains); // feedbackRingAddressHi
- RVCE_CS(0x00000000); // feedbackRingAddressLo
+ RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains, 0x0); // feedbackRingAddressHi/Lo (both dwords are emitted by rvce_add_buffer, offset 0)
RVCE_CS(0x00000001); // feedbackRingSize
RVCE_END();
}
static void create(struct rvce_encoder *enc)
{
- task_info(enc, 0x00000000);
+ enc->task_info(enc, 0x00000000, 0, 0, 0);
RVCE_BEGIN(0x01000001); // create cmd
RVCE_CS(0x00000000); // encUseCircularBuffer
@@ -272,21 +280,31 @@ static void vui(struct rvce_encoder *enc)
RVCE_END();
}
+static void config(struct rvce_encoder *enc) /* emit an op-0x2 task info followed by every configuration packet, all through the encoder's callbacks */
+{
+ enc->task_info(enc, 0x00000002, 0, 0xffffffff, 0); /* args after enc: op, dep, fb_idx, ring_idx -- so feedback index is 0xffffffff here */
+ enc->rate_control(enc);
+ enc->config_extension(enc);
+ enc->motion_estimation(enc);
+ enc->rdo(enc);
+ if (enc->use_vui) /* the VUI packet is only emitted when use_vui is set */
+ enc->vui(enc);
+ enc->pic_control(enc);
+}
+
static void encode(struct rvce_encoder *enc)
{
+ signed luma_offset, chroma_offset;
int i;
- unsigned luma_offset, chroma_offset;
- task_info(enc, 0x00000003);
+ enc->task_info(enc, 0x00000003, 0, 0, 0);
RVCE_BEGIN(0x05000001); // context buffer
- RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
- RVCE_CS(0x00000000); // encodeContextAddressLo
+ RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0x0); // encodeContextAddressHi/Lo
RVCE_END();
RVCE_BEGIN(0x05000004); // video bitstream buffer
- RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
- RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
+ RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, 0x0); // videoBitstreamRingAddressHi/Lo
RVCE_CS(enc->bs_size); // videoBitstreamRingSize
RVCE_END();
@@ -298,10 +316,10 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // insertAUD
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
- RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
- RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
@@ -404,7 +422,7 @@ static void encode(struct rvce_encoder *enc)
static void destroy(struct rvce_encoder *enc)
{
- task_info(enc, 0x00000001);
+ enc->task_info(enc, 0x00000001, 0, 0, 0);
RVCE_BEGIN(0x02000001); // destroy
RVCE_END();
@@ -413,6 +431,7 @@ static void destroy(struct rvce_encoder *enc)
void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
{
enc->session = session;
+ enc->task_info = task_info;
enc->create = create;
enc->feedback = feedback;
enc->rate_control = rate_control;
@@ -421,6 +440,7 @@ void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
enc->motion_estimation = motion_estimation;
enc->rdo = rdo;
enc->vui = vui;
+ enc->config = config;
enc->encode = encode;
enc->destroy = destroy;
}
diff --git a/src/gallium/drivers/radeon/radeon_vce_50.c b/src/gallium/drivers/radeon/radeon_vce_50.c
index 84a2bfb117e..afdab18c0d3 100644
--- a/src/gallium/drivers/radeon/radeon_vce_50.c
+++ b/src/gallium/drivers/radeon/radeon_vce_50.c
@@ -44,18 +44,6 @@
#include "radeon_video.h"
#include "radeon_vce.h"
-static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
-{
- RVCE_BEGIN(0x00000002); // task info
- RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
- RVCE_CS(taskOperation); // taskOperation
- RVCE_CS(0x00000000); // referencePictureDependency
- RVCE_CS(0x00000000); // collocateFlagDependency
- RVCE_CS(0x00000000); // feedbackIndex
- RVCE_CS(0x00000000); // videoBitstreamRingIndex
- RVCE_END();
-}
-
static void rate_control(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000005); // rate control
@@ -90,22 +78,46 @@ static void rate_control(struct rvce_encoder *enc)
static void encode(struct rvce_encoder *enc)
{
+ signed luma_offset, chroma_offset, bs_offset;
+ unsigned dep, bs_idx = enc->bs_idx++;
int i;
- unsigned luma_offset, chroma_offset;
- task_info(enc, 0x00000003);
+ if (enc->dual_inst) {
+ if (bs_idx == 0)
+ dep = 1;
+ else if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
+ dep = 0;
+ else
+ dep = 2;
+ } else
+ dep = 0;
+
+ enc->task_info(enc, 0x00000003, dep, 0, bs_idx);
RVCE_BEGIN(0x05000001); // context buffer
- RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
- RVCE_CS(0x00000000); // encodeContextAddressLo
+ RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains, 0); // encodeContextAddressHi/Lo
RVCE_END();
+ bs_offset = -(signed)(bs_idx * enc->bs_size);
+
RVCE_BEGIN(0x05000004); // video bitstream buffer
- RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
- RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
+ RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT, bs_offset); // videoBitstreamRingAddressHi/Lo
RVCE_CS(enc->bs_size); // videoBitstreamRingSize
RVCE_END();
+ if (enc->dual_pipe) {
+ unsigned aux_offset = enc->cpb.res->buf->size -
+ RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
+ RVCE_BEGIN(0x05000002); // auxiliary buffer
+ for (i = 0; i < 8; ++i) {
+ RVCE_CS(aux_offset);
+ aux_offset += RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE;
+ }
+ for (i = 0; i < 8; ++i)
+ RVCE_CS(RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE);
+ RVCE_END();
+ }
+
RVCE_BEGIN(0x03000001); // encode
RVCE_CS(enc->pic.frame_num ? 0x0 : 0x11); // insertHeaders
RVCE_CS(0x00000000); // pictureStructure
@@ -114,14 +126,17 @@ static void encode(struct rvce_encoder *enc)
RVCE_CS(0x00000000); // insertAUD
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
- RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
- RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
- RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->luma->level[0].offset); // inputPictureLumaAddressHi/Lo
+ RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM,
+ enc->chroma->level[0].offset); // inputPictureChromaAddressHi/Lo
RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
- RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ if (enc->dual_pipe)
+ RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
+ else
+ RVCE_CS(0x00010000); // encInputPic(Addr|Array)Mode,encDisable(TwoPipeMode|MBOffloading)
RVCE_CS(0x00000000); // encInputPicTileConfig
RVCE_CS(enc->pic.picture_type); // encPicType
RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index 826e0763c08..3a1834b948f 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -214,9 +214,9 @@ int rvid_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
- return 2048;
+ return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return 1152;
+ return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
@@ -225,6 +225,8 @@ int rvid_get_video_param(struct pipe_screen *screen,
return false;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
+ case PIPE_VIDEO_CAP_STACKED_FRAMES:
+ return (rscreen->family < CHIP_TONGA) ? 1 : 2;
default:
return 0;
}
@@ -262,20 +264,28 @@ int rvid_get_video_param(struct pipe_screen *screen,
/* FIXME: VC-1 simple/main profile is broken */
return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ /* Carrizo only supports HEVC Main */
+ return rscreen->family >= CHIP_CARRIZO &&
+ profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
default:
return false;
}
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
- return 2048;
+ return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return 1152;
+ return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+ if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+ return false; //The hardware doesn't support interlaced HEVC.
return true;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+ if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+ return false; //The hardware doesn't support interlaced HEVC.
return true;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
@@ -300,6 +310,8 @@ int rvid_get_video_param(struct pipe_screen *screen,
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
return 41;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+ return 186;
default:
return 0;
}
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 3bfbb6d75b7..7ab6e56e099 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -42,12 +42,9 @@
#include "pipebuffer/pb_buffer.h"
-#define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
-
#define RADEON_FLUSH_ASYNC (1 << 0)
#define RADEON_FLUSH_KEEP_TILING_FLAGS (1 << 1) /* needs DRM 2.12.0 */
-#define RADEON_FLUSH_COMPUTE (1 << 2)
-#define RADEON_FLUSH_END_OF_FRAME (1 << 3)
+#define RADEON_FLUSH_END_OF_FRAME (1 << 2)
/* Tiling flags. */
enum radeon_bo_layout {
@@ -136,6 +133,10 @@ enum radeon_family {
CHIP_KABINI,
CHIP_HAWAII,
CHIP_MULLINS,
+ CHIP_TONGA,
+ CHIP_ICELAND,
+ CHIP_CARRIZO,
+ CHIP_FIJI,
CHIP_LAST,
};
@@ -150,10 +151,12 @@ enum chip_class {
CAYMAN,
SI,
CIK,
+ VI,
};
enum ring_type {
RING_GFX = 0,
+ RING_COMPUTE,
RING_DMA,
RING_UVD,
RING_VCE,
@@ -169,9 +172,10 @@ enum radeon_value_id {
RADEON_NUM_BYTES_MOVED,
RADEON_VRAM_USAGE,
RADEON_GTT_USAGE,
- RADEON_GPU_TEMPERATURE,
+ RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
RADEON_CURRENT_SCLK,
- RADEON_CURRENT_MCLK
+ RADEON_CURRENT_MCLK,
+ RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
};
enum radeon_bo_priority {
@@ -192,9 +196,11 @@ enum radeon_bo_priority {
struct winsys_handle;
struct radeon_winsys_cs_handle;
+struct radeon_winsys_ctx;
struct radeon_winsys_cs {
unsigned cdw; /* Number of used dwords. */
+ unsigned max_dw; /* Maximum number of dwords. */
uint32_t *buf; /* The command buffer. */
enum ring_type ring_type;
};
@@ -238,6 +244,7 @@ struct radeon_info {
boolean cik_macrotile_mode_array_valid;
uint32_t cik_macrotile_mode_array[16];
+ uint32_t vce_harvest_config;
};
enum radeon_feature_id {
@@ -317,6 +324,8 @@ struct radeon_surf {
struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL];
uint32_t tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
+ uint32_t pipe_config;
+ uint32_t num_banks;
};
struct radeon_winsys {
@@ -398,24 +407,15 @@ struct radeon_winsys {
void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf);
/**
- * Return TRUE if a buffer object is being used by the GPU.
+ * Wait for the buffer and return true if the buffer is not used
+ * by the device.
*
- * \param buf A winsys buffer object.
- * \param usage Only check whether the buffer is busy for the given usage.
+ * A timeout of 0 only returns the current status without waiting.
+ * A timeout of PIPE_TIMEOUT_INFINITE always waits until the buffer
+ * is idle.
*/
- boolean (*buffer_is_busy)(struct pb_buffer *buf,
- enum radeon_bo_usage usage);
-
- /**
- * Wait for a buffer object until it is not used by a GPU. This is
- * equivalent to a fence placed after the last command using the buffer,
- * and synchronizing to the fence.
- *
- * \param buf A winsys buffer object to wait for.
- * \param usage Only wait until the buffer is idle for the given usage,
- * but may still be busy for some other usage.
- */
- void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
+ bool (*buffer_wait)(struct pb_buffer *buf, uint64_t timeout,
+ enum radeon_bo_usage usage);
/**
* Return tiling flags describing a memory layout of a buffer object.
@@ -450,10 +450,11 @@ struct radeon_winsys {
struct radeon_winsys_cs *rcs,
enum radeon_bo_layout microtile,
enum radeon_bo_layout macrotile,
+ unsigned pipe_config,
unsigned bankw, unsigned bankh,
unsigned tile_split,
unsigned stencil_tile_split,
- unsigned mtilea,
+ unsigned mtilea, unsigned num_banks,
unsigned stride,
bool scanout);
@@ -514,16 +515,32 @@ struct radeon_winsys {
* commands independently of other contexts.
*************************************************************************/
+ /**
+ * Create a command submission context.
+ * Various command streams can be submitted to the same context.
+ */
+ struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
+
+ /**
+ * Destroy a context.
+ */
+ void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
+
+ /**
+ * Query a GPU reset status.
+ */
+ enum pipe_reset_status (*ctx_query_reset_status)(struct radeon_winsys_ctx *ctx);
+
/**
* Create a command stream.
*
- * \param ws The winsys this function is called from.
+ * \param ctx The submission context
* \param ring_type The ring type (GFX, DMA, UVD)
* \param flush Flush callback function associated with the command stream.
* \param user User pointer that will be passed to the flush callback.
* \param trace_buf Trace buffer when tracing is enabled
*/
- struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
+ struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys_ctx *ctx,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
@@ -668,12 +685,12 @@ struct radeon_winsys {
};
-static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
+static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
{
cs->buf[cs->cdw++] = value;
}
-static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs,
+static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
const uint32_t *values, unsigned count)
{
memcpy(cs->buf+cs->cdw, values, count * 4);
diff --git a/src/gallium/drivers/radeonsi/Automake.inc b/src/gallium/drivers/radeonsi/Automake.inc
index 8686fffd71c..5a9dcfd9fd6 100644
--- a/src/gallium/drivers/radeonsi/Automake.inc
+++ b/src/gallium/drivers/radeonsi/Automake.inc
@@ -5,10 +5,12 @@ TARGET_CPPFLAGS += -DGALLIUM_RADEONSI
TARGET_LIB_DEPS += \
$(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
$(RADEON_LIBS) \
- $(LIBDRM_LIBS)
+ $(LIBDRM_LIBS) \
+ $(AMDGPU_LIBS)
TARGET_RADEON_WINSYS = \
- $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
+ $(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
+ $(top_builddir)/src/gallium/winsys/amdgpu/drm/libamdgpuwinsys.la
TARGET_RADEON_COMMON = \
$(top_builddir)/src/gallium/drivers/radeon/libradeon.la
diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 2876c0ae735..a0b1414f4bb 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -3,6 +3,7 @@ C_SOURCES := \
si_blit.c \
si_commands.c \
si_compute.c \
+ si_cp_dma.c \
si_descriptors.c \
sid.h \
si_dma.c \
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 86111cb86e8..47b586f171e 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -27,7 +27,7 @@
#include "sid.h"
#include "si_pipe.h"
-#include "../radeon/r600_cs.h"
+#include "radeon/r600_cs.h"
#include "util/u_format.h"
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 1f2c4082dbc..48972bd170c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -57,17 +57,19 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader);
util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader);
+ util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader);
+ util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader);
util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader);
util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
if (sctx->queued.named.sample_mask) {
util_blitter_save_sample_mask(sctx->blitter,
sctx->queued.named.sample_mask->sample_mask);
}
- if (sctx->queued.named.viewport) {
- util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport->viewport);
+ if (sctx->queued.named.viewport[0]) {
+ util_blitter_save_viewport(sctx->blitter, &sctx->queued.named.viewport[0]->viewport);
}
- if (sctx->queued.named.scissor) {
- util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor->scissor);
+ if (sctx->queued.named.scissor[0]) {
+ util_blitter_save_scissor(sctx->blitter, &sctx->queued.named.scissor[0]->scissor);
}
util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
@@ -146,7 +148,7 @@ static void si_blit_decompress_depth(struct pipe_context *ctx,
struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
sctx->dbcb_copy_sample = sample;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
surf_tmpl.format = texture->resource.b.b.format;
surf_tmpl.u.tex.level = level;
@@ -180,7 +182,7 @@ static void si_blit_decompress_depth(struct pipe_context *ctx,
sctx->dbcb_depth_copy_enabled = false;
sctx->dbcb_stencil_copy_enabled = false;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
static void si_blit_decompress_depth_in_place(struct si_context *sctx,
@@ -192,7 +194,7 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx,
unsigned layer, max_layer, checked_last_layer, level;
sctx->db_inplace_flush_enabled = true;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
surf_tmpl.format = texture->resource.b.b.format;
@@ -230,7 +232,7 @@ static void si_blit_decompress_depth_in_place(struct si_context *sctx,
}
sctx->db_inplace_flush_enabled = false;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
void si_flush_depth_textures(struct si_context *sctx,
@@ -340,6 +342,8 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
if (buffers & PIPE_CLEAR_COLOR) {
evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
&buffers, color);
+ if (!buffers)
+ return; /* all buffers have been fast cleared */
}
if (buffers & PIPE_CLEAR_COLOR) {
@@ -374,9 +378,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
}
zstex->depth_clear_value = depth;
- sctx->framebuffer.atom.dirty = true; /* updates DB_DEPTH_CLEAR */
+ si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
sctx->db_depth_clear = true;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
si_blitter_begin(ctx, SI_CLEAR);
@@ -389,7 +393,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
sctx->db_depth_clear = false;
sctx->db_depth_disable_expclear = false;
zstex->depth_cleared = true;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
}
@@ -455,89 +459,6 @@ struct texture_orig_info {
unsigned npix0_y;
};
-static void si_compressed_to_blittable(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
- unsigned pixsize = util_format_get_blocksize(rtex->resource.b.b.format);
- int new_format;
- int new_height, new_width;
-
- orig->format = tex->format;
- orig->width0 = tex->width0;
- orig->height0 = tex->height0;
- orig->npix0_x = rtex->surface.level[0].npix_x;
- orig->npix0_y = rtex->surface.level[0].npix_y;
- orig->npix_x = rtex->surface.level[level].npix_x;
- orig->npix_y = rtex->surface.level[level].npix_y;
-
- if (pixsize == 8)
- new_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
- else
- new_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
-
- new_width = util_format_get_nblocksx(tex->format, orig->width0);
- new_height = util_format_get_nblocksy(tex->format, orig->height0);
-
- tex->width0 = new_width;
- tex->height0 = new_height;
- tex->format = new_format;
- rtex->surface.level[0].npix_x = util_format_get_nblocksx(orig->format, orig->npix0_x);
- rtex->surface.level[0].npix_y = util_format_get_nblocksy(orig->format, orig->npix0_y);
- rtex->surface.level[level].npix_x = util_format_get_nblocksx(orig->format, orig->npix_x);
- rtex->surface.level[level].npix_y = util_format_get_nblocksy(orig->format, orig->npix_y);
-
- /* By dividing the dimensions by 4, we effectively decrement
- * last_level by 2, therefore the last 2 mipmap levels disappear and
- * aren't blittable. Note that the last 3 mipmap levels (4x4, 2x2,
- * 1x1) have equal slice sizes, which is an important assumption
- * for this to work.
- *
- * In order to make the last 2 mipmap levels blittable, we have to
- * add the slice size of the last mipmap level to the texture
- * address, so that even though the hw thinks it reads last_level-2,
- * it will actually read last_level-1, and if we add the slice size*2,
- * it will read last_level. That's how this workaround works.
- */
- if (level > rtex->resource.b.b.last_level-2)
- rtex->mipmap_shift = level - (rtex->resource.b.b.last_level-2);
-}
-
-static void si_change_format(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig,
- enum pipe_format format)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
-
- orig->format = tex->format;
- orig->width0 = tex->width0;
- orig->height0 = tex->height0;
- orig->npix0_x = rtex->surface.level[0].npix_x;
- orig->npix0_y = rtex->surface.level[0].npix_y;
- orig->npix_x = rtex->surface.level[level].npix_x;
- orig->npix_y = rtex->surface.level[level].npix_y;
-
- tex->format = format;
-}
-
-static void si_reset_blittable_to_orig(struct pipe_resource *tex,
- unsigned level,
- struct texture_orig_info *orig)
-{
- struct r600_texture *rtex = (struct r600_texture*)tex;
-
- tex->format = orig->format;
- tex->width0 = orig->width0;
- tex->height0 = orig->height0;
- rtex->surface.level[0].npix_x = orig->npix0_x;
- rtex->surface.level[0].npix_y = orig->npix0_y;
- rtex->surface.level[level].npix_x = orig->npix_x;
- rtex->surface.level[level].npix_y = orig->npix_y;
- rtex->mipmap_shift = 0;
-}
-
void si_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
@@ -547,114 +468,116 @@ void si_resource_copy_region(struct pipe_context *ctx,
const struct pipe_box *src_box)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct r600_texture *rdst = (struct r600_texture*)dst;
struct pipe_surface *dst_view, dst_templ;
struct pipe_sampler_view src_templ, *src_view;
- struct texture_orig_info orig_info[2];
+ unsigned dst_width, dst_height, src_width0, src_height0;
+ unsigned src_force_level = 0;
struct pipe_box sbox, dstbox;
- boolean restore_orig[2];
- /* Fallback for buffers. */
+ /* Handle buffers first. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
return;
}
- memset(orig_info, 0, sizeof(orig_info));
+ assert(u_max_sample(dst) == u_max_sample(src));
/* The driver doesn't decompress resources automatically while
* u_blitter is rendering. */
si_decompress_subresource(ctx, src, src_level,
src_box->z, src_box->z + src_box->depth - 1);
- restore_orig[0] = restore_orig[1] = FALSE;
+ dst_width = u_minify(dst->width0, dst_level);
+ dst_height = u_minify(dst->height0, dst_level);
+ src_width0 = src->width0;
+ src_height0 = src->height0;
+
+ util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
+ util_blitter_default_src_texture(&src_templ, src, src_level);
if (util_format_is_compressed(src->format) &&
util_format_is_compressed(dst->format)) {
- si_compressed_to_blittable(src, src_level, &orig_info[0]);
- restore_orig[0] = TRUE;
- sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
- sbox.y = util_format_get_nblocksy(orig_info[0].format, src_box->y);
+ unsigned blocksize = util_format_get_blocksize(src->format);
+
+ if (blocksize == 8)
+ src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
+ else
+ src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
+ dst_templ.format = src_templ.format;
+
+ dst_width = util_format_get_nblocksx(dst->format, dst_width);
+ dst_height = util_format_get_nblocksy(dst->format, dst_height);
+ src_width0 = util_format_get_nblocksx(src->format, src_width0);
+ src_height0 = util_format_get_nblocksy(src->format, src_height0);
+
+ dstx = util_format_get_nblocksx(dst->format, dstx);
+ dsty = util_format_get_nblocksy(dst->format, dsty);
+
+ sbox.x = util_format_get_nblocksx(src->format, src_box->x);
+ sbox.y = util_format_get_nblocksy(src->format, src_box->y);
sbox.z = src_box->z;
- sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
- sbox.height = util_format_get_nblocksy(orig_info[0].format, src_box->height);
+ sbox.width = util_format_get_nblocksx(src->format, src_box->width);
+ sbox.height = util_format_get_nblocksy(src->format, src_box->height);
sbox.depth = src_box->depth;
src_box = &sbox;
- si_compressed_to_blittable(dst, dst_level, &orig_info[1]);
- restore_orig[1] = TRUE;
- /* translate the dst box as well */
- dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
- dsty = util_format_get_nblocksy(orig_info[1].format, dsty);
- } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) {
+ src_force_level = src_level;
+ } else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src) ||
+ /* Also, *8_SNORM has precision issues; use UNORM instead. */
+ util_format_is_snorm(src->format)) {
if (util_format_is_subsampled_422(src->format)) {
- /* XXX untested */
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8G8B8A8_UINT);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8G8B8A8_UINT);
+ src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
+ dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
+
+ dst_width = util_format_get_nblocksx(dst->format, dst_width);
+ src_width0 = util_format_get_nblocksx(src->format, src_width0);
+
+ dstx = util_format_get_nblocksx(dst->format, dstx);
sbox = *src_box;
- sbox.x = util_format_get_nblocksx(orig_info[0].format, src_box->x);
- sbox.width = util_format_get_nblocksx(orig_info[0].format, src_box->width);
+ sbox.x = util_format_get_nblocksx(src->format, src_box->x);
+ sbox.width = util_format_get_nblocksx(src->format, src_box->width);
src_box = &sbox;
- dstx = util_format_get_nblocksx(orig_info[1].format, dstx);
-
- restore_orig[0] = TRUE;
- restore_orig[1] = TRUE;
} else {
unsigned blocksize = util_format_get_blocksize(src->format);
switch (blocksize) {
case 1:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8_UNORM);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8_UNORM);
+ dst_templ.format = PIPE_FORMAT_R8_UNORM;
+ src_templ.format = PIPE_FORMAT_R8_UNORM;
break;
case 2:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8G8_UNORM);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8G8_UNORM);
+ dst_templ.format = PIPE_FORMAT_R8G8_UNORM;
+ src_templ.format = PIPE_FORMAT_R8G8_UNORM;
break;
case 4:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R8G8B8A8_UNORM);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R8G8B8A8_UNORM);
+ dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
+ src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
break;
case 8:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R16G16B16A16_UINT);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R16G16B16A16_UINT);
+ dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
+ src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
break;
case 16:
- si_change_format(src, src_level, &orig_info[0],
- PIPE_FORMAT_R32G32B32A32_UINT);
- si_change_format(dst, dst_level, &orig_info[1],
- PIPE_FORMAT_R32G32B32A32_UINT);
+ dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
+ src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
break;
default:
fprintf(stderr, "Unhandled format %s with blocksize %u\n",
util_format_short_name(src->format), blocksize);
assert(0);
}
- restore_orig[0] = TRUE;
- restore_orig[1] = TRUE;
}
}
/* Initialize the surface. */
- util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
dst_view = r600_create_surface_custom(ctx, dst, &dst_templ,
- rdst->surface.level[dst_level].npix_x,
- rdst->surface.level[dst_level].npix_y);
+ dst_width, dst_height);
/* Initialize the sampler view. */
- util_blitter_default_src_texture(&src_templ, src, src_level);
- src_view = ctx->create_sampler_view(ctx, src, &src_templ);
+ src_view = si_create_sampler_view_custom(ctx, src, &src_templ,
+ src_width0, src_height0,
+ src_force_level);
u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height),
abs(src_box->depth), &dstbox);
@@ -662,18 +585,12 @@ void si_resource_copy_region(struct pipe_context *ctx,
/* Copy. */
si_blitter_begin(ctx, SI_COPY);
util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox,
- src_view, src_box, src->width0, src->height0,
+ src_view, src_box, src_width0, src_height0,
PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL);
si_blitter_end(ctx);
pipe_surface_reference(&dst_view, NULL);
pipe_sampler_view_reference(&src_view, NULL);
-
- if (restore_orig[0])
- si_reset_blittable_to_orig(src, src_level, &orig_info[0]);
-
- if (restore_orig[1])
- si_reset_blittable_to_orig(dst, dst_level, &orig_info[1]);
}
/* For MSAA integer resolving to work, we change the format to NORM using this function. */
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 89bef2e7afd..d4fe5653687 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -137,14 +137,14 @@ static void *si_create_compute_state(
}
#else
- radeon_elf_read(code, header->num_bytes, &program->shader.binary, true);
+ radeon_elf_read(code, header->num_bytes, &program->shader.binary);
/* init_scratch_buffer patches the shader code with the scratch address,
* so we need to call it before si_shader_binary_read() which uploads
* the shader code to the GPU.
*/
init_scratch_buffer(sctx, program);
- si_shader_binary_read(sctx->screen, &program->shader, &program->shader.binary);
+ si_shader_binary_read(sctx->screen, &program->shader);
#endif
program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
@@ -309,8 +309,6 @@ static void si_launch_grid(
kernel_args[i]);
}
- sctx->b.ws->buffer_unmap(input_buffer->cs_buf);
-
kernel_args_va = input_buffer->gpu_address;
kernel_args_va += kernel_args_offset;
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
new file mode 100644
index 00000000000..f8a9da45a10
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Marek Olšák
+ */
+
+#include "si_pipe.h"
+#include "sid.h"
+#include "radeon/r600_cs.h"
+
+
+/* Set this if you want the 3D engine to wait until CP DMA is done.
+ * It should be set on the last CP DMA packet. */
+#define R600_CP_DMA_SYNC (1 << 0) /* R600+ */
+
+/* Set this if the source data was used as a destination in a previous CP DMA
+ * packet. It's for preventing a read-after-write (RAW) hazard between two
+ * CP DMA packets. */
+#define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */
+#define CIK_CP_DMA_USE_L2 (1 << 2) /* makes the emit helpers set SRC_SEL/DST_SEL to 3 (presumably "go through TC L2") */
+
+/* Emit one CP DMA packet (DMA_DATA on CIK+, CP_DMA on older chips) that copies
+ * "size" bytes from src_va to dst_va. The size must be non-zero and fit in
+ * bits [20:0]. "flags" is a mask of the *_CP_DMA_* flags defined above. */
+static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
+ uint64_t dst_va, uint64_t src_va,
+ unsigned size, unsigned flags)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
+ uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+ uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
+ PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0;
+ /* Note: "sel" is only emitted by the CIK+ branch below. */
+ assert(size);
+ assert((size & ((1<<21)-1)) == size);
+ /* CIK+: separate control dword, high address dwords emitted unmasked. */
+ if (sctx->b.chip_class >= CIK) {
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+ radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] | SRC_SEL | DST_SEL */
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ } else { /* older chips: CP_SYNC shares a dword with the 16-bit SRC_ADDR_HI */
+ radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ }
+}
+
+/* Emit one CP DMA packet that fills "size" bytes at dst_va with clear_value. The size must be non-zero and fit in bits [20:0]. */
+static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
+ uint64_t dst_va, unsigned size,
+ uint32_t clear_value, unsigned flags)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
+ uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
+ uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0;
+ /* Note: dst_sel is only emitted by the CIK+ branch below. */
+ assert(size);
+ assert((size & ((1<<21)-1)) == size);
+ /* Both branches set SRC_SEL(2) and pass clear_value in the dword that holds SRC_ADDR_LO in a copy packet. */
+ if (sctx->b.chip_class >= CIK) {
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+ radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+ radeon_emit(cs, clear_value); /* DATA [31:0] */
+ radeon_emit(cs, 0); /* dword that holds SRC_ADDR_HI in a copy packet; zero here */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
+ radeon_emit(cs, clear_value); /* DATA [31:0] */
+ radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
+ radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
+ }
+}
+
+/* The max number of bytes to copy per packet. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+
+/* Fill "size" bytes of "dst", starting at byte "offset", with the dword pattern
+ * "value". Dword-aligned ranges are cleared with CP DMA packets, anything else
+ * by the CPU. "is_framebuffer" selects the cache flushes done around the clear. */
+static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
+			    unsigned offset, unsigned size, unsigned value,
+			    bool is_framebuffer)
+{
+	struct si_context *sctx = (struct si_context*)ctx;
+	unsigned flush_flags, tc_l2_flag;
+
+	if (!size)
+		return;
+
+	/* Mark the buffer range of destination as valid (initialized),
+	 * so that transfer_map knows it should wait for the GPU when mapping
+	 * that range. */
+	util_range_add(&r600_resource(dst)->valid_buffer_range, offset, offset + size);
+
+	/* Fallback for unaligned clears: write every byte of [offset, offset + size)
+	 * through a CPU mapping, taking byte (offset + i) % 4 of "value", LSB first. */
+	if (offset % 4 != 0 || size % 4 != 0) {
+		uint8_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+						      sctx->b.rings.gfx.cs,
+						      PIPE_TRANSFER_WRITE);
+		if (!map)
+			return; /* nothing can be written without a mapping */
+		for (unsigned i = 0; i < size; i++) {
+			unsigned shift = ((offset + i) % 4) * 8;
+			map[offset + i] = (value >> shift) & 0xff;
+		}
+		return;
+	}
+
+	uint64_t va = r600_resource(dst)->gpu_address + offset;
+
+	/* Flush the caches where the resource is bound. */
+	if (is_framebuffer) {
+		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+		tc_l2_flag = 0;
+	} else {
+		flush_flags = SI_CONTEXT_INV_TC_L1 | SI_CONTEXT_INV_KCACHE |
+			      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0);
+		tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+	}
+
+	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
+
+	while (size) {
+		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+		unsigned dma_flags = tc_l2_flag;
+
+		si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
+
+		/* This must be done after need_cs_space. */
+		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+				      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+
+		/* Flush the caches for the first packet only and wait for previous CP DMA operations. */
+		if (sctx->b.flags) {
+			si_emit_cache_flush(&sctx->b, NULL);
+			dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
+		}
+
+		/* Do the synchronization after the last copy, so that all data is written to memory. */
+		if (size == byte_count)
+			dma_flags |= R600_CP_DMA_SYNC;
+
+		/* Emit the clear packet. */
+		si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, dma_flags);
+
+		size -= byte_count;
+		va += byte_count;
+	}
+
+	/* Flush the caches again in case the 3D engine has been prefetching the resource. */
+	sctx->b.flags |= flush_flags;
+
+	if (tc_l2_flag)
+		r600_resource(dst)->TC_L2_dirty = true;
+}
+
+/* Copy "size" bytes from "src" (at byte "src_offset") to "dst" (at byte
+ * "dst_offset") with CP DMA packets of at most CP_DMA_MAX_BYTE_COUNT bytes
+ * each. "is_framebuffer" selects the cache flush flags used around the copy. */
+void si_copy_buffer(struct si_context *sctx,
+		    struct pipe_resource *dst, struct pipe_resource *src,
+		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
+		    bool is_framebuffer)
+{
+	unsigned flush_flags, tc_l2_flag;
+
+	if (size == 0)
+		return;
+
+	/* Record the written range as valid (initialized), so that transfer_map
+	 * knows it should wait for the GPU when that range gets mapped. */
+	util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, dst_offset + size);
+
+	/* From here on both offsets are GPU virtual addresses. */
+	dst_offset += r600_resource(dst)->gpu_address;
+	src_offset += r600_resource(src)->gpu_address;
+
+	/* Pick the flushes matching where the resource is bound. */
+	if (!is_framebuffer) {
+		flush_flags = SI_CONTEXT_INV_TC_L1 | SI_CONTEXT_INV_KCACHE |
+			      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0);
+		tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+	} else {
+		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+		tc_l2_flag = 0;
+	}
+
+	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
+
+	/* "size" is non-zero here, so at least one packet is emitted. */
+	do {
+		unsigned chunk = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+		unsigned dma_flags = tc_l2_flag;
+		unsigned flush_dw = sctx->b.flags ? sctx->cache_flush.num_dw : 0;
+
+		si_need_cs_space(sctx, 7 + flush_dw, FALSE);
+
+		/* Emit pending flushes first; the packet then also waits for old CP DMA packets. */
+		if (sctx->b.flags) {
+			si_emit_cache_flush(&sctx->b, NULL);
+			dma_flags |= SI_CP_DMA_RAW_WAIT;
+		}
+
+		/* Only the final packet synchronizes, so that all data is written to memory. */
+		if (chunk == size)
+			dma_flags |= R600_CP_DMA_SYNC;
+
+		/* Relocations must be added after si_need_cs_space. */
+		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
+				      RADEON_USAGE_READ, RADEON_PRIO_MIN);
+		r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
+				      RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
+
+		si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, chunk, dma_flags);
+
+		size -= chunk;
+		src_offset += chunk;
+		dst_offset += chunk;
+	} while (size);
+
+	/* Flush again in case the 3D engine has been prefetching the resource. */
+	sctx->b.flags |= flush_flags;
+
+	if (tc_l2_flag)
+		r600_resource(dst)->TC_L2_dirty = true;
+}
+
+void si_init_cp_dma_functions(struct si_context *sctx)
+{
+ sctx->b.clear_buffer = si_clear_buffer; /* install the CP DMA based clear as the context's clear_buffer hook */
+}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index bbfd36dcbeb..890be071596 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -24,14 +24,23 @@
* Marek Olšák
*/
-/* Resource binding slots and sampler states (each described with 8 or 4 dwords)
- * live in memory on SI.
+/* Resource binding slots and sampler states (each described with 8 or
+ * 4 dwords) are stored in lists in memory which is accessed by shaders
+ * using scalar load instructions.
*
- * This file is responsible for managing lists of resources and sampler states
- * in memory and binding them, which means updating those structures in memory.
+ * This file is responsible for managing such lists. It keeps a copy of all
+ * descriptors in CPU memory and re-uploads a whole list if some slots have
+ * been changed.
*
- * There is also code for updating shader pointers to resources and sampler
- * states. CP DMA functions are here too.
+ * This code is also responsible for updating shader pointers to those lists.
+ *
+ * Note that CP DMA can't be used for updating the lists, because a GPU hang
+ * could leave the list in a mid-IB state and the next IB would get wrong
+ * descriptors and the whole context would be unusable at that point.
+ * (Note: The register shadowing can't be used due to the same reason)
+ *
+ * Also, uploading descriptors to newly allocated memory doesn't require
+ * a KCACHE flush.
*/
#include "radeon/r600_cs.h"
@@ -42,7 +51,6 @@
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
-#define SI_NUM_CONTEXTS 16
/* NULL image and buffer descriptor.
*
@@ -64,284 +72,62 @@ static uint32_t null_descriptor[8] = {
* descriptor */
};
-/* Set this if you want the 3D engine to wait until CP DMA is done.
- * It should be set on the last CP DMA packet. */
-#define R600_CP_DMA_SYNC (1 << 0) /* R600+ */
-
-/* Set this if the source data was used as a destination in a previous CP DMA
- * packet. It's for preventing a read-after-write (RAW) hazard between two
- * CP DMA packets. */
-#define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */
-#define CIK_CP_DMA_USE_L2 (1 << 2)
-
-/* Emit a CP DMA packet to do a copy from one buffer to another.
- * The size must fit in bits [20:0].
- */
-static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
- uint64_t dst_va, uint64_t src_va,
- unsigned size, unsigned flags)
-{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
- uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
- uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
- PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0;
-
- assert(size);
- assert((size & ((1<<21)-1)) == size);
-
- if (sctx->b.chip_class >= CIK) {
- radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] */
- radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
- radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
- } else {
- radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
- radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
- radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
- }
-}
-
-/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
-static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
- uint64_t dst_va, unsigned size,
- uint32_t clear_value, unsigned flags)
-{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0;
- uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
- uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0;
-
- assert(size);
- assert((size & ((1<<21)-1)) == size);
-
- if (sctx->b.chip_class >= CIK) {
- radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
- radeon_emit(cs, clear_value); /* DATA [31:0] */
- radeon_emit(cs, 0);
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
- } else {
- radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
- radeon_emit(cs, clear_value); /* DATA [31:0] */
- radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, size | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
- }
-}
-
-static void si_init_descriptors(struct si_context *sctx,
- struct si_descriptors *desc,
- unsigned shader_userdata_reg,
+static void si_init_descriptors(struct si_descriptors *desc,
+ unsigned shader_userdata_index,
unsigned element_dw_size,
- unsigned num_elements,
- void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
+ unsigned num_elements)
{
- assert(num_elements <= sizeof(desc->enabled_mask)*8);
- assert(num_elements <= sizeof(desc->dirty_mask)*8);
+ int i;
- desc->atom.emit = (void*)emit_func;
- desc->shader_userdata_reg = shader_userdata_reg;
+ assert(num_elements <= sizeof(desc->enabled_mask)*8);
+
+ desc->list = CALLOC(num_elements, element_dw_size * 4);
desc->element_dw_size = element_dw_size;
desc->num_elements = num_elements;
- desc->context_size = num_elements * element_dw_size * 4;
+ desc->list_dirty = true; /* upload the list before the next draw */
+ desc->shader_userdata_offset = shader_userdata_index * 4;
- desc->buffer = (struct r600_resource*)
- pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
- PIPE_USAGE_DEFAULT,
- SI_NUM_CONTEXTS * desc->context_size);
-
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
- RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
-
- /* We don't check for CS space here, because this should be called
- * only once at context initialization. */
- si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address,
- desc->buffer->b.b.width0, 0,
- R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2);
+ /* Initialize the array to NULL descriptors if the element size is 8. */
+ if (element_dw_size == 8)
+ for (i = 0; i < num_elements; i++)
+ memcpy(desc->list + i*element_dw_size, null_descriptor,
+ sizeof(null_descriptor));
}
static void si_release_descriptors(struct si_descriptors *desc)
{
pipe_resource_reference((struct pipe_resource**)&desc->buffer, NULL);
+ FREE(desc->list);
}
-static void si_update_descriptors(struct si_context *sctx,
+static bool si_upload_descriptors(struct si_context *sctx,
struct si_descriptors *desc)
{
- if (desc->dirty_mask) {
- desc->atom.num_dw =
- 7 + /* copy */
- (4 + desc->element_dw_size) * util_bitcount64(desc->dirty_mask) + /* update */
- 4; /* pointer update */
+ unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
+ void *ptr;
- if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
- desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0)
- desc->atom.num_dw += 4; /* second pointer update */
+ if (!desc->list_dirty)
+ return true;
- desc->atom.dirty = true;
+ u_upload_alloc(sctx->b.uploader, 0, list_size,
+ &desc->buffer_offset,
+ (struct pipe_resource**)&desc->buffer, &ptr);
+ if (!desc->buffer)
+ return false; /* skip the draw call */
- /* TODO: Investigate if these flushes can be removed after
- * adding CE support. */
+ util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
- /* The descriptors are read with the K cache. */
- sctx->b.flags |= SI_CONTEXT_INV_KCACHE;
+ r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+ RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
- /* Since SI uses uncached CP DMA to update descriptors,
- * we have to flush TC L2, which is used to fetch constants
- * along with KCACHE. */
- if (sctx->b.chip_class == SI)
- sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
- } else {
- desc->atom.dirty = false;
- }
-}
-
-static void si_emit_shader_pointer(struct si_context *sctx,
- struct r600_atom *atom)
-{
- struct si_descriptors *desc = (struct si_descriptors*)atom;
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint64_t va = desc->buffer->gpu_address +
- desc->current_context_id * desc->context_size +
- desc->buffer_offset;
-
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
- radeon_emit(cs, (desc->shader_userdata_reg - SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
- desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0) {
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
- radeon_emit(cs, (desc->shader_userdata_reg +
- (R_00B330_SPI_SHADER_USER_DATA_ES_0 -
- R_00B130_SPI_SHADER_USER_DATA_VS_0) -
- SI_SH_REG_OFFSET) >> 2);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- }
-}
-
-static void si_emit_descriptors(struct si_context *sctx,
- struct si_descriptors *desc,
- uint32_t **descriptors)
-{
- struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- uint64_t va_base;
- int packet_start = 0;
- int packet_size = 0;
- int last_index = desc->num_elements; /* point to a non-existing element */
- uint64_t dirty_mask = desc->dirty_mask;
- unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS;
-
- assert(dirty_mask);
-
- va_base = desc->buffer->gpu_address;
-
- /* Copy the descriptors to a new context slot. */
- si_emit_cp_dma_copy_buffer(sctx,
- va_base + new_context_id * desc->context_size,
- va_base + desc->current_context_id * desc->context_size,
- desc->context_size, R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2);
-
- va_base += new_context_id * desc->context_size;
-
- /* Update the descriptors.
- * Updates of consecutive descriptors are merged to one WRITE_DATA packet.
- *
- * XXX When unbinding lots of resources, consider clearing the memory
- * with CP DMA instead of emitting zeros.
- */
- while (dirty_mask) {
- int i = u_bit_scan64(&dirty_mask);
-
- assert(i < desc->num_elements);
-
- if (last_index+1 == i && packet_size) {
- /* Append new data at the end of the last packet. */
- packet_size += desc->element_dw_size;
- cs->buf[packet_start] = PKT3(PKT3_WRITE_DATA, packet_size, 0);
- } else {
- /* Start a new packet. */
- uint64_t va = va_base + i * desc->element_dw_size * 4;
-
- packet_start = cs->cdw;
- packet_size = 2 + desc->element_dw_size;
-
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0));
- radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(sctx->b.chip_class == SI ?
- PKT3_WRITE_DATA_DST_SEL_MEM_SYNC :
- PKT3_WRITE_DATA_DST_SEL_TC_L2) |
- PKT3_WRITE_DATA_WR_CONFIRM |
- PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
- radeon_emit(cs, va & 0xFFFFFFFFUL);
- radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
- }
-
- radeon_emit_array(cs, descriptors[i], desc->element_dw_size);
-
- last_index = i;
- }
-
- desc->dirty_mask = 0;
- desc->current_context_id = new_context_id;
-
- /* Now update the shader userdata pointer. */
- si_emit_shader_pointer(sctx, &desc->atom);
-}
-
-static unsigned si_get_shader_user_data_base(unsigned shader)
-{
- switch (shader) {
- case PIPE_SHADER_VERTEX:
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- case PIPE_SHADER_GEOMETRY:
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
- case PIPE_SHADER_FRAGMENT:
- return R_00B030_SPI_SHADER_USER_DATA_PS_0;
- default:
- assert(0);
- return 0;
- }
+ desc->list_dirty = false;
+ desc->pointer_dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+ return true;
}
/* SAMPLER VIEWS */
-static void si_emit_sampler_views(struct si_context *sctx, struct r600_atom *atom)
-{
- struct si_sampler_views *views = (struct si_sampler_views*)atom;
-
- si_emit_descriptors(sctx, &views->desc, views->desc_data);
-}
-
-static void si_init_sampler_views(struct si_context *sctx,
- struct si_sampler_views *views,
- unsigned shader)
-{
- int i;
-
- si_init_descriptors(sctx, &views->desc,
- si_get_shader_user_data_base(shader) +
- SI_SGPR_RESOURCE * 4,
- 8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views);
-
- for (i = 0; i < views->desc.num_elements; i++) {
- views->desc_data[i] = null_descriptor;
- views->desc.dirty_mask |= 1llu << i;
- }
- si_update_descriptors(sctx, &views->desc);
-}
-
static void si_release_sampler_views(struct si_sampler_views *views)
{
int i;
@@ -382,10 +168,10 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
si_get_resource_ro_priority(rview->resource));
}
+ if (!views->desc.buffer)
+ return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
-
- si_emit_shader_pointer(sctx, &views->desc.atom);
}
static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
@@ -406,17 +192,16 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
rview->resource, RADEON_USAGE_READ,
si_get_resource_ro_priority(rview->resource));
-
pipe_sampler_view_reference(&views->views[slot], view);
- views->desc_data[slot] = view_desc;
+ memcpy(views->desc.list + slot*8, view_desc, 8*4);
views->desc.enabled_mask |= 1llu << slot;
} else {
pipe_sampler_view_reference(&views->views[slot], NULL);
- views->desc_data[slot] = null_descriptor;
+ memcpy(views->desc.list + slot*8, null_descriptor, 8*4);
views->desc.enabled_mask &= ~(1llu << slot);
}
- views->desc.dirty_mask |= 1llu << slot;
+ views->desc.list_dirty = true;
}
static void si_set_sampler_views(struct pipe_context *ctx,
@@ -475,25 +260,17 @@ static void si_set_sampler_views(struct pipe_context *ctx,
NULL, NULL);
}
}
-
- si_update_descriptors(sctx, &samplers->views.desc);
}
/* SAMPLER STATES */
-static void si_emit_sampler_states(struct si_context *sctx, struct r600_atom *atom)
-{
- struct si_sampler_states *states = (struct si_sampler_states*)atom;
-
- si_emit_descriptors(sctx, &states->desc, states->desc_data);
-}
-
static void si_sampler_states_begin_new_cs(struct si_context *sctx,
struct si_sampler_states *states)
{
+ if (!states->desc.buffer)
+ return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
- si_emit_shader_pointer(sctx, &states->desc.atom);
}
void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
@@ -513,66 +290,39 @@ void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
for (i = 0; i < count; i++) {
unsigned slot = start + i;
- if (!sstates[i]) {
- samplers->desc.dirty_mask &= ~(1llu << slot);
+ if (!sstates[i])
continue;
- }
- samplers->desc_data[slot] = sstates[i]->val;
- samplers->desc.dirty_mask |= 1llu << slot;
+ memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4);
+ samplers->desc.list_dirty = true;
}
-
- si_update_descriptors(sctx, &samplers->desc);
}
/* BUFFER RESOURCES */
-static void si_emit_buffer_resources(struct si_context *sctx, struct r600_atom *atom)
-{
- struct si_buffer_resources *buffers = (struct si_buffer_resources*)atom;
-
- si_emit_descriptors(sctx, &buffers->desc, buffers->desc_data);
-}
-
-static void si_init_buffer_resources(struct si_context *sctx,
- struct si_buffer_resources *buffers,
- unsigned num_buffers, unsigned shader,
+static void si_init_buffer_resources(struct si_buffer_resources *buffers,
+ unsigned num_buffers,
unsigned shader_userdata_index,
enum radeon_bo_usage shader_usage,
enum radeon_bo_priority priority)
{
- int i;
-
- buffers->num_buffers = num_buffers;
buffers->shader_usage = shader_usage;
buffers->priority = priority;
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
- buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4);
- /* si_emit_descriptors only accepts an array of arrays.
- * This adds such an array. */
- buffers->desc_data = CALLOC(num_buffers, sizeof(uint32_t*));
- for (i = 0; i < num_buffers; i++) {
- buffers->desc_data[i] = &buffers->desc_storage[i*4];
- }
-
- si_init_descriptors(sctx, &buffers->desc,
- si_get_shader_user_data_base(shader) +
- shader_userdata_index*4, 4, num_buffers,
- si_emit_buffer_resources);
+ si_init_descriptors(&buffers->desc, shader_userdata_index, 4,
+ num_buffers);
}
static void si_release_buffer_resources(struct si_buffer_resources *buffers)
{
int i;
- for (i = 0; i < buffers->num_buffers; i++) {
+ for (i = 0; i < buffers->desc.num_elements; i++) {
pipe_resource_reference(&buffers->buffers[i], NULL);
}
FREE(buffers->buffers);
- FREE(buffers->desc_storage);
- FREE(buffers->desc_data);
si_release_descriptors(&buffers->desc);
}
@@ -590,11 +340,11 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
buffers->shader_usage, buffers->priority);
}
+ if (!buffers->desc.buffer)
+ return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
buffers->desc.buffer, RADEON_USAGE_READWRITE,
RADEON_PRIO_SHADER_DATA);
-
- si_emit_shader_pointer(sctx, &buffers->desc.atom);
}
/* VERTEX BUFFERS */
@@ -617,14 +367,15 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
(struct r600_resource*)sctx->vertex_buffer[vb].buffer,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
}
+
+ if (!desc->buffer)
+ return;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
-
- si_emit_shader_pointer(sctx, &desc->atom);
}
-void si_update_vertex_buffers(struct si_context *sctx)
+static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
struct si_descriptors *desc = &sctx->vertex_buffers;
bool bound[SI_NUM_VERTEX_BUFFERS] = {};
@@ -632,8 +383,10 @@ void si_update_vertex_buffers(struct si_context *sctx)
uint64_t va;
uint32_t *ptr;
+ if (!sctx->vertex_buffers_dirty)
+ return true;
if (!count || !sctx->vertex_elements)
- return;
+ return true;
/* Vertex buffer descriptors are the only ones which are uploaded
* directly through a staging buffer and don't go through
@@ -641,13 +394,14 @@ void si_update_vertex_buffers(struct si_context *sctx)
*/
u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
(struct pipe_resource**)&desc->buffer, (void**)&ptr);
+ if (!desc->buffer)
+ return false;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
desc->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA);
assert(count <= SI_NUM_VERTEX_BUFFERS);
- assert(desc->current_context_id == 0);
for (i = 0; i < count; i++) {
struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
@@ -675,7 +429,8 @@ void si_update_vertex_buffers(struct si_context *sctx)
desc[0] = va & 0xFFFFFFFF;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(vb->stride);
- if (vb->stride)
+
+ if (sctx->b.chip_class <= CIK && vb->stride)
/* Round up by rounding down and adding 1 */
desc[2] = (vb->buffer->width0 - offset -
sctx->vertex_elements->format_size[i]) /
@@ -693,13 +448,14 @@ void si_update_vertex_buffers(struct si_context *sctx)
}
}
- desc->atom.num_dw = 8; /* update 2 shader pointers (VS+ES) */
- desc->atom.dirty = true;
-
/* Don't flush the const cache. It would have a very negative effect
* on performance (confirmed by testing). New descriptors are always
* uploaded to a fresh new buffer, so I don't think flushing the const
* cache is needed. */
+ desc->pointer_dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
+ sctx->vertex_buffers_dirty = false;
+ return true;
}
@@ -724,7 +480,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
if (shader >= SI_NUM_SHADERS)
return;
- assert(slot < buffers->num_buffers);
+ assert(slot < buffers->desc.num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
@@ -751,7 +507,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
}
/* Set the descriptor. */
- uint32_t *desc = buffers->desc_data[slot];
+ uint32_t *desc = buffers->desc.list + slot*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(0);
@@ -770,12 +526,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
buffers->desc.enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
- memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
+ memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
buffers->desc.enabled_mask &= ~(1llu << slot);
}
- buffers->desc.dirty_mask |= 1llu << slot;
- si_update_descriptors(sctx, &buffers->desc);
+ buffers->desc.list_dirty = true;
}
/* RING BUFFERS */
@@ -784,7 +539,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
struct pipe_resource *buffer,
unsigned stride, unsigned num_records,
bool add_tid, bool swizzle,
- unsigned element_size, unsigned index_stride)
+ unsigned element_size, unsigned index_stride, uint64_t offset)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
@@ -795,13 +550,13 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
/* The stride field in the resource descriptor has 14 bits */
assert(stride < (1 << 14));
- assert(slot < buffers->num_buffers);
+ assert(slot < buffers->desc.num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
if (buffer) {
uint64_t va;
- va = r600_resource(buffer)->gpu_address;
+ va = r600_resource(buffer)->gpu_address + offset;
switch (element_size) {
default:
@@ -839,8 +594,11 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
break;
}
+ if (sctx->b.chip_class >= VI && stride)
+ num_records *= stride;
+
/* Set the descriptor. */
- uint32_t *desc = buffers->desc_data[slot];
+ uint32_t *desc = buffers->desc.list + slot*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride) |
@@ -863,12 +621,11 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
buffers->desc.enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
- memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
+ memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
buffers->desc.enabled_mask &= ~(1llu << slot);
}
- buffers->desc.dirty_mask |= 1llu << slot;
- si_update_descriptors(sctx, &buffers->desc);
+ buffers->desc.list_dirty = true;
}
/* STREAMOUT BUFFERS */
@@ -929,15 +686,21 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
struct pipe_resource *buffer = targets[i]->buffer;
uint64_t va = r600_resource(buffer)->gpu_address;
- /* Set the descriptor. */
- uint32_t *desc = buffers->desc_data[bufidx];
+ /* Set the descriptor.
+ *
+ * On VI, the format must be non-INVALID, otherwise
+ * the buffer will be considered not bound and store
+ * instructions will be no-ops.
+ */
+ uint32_t *desc = buffers->desc.list + bufidx*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
desc[2] = 0xffffffff;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
/* Set the resource. */
pipe_resource_reference(&buffers->buffers[bufidx],
@@ -948,24 +711,22 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
buffers->desc.enabled_mask |= 1llu << bufidx;
} else {
/* Clear the descriptor and unset the resource. */
- memset(buffers->desc_data[bufidx], 0,
+ memset(buffers->desc.list + bufidx*4, 0,
sizeof(uint32_t) * 4);
pipe_resource_reference(&buffers->buffers[bufidx],
NULL);
buffers->desc.enabled_mask &= ~(1llu << bufidx);
}
- buffers->desc.dirty_mask |= 1llu << bufidx;
}
for (; i < old_num_targets; i++) {
bufidx = SI_SO_BUF_OFFSET + i;
/* Clear the descriptor and unset the resource. */
- memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4);
+ memset(buffers->desc.list + bufidx*4, 0, sizeof(uint32_t) * 4);
pipe_resource_reference(&buffers->buffers[bufidx], NULL);
buffers->desc.enabled_mask &= ~(1llu << bufidx);
- buffers->desc.dirty_mask |= 1llu << bufidx;
}
- si_update_descriptors(sctx, &buffers->desc);
+ buffers->desc.list_dirty = true;
}
static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
@@ -1034,22 +795,19 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
/* Read/Write buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
- bool found = false;
uint64_t mask = buffers->desc.enabled_mask;
while (mask) {
i = u_bit_scan64(&mask);
if (buffers->buffers[i] == buf) {
- si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
+ si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
old_va, buf);
+ buffers->desc.list_dirty = true;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
- buffers->desc.dirty_mask |= 1llu << i;
- found = true;
-
if (i >= SI_SO_BUF_OFFSET && shader == PIPE_SHADER_VERTEX) {
/* Update the streamout state. */
if (sctx->b.streamout.begin_emitted) {
@@ -1061,34 +819,25 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
}
}
}
- if (found) {
- si_update_descriptors(sctx, &buffers->desc);
- }
}
/* Constant buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_buffer_resources *buffers = &sctx->const_buffers[shader];
- bool found = false;
uint64_t mask = buffers->desc.enabled_mask;
while (mask) {
unsigned i = u_bit_scan64(&mask);
if (buffers->buffers[i] == buf) {
- si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
+ si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
old_va, buf);
+ buffers->desc.list_dirty = true;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, buffers->shader_usage,
buffers->priority);
-
- buffers->desc.dirty_mask |= 1llu << i;
- found = true;
}
}
- if (found) {
- si_update_descriptors(sctx, &buffers->desc);
- }
}
/* Texture buffers - update virtual addresses in sampler view descriptors. */
@@ -1100,223 +849,211 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
/* Texture buffers - update bindings. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_sampler_views *views = &sctx->samplers[shader].views;
- bool found = false;
uint64_t mask = views->desc.enabled_mask;
while (mask) {
unsigned i = u_bit_scan64(&mask);
if (views->views[i]->texture == buf) {
+ si_desc_reset_buffer_offset(ctx, views->desc.list + i*8+4,
+ old_va, buf);
+ views->desc.list_dirty = true;
+
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
rbuffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_BUFFER_RO);
-
- views->desc.dirty_mask |= 1llu << i;
- found = true;
}
}
- if (found) {
- si_update_descriptors(sctx, &views->desc);
- }
}
}
-/* CP DMA */
+/* SHADER USER DATA */
-/* The max number of bytes to copy per packet. */
-#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
-
-static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value,
- bool is_framebuffer)
+static void si_mark_shader_pointers_dirty(struct si_context *sctx,
+ unsigned shader)
{
- struct si_context *sctx = (struct si_context*)ctx;
- unsigned flush_flags, tc_l2_flag;
+ sctx->const_buffers[shader].desc.pointer_dirty = true;
+ sctx->rw_buffers[shader].desc.pointer_dirty = true;
+ sctx->samplers[shader].views.desc.pointer_dirty = true;
+ sctx->samplers[shader].states.desc.pointer_dirty = true;
- if (!size)
- return;
+ if (shader == PIPE_SHADER_VERTEX)
+ sctx->vertex_buffers.pointer_dirty = true;
- /* Mark the buffer range of destination as valid (initialized),
- * so that transfer_map knows it should wait for the GPU when mapping
- * that range. */
- util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
- offset + size);
-
- /* Fallback for unaligned clears. */
- if (offset % 4 != 0 || size % 4 != 0) {
- uint32_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
- sctx->b.rings.gfx.cs,
- PIPE_TRANSFER_WRITE);
- size /= 4;
- for (unsigned i = 0; i < size; i++)
- *map++ = value;
- return;
- }
-
- uint64_t va = r600_resource(dst)->gpu_address + offset;
-
- /* Flush the caches where the resource is bound. */
- if (is_framebuffer) {
- flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- tc_l2_flag = 0;
- } else {
- flush_flags = SI_CONTEXT_INV_TC_L1 |
- (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
- SI_CONTEXT_INV_KCACHE;
- tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
- }
-
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- flush_flags;
-
- while (size) {
- unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
- unsigned dma_flags = tc_l2_flag;
-
- si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0),
- FALSE);
-
- /* This must be done after need_cs_space. */
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
- (struct r600_resource*)dst, RADEON_USAGE_WRITE,
- RADEON_PRIO_MIN);
-
- /* Flush the caches for the first copy only.
- * Also wait for the previous CP DMA operations. */
- if (sctx->b.flags) {
- si_emit_cache_flush(&sctx->b, NULL);
- dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
- }
-
- /* Do the synchronization after the last copy, so that all data is written to memory. */
- if (size == byte_count)
- dma_flags |= R600_CP_DMA_SYNC;
-
- /* Emit the clear packet. */
- si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, dma_flags);
-
- size -= byte_count;
- va += byte_count;
- }
-
- /* Flush the caches again in case the 3D engine has been prefetching
- * the resource. */
- sctx->b.flags |= flush_flags;
-
- if (tc_l2_flag)
- r600_resource(dst)->TC_L2_dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
}
-void si_copy_buffer(struct si_context *sctx,
- struct pipe_resource *dst, struct pipe_resource *src,
- uint64_t dst_offset, uint64_t src_offset, unsigned size,
- bool is_framebuffer)
+static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
{
- unsigned flush_flags, tc_l2_flag;
+ int i;
- if (!size)
- return;
-
- /* Mark the buffer range of destination as valid (initialized),
- * so that transfer_map knows it should wait for the GPU when mapping
- * that range. */
- util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
- dst_offset + size);
-
- dst_offset += r600_resource(dst)->gpu_address;
- src_offset += r600_resource(src)->gpu_address;
-
- /* Flush the caches where the resource is bound. */
- if (is_framebuffer) {
- flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- tc_l2_flag = 0;
- } else {
- flush_flags = SI_CONTEXT_INV_TC_L1 |
- (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
- SI_CONTEXT_INV_KCACHE;
- tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ si_mark_shader_pointers_dirty(sctx, i);
}
-
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
- flush_flags;
-
- while (size) {
- unsigned sync_flags = tc_l2_flag;
- unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
-
- si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
-
- /* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
- if (sctx->b.flags) {
- si_emit_cache_flush(&sctx->b, NULL);
- sync_flags |= SI_CP_DMA_RAW_WAIT;
- }
-
- /* Do the synchronization after the last copy, so that all data is written to memory. */
- if (size == byte_count) {
- sync_flags |= R600_CP_DMA_SYNC;
- }
-
- /* This must be done after r600_need_cs_space. */
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
- RADEON_USAGE_READ, RADEON_PRIO_MIN);
- r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
- RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
-
- si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
-
- size -= byte_count;
- src_offset += byte_count;
- dst_offset += byte_count;
- }
-
- /* Flush the caches again in case the 3D engine has been prefetching
- * the resource. */
- sctx->b.flags |= flush_flags;
-
- if (tc_l2_flag)
- r600_resource(dst)->TC_L2_dirty = true;
}
-/* INIT/DEINIT */
+/* Set a base register address for user data constants in the given shader.
+ * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
+ */
+static void si_set_user_data_base(struct si_context *sctx,
+ unsigned shader, uint32_t new_base)
+{
+ uint32_t *base = &sctx->shader_userdata.sh_base[shader];
+
+ if (*base != new_base) {
+ *base = new_base;
+
+ if (new_base)
+ si_mark_shader_pointers_dirty(sctx, shader);
+ }
+}
+
+/* This must be called when these shaders are changed from non-NULL to NULL
+ * and vice versa:
+ * - geometry shader
+ * - tessellation control shader
+ * - tessellation evaluation shader
+ */
+void si_shader_change_notify(struct si_context *sctx)
+{
+ /* VS can be bound as VS, ES, or LS. */
+ if (sctx->tes_shader)
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B530_SPI_SHADER_USER_DATA_LS_0);
+ else if (sctx->gs_shader)
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
+ else
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
+
+ /* TES can be bound as ES, VS, or not bound. */
+ if (sctx->tes_shader) {
+ if (sctx->gs_shader)
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
+ else
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ } else {
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
+ }
+}
+
+static void si_emit_shader_pointer(struct si_context *sctx,
+ struct si_descriptors *desc,
+ unsigned sh_base, bool keep_dirty)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ uint64_t va;
+
+ if (!desc->pointer_dirty || !desc->buffer)
+ return;
+
+ va = desc->buffer->gpu_address +
+ desc->buffer_offset;
+
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
+ radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ desc->pointer_dirty = keep_dirty;
+}
+
+static void si_emit_shader_userdata(struct si_context *sctx,
+ struct r600_atom *atom)
+{
+ unsigned i;
+ uint32_t *sh_base = sctx->shader_userdata.sh_base;
+
+ if (sctx->gs_shader) {
+ /* The VS copy shader needs these for clipping, streamout, and rings. */
+ unsigned vs_base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ unsigned i = PIPE_SHADER_VERTEX;
+
+ si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, vs_base, true);
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, vs_base, true);
+
+ /* The TESSEVAL shader needs this for the ESGS ring buffer. */
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0, true);
+ } else if (sctx->tes_shader) {
+ /* The TESSEVAL shader needs this for streamout. */
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[PIPE_SHADER_VERTEX].desc,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0, true);
+ }
+
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ unsigned base = sh_base[i];
+
+ if (!base)
+ continue;
+
+ if (i != PIPE_SHADER_TESS_EVAL)
+ si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
+
+ si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
+ si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
+ si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false);
+ }
+ si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
+}
+
+/* INIT/DEINIT/UPLOAD */
void si_init_all_descriptors(struct si_context *sctx)
{
int i;
for (i = 0; i < SI_NUM_SHADERS; i++) {
- si_init_buffer_resources(sctx, &sctx->const_buffers[i],
- SI_NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
+ si_init_buffer_resources(&sctx->const_buffers[i],
+ SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
- si_init_buffer_resources(sctx, &sctx->rw_buffers[i],
- i == PIPE_SHADER_VERTEX ?
- SI_NUM_RW_BUFFERS : SI_NUM_RING_BUFFERS,
- i, SI_SGPR_RW_BUFFERS,
+ si_init_buffer_resources(&sctx->rw_buffers[i],
+ SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
- si_init_sampler_views(sctx, &sctx->samplers[i].views, i);
-
- si_init_descriptors(sctx, &sctx->samplers[i].states.desc,
- si_get_shader_user_data_base(i) + SI_SGPR_SAMPLER * 4,
- 4, SI_NUM_SAMPLER_STATES, si_emit_sampler_states);
-
- sctx->atoms.s.const_buffers[i] = &sctx->const_buffers[i].desc.atom;
- sctx->atoms.s.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom;
- sctx->atoms.s.sampler_views[i] = &sctx->samplers[i].views.desc.atom;
- sctx->atoms.s.sampler_states[i] = &sctx->samplers[i].states.desc.atom;
+ si_init_descriptors(&sctx->samplers[i].views.desc,
+ SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
+ si_init_descriptors(&sctx->samplers[i].states.desc,
+ SI_SGPR_SAMPLER, 4, SI_NUM_SAMPLER_STATES);
}
- si_init_descriptors(sctx, &sctx->vertex_buffers,
- si_get_shader_user_data_base(PIPE_SHADER_VERTEX) +
- SI_SGPR_VERTEX_BUFFER*4, 4, SI_NUM_VERTEX_BUFFERS,
- si_emit_shader_pointer);
- sctx->atoms.s.vertex_buffers = &sctx->vertex_buffers.atom;
+ si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFER,
+ 4, SI_NUM_VERTEX_BUFFERS);
/* Set pipe_context functions. */
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
- sctx->b.clear_buffer = si_clear_buffer;
sctx->b.invalidate_buffer = si_invalidate_buffer;
+
+ /* Shader user data. */
+ sctx->atoms.s.shader_userdata = &sctx->shader_userdata.atom;
+ sctx->shader_userdata.atom.emit = (void*)si_emit_shader_userdata;
+
+ /* Upper bound, 4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader. */
+ sctx->shader_userdata.atom.num_dw = (SI_NUM_SHADERS * 4 + 1 + 2) * 4;
+
+ /* Set default and immutable mappings. */
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
+ si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+}
+
+bool si_upload_shader_descriptors(struct si_context *sctx)
+{
+ int i;
+
+ for (i = 0; i < SI_NUM_SHADERS; i++) {
+ if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
+ !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
+ !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
+ !si_upload_descriptors(sctx, &sctx->samplers[i].states.desc))
+ return false;
+ }
+ return si_upload_vertex_buffer_descriptors(sctx);
}
void si_release_all_descriptors(struct si_context *sctx)
@@ -1343,4 +1080,5 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states);
}
si_vertex_buffers_begin_new_cs(sctx);
+ si_shader_userdata_begin_new_cs(sctx);
}
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 313ced7f5d1..307dc391431 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -30,10 +30,32 @@
void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
boolean count_draw_in)
{
+ struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
int i;
+ /* If the CS is sufficiently large, don't count the space needed
+ * and just flush if there are fewer than 8192 (8 * 1024) dwords left. */
+ if (cs->max_dw >= 24 * 1024) {
+ if (cs->cdw > cs->max_dw - 8 * 1024)
+ ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ return;
+ }
+
+ /* There are two memory usage counters in the winsys for all buffers
+ * that have been added (cs_add_reloc) and two counters in the pipe
+ * driver for those that haven't been added yet.
+ */
+ if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
+ ctx->b.gtt = 0;
+ ctx->b.vram = 0;
+ ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+ return;
+ }
+ ctx->b.gtt = 0;
+ ctx->b.vram = 0;
+
/* The number of dwords we already used in the CS so far. */
- num_dw += ctx->b.rings.gfx.cs->cdw;
+ num_dw += cs->cdw;
if (count_draw_in) {
for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
@@ -50,7 +72,8 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
}
/* Count in queries_suspend. */
- num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend;
+ num_dw += ctx->b.num_cs_dw_nontimer_queries_suspend +
+ ctx->b.num_cs_dw_timer_queries_suspend;
/* Count in streamout_end at the end of CS. */
if (ctx->b.streamout.begin_emitted) {
@@ -72,7 +95,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
#endif
/* Flush if there's not enough space. */
- if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
+ if (num_dw > cs->max_dw) {
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
}
@@ -82,9 +105,16 @@ void si_context_gfx_flush(void *context, unsigned flags,
{
struct si_context *ctx = context;
struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+ struct radeon_winsys *ws = ctx->b.ws;
- if (cs->cdw == ctx->b.initial_gfx_cs_size && !fence)
+ if (cs->cdw == ctx->b.initial_gfx_cs_size &&
+ (!fence || ctx->last_gfx_fence)) {
+ if (fence)
+ ws->fence_reference(fence, ctx->last_gfx_fence);
+ if (!(flags & RADEON_FLUSH_ASYNC))
+ ws->cs_sync_flush(cs);
return;
+ }
ctx->b.rings.gfx.flushing = true;
@@ -101,9 +131,13 @@ void si_context_gfx_flush(void *context, unsigned flags,
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
/* Flush the CS. */
- ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++);
+ ws->cs_flush(cs, flags, &ctx->last_gfx_fence,
+ ctx->screen->b.cs_count++);
ctx->b.rings.gfx.flushing = false;
+ if (fence)
+ ws->fence_reference(fence, ctx->last_gfx_fence);
+
#if SI_TRACE_CS
if (ctx->screen->b.trace_bo) {
struct si_screen *sscreen = ctx->screen;
@@ -111,7 +145,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
for (i = 0; i < 10; i++) {
usleep(5);
- if (!ctx->b.ws->buffer_is_busy(sscreen->b.trace_bo->buf, RADEON_USAGE_READWRITE)) {
+ if (!ws->buffer_is_busy(sscreen->b.trace_bo->buf, RADEON_USAGE_READWRITE)) {
break;
}
}
@@ -130,7 +164,8 @@ void si_context_gfx_flush(void *context, unsigned flags,
void si_begin_new_cs(struct si_context *ctx)
{
/* Flush read caches at the beginning of CS. */
- ctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
+ ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
+ SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_INV_ICACHE;
@@ -143,24 +178,32 @@ void si_begin_new_cs(struct si_context *ctx)
/* The CS initialization should be emitted before everything else. */
si_pm4_emit(ctx, ctx->init_config);
- ctx->clip_regs.dirty = true;
- ctx->framebuffer.atom.dirty = true;
- ctx->msaa_sample_locs.dirty = true;
- ctx->msaa_config.dirty = true;
- ctx->db_render_state.dirty = true;
- ctx->b.streamout.enable_atom.dirty = true;
+ si_mark_atom_dirty(ctx, &ctx->clip_regs);
+ si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
+ si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
+ si_mark_atom_dirty(ctx, &ctx->msaa_config);
+ si_mark_atom_dirty(ctx, &ctx->db_render_state);
+ si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_all_descriptors_begin_new_cs(ctx);
r600_postflush_resume_features(&ctx->b);
ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
+
+ /* Invalidate various draw states so that they are emitted before
+ * the first draw call. */
si_invalidate_draw_sh_constants(ctx);
ctx->last_primitive_restart_en = -1;
ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
ctx->last_gs_out_prim = -1;
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
+ ctx->last_ls_hs_config = -1;
ctx->last_rast_prim = -1;
ctx->last_sc_line_stipple = ~0;
ctx->emit_scratch_reloc = true;
+ ctx->last_ls = NULL;
+ ctx->last_tcs = NULL;
+ ctx->last_tes_sh_base = -1;
+ ctx->last_num_tcs_input_cp = -1;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 53ae71a8c92..473a2e9ad12 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -36,32 +36,42 @@
static void si_destroy_context(struct pipe_context *context)
{
struct si_context *sctx = (struct si_context *)context;
+ int i;
si_release_all_descriptors(sctx);
pipe_resource_reference(&sctx->esgs_ring, NULL);
pipe_resource_reference(&sctx->gsvs_ring, NULL);
+ pipe_resource_reference(&sctx->tf_ring, NULL);
pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
r600_resource_reference(&sctx->border_color_table, NULL);
r600_resource_reference(&sctx->scratch_buffer, NULL);
+ sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
si_pm4_free_state(sctx, sctx->init_config, ~0);
si_pm4_delete_state(sctx, gs_rings, sctx->gs_rings);
- si_pm4_delete_state(sctx, gs_onoff, sctx->gs_on);
- si_pm4_delete_state(sctx, gs_onoff, sctx->gs_off);
+ si_pm4_delete_state(sctx, tf_ring, sctx->tf_state);
+ for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
+ si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
if (sctx->pstipple_sampler_state)
sctx->b.b.delete_sampler_state(&sctx->b.b, sctx->pstipple_sampler_state);
- if (sctx->dummy_pixel_shader) {
+ if (sctx->dummy_pixel_shader)
sctx->b.b.delete_fs_state(&sctx->b.b, sctx->dummy_pixel_shader);
- }
- sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
- sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_resolve);
- sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress);
- sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear);
+ if (sctx->fixed_func_tcs_shader)
+ sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader);
+ if (sctx->custom_dsa_flush)
+ sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
+ if (sctx->custom_blend_resolve)
+ sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_resolve);
+ if (sctx->custom_blend_decompress)
+ sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_decompress);
+ if (sctx->custom_blend_fastclear)
+ sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fastclear);
util_unreference_framebuffer_state(&sctx->framebuffer.state);
- util_blitter_destroy(sctx->blitter);
+ if (sctx->blitter)
+ util_blitter_destroy(sctx->blitter);
si_pm4_cleanup(sctx);
@@ -74,6 +84,14 @@ static void si_destroy_context(struct pipe_context *context)
FREE(sctx);
}
+static enum pipe_reset_status
+si_amdgpu_get_reset_status(struct pipe_context *ctx)
+{
+ struct si_context *sctx = (struct si_context *)ctx;
+
+ return sctx->b.ws->ctx_query_reset_status(sctx->b.ctx);
+}
+
static struct pipe_context *si_create_context(struct pipe_screen *screen, void *priv)
{
struct si_context *sctx = CALLOC_STRUCT(si_context);
@@ -91,13 +109,18 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
sctx->b.b.screen = screen; /* this must be set first */
sctx->b.b.priv = priv;
sctx->b.b.destroy = si_destroy_context;
+ sctx->b.set_atom_dirty = (void *)si_set_atom_dirty;
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
if (!r600_common_context_init(&sctx->b, &sscreen->b))
goto fail;
+ if (sscreen->b.info.drm_major == 3)
+ sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;
+
si_init_blit_functions(sctx);
si_init_compute_functions(sctx);
+ si_init_cp_dma_functions(sctx);
if (sscreen->b.info.has_uvd) {
sctx->b.b.create_video_codec = si_uvd_create_decoder;
@@ -107,7 +130,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
sctx->b.b.create_video_buffer = vl_video_buffer_create;
}
- sctx->b.rings.gfx.cs = ws->cs_create(ws, RING_GFX, si_context_gfx_flush,
+ sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
sctx, sscreen->b.trace_bo ?
sscreen->b.trace_bo->cs_buf : NULL);
sctx->b.rings.gfx.flush = si_context_gfx_flush;
@@ -127,17 +150,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
sctx->atoms.s.streamout_begin = &sctx->b.streamout.begin_atom;
sctx->atoms.s.streamout_enable = &sctx->b.streamout.enable_atom;
- switch (sctx->b.chip_class) {
- case SI:
- case CIK:
- si_init_state_functions(sctx);
- si_init_shader_functions(sctx);
- si_init_config(sctx);
- break;
- default:
- R600_ERR("Unsupported chip class %d.\n", sctx->b.chip_class);
- goto fail;
- }
+ si_init_state_functions(sctx);
+ si_init_shader_functions(sctx);
if (sscreen->b.debug_flags & DBG_FORCE_DMA)
sctx->b.b.resource_copy_region = sctx->b.dma_copy;
@@ -181,7 +195,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
r600_target = radeon_llvm_get_r600_target(triple);
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
r600_get_llvm_processor_name(sscreen->b.family),
- "+DumpCode,+vgpr-spilling",
+ sctx->b.chip_class >= VI ?
+ "+DumpCode" :
+ "+DumpCode,+vgpr-spilling",
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
@@ -252,15 +268,27 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
case PIPE_CAP_TGSI_TEXCOORD:
+ case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+ case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
+ case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ return (sscreen->b.info.drm_major == 2 &&
+ sscreen->b.info.drm_minor >= 43) ||
+ sscreen->b.info.drm_major == 3;
+
case PIPE_CAP_TEXTURE_MULTISAMPLE:
/* 2D tiling on CIK is supported since DRM 2.35.0 */
return sscreen->b.chip_class < CIK ||
- sscreen->b.info.drm_minor >= 35;
+ (sscreen->b.info.drm_major == 2 &&
+ sscreen->b.info.drm_minor >= 35) ||
+ sscreen->b.info.drm_major == 3;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return R600_MAP_BUFFER_ALIGNMENT;
@@ -270,7 +298,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 4;
case PIPE_CAP_GLSL_FEATURE_LEVEL:
- return 330;
+ return HAVE_LLVM >= 0x0307 ? 410 : 330;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
@@ -289,13 +317,13 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
- case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
- case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_VERTEXID_NOBASE:
- case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return 0;
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ return 30;
+
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600;
@@ -314,7 +342,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
return 4095;
case PIPE_CAP_MAX_VERTEX_STREAMS:
- return 1;
+ return 4;
case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
return 2048;
@@ -335,7 +363,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
return 8;
case PIPE_CAP_MAX_VIEWPORTS:
- return 1;
+ return 16;
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
@@ -375,6 +403,13 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_VERTEX:
case PIPE_SHADER_GEOMETRY:
break;
+ case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
+ /* LLVM 3.6.2 is required for tessellation because of bug fixes there */
+ if (HAVE_LLVM < 0x0306 ||
+ (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 2))
+ return 0;
+ break;
case PIPE_SHADER_COMPUTE:
switch (param) {
case PIPE_SHADER_CAP_PREFERRED_IR:
@@ -401,7 +436,6 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
}
break;
default:
- /* TODO: support tessellation */
return 0;
}
@@ -433,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
/* Indirection of geometry shader input dimension is not
* handled yet
*/
- return shader < PIPE_SHADER_GEOMETRY;
+ return shader != PIPE_SHADER_GEOMETRY;
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
@@ -448,6 +482,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_DOUBLES:
+ return HAVE_LLVM >= 0x0307;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
return 0;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 2d67342f160..553e1f32683 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -48,7 +48,8 @@
#define SI_MAX_DRAW_CS_DWORDS \
(/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
- /*draw regs:*/ 16 + /*draw packets:*/ 31)
+ /*draw regs:*/ 18 + /*draw packets:*/ 31 +\
+ /*derived tess state:*/ 19)
/* Instruction cache. */
#define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
@@ -125,8 +126,6 @@ struct si_framebuffer {
#define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))
-#define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1)
-
struct si_context {
struct r600_common_context b;
struct blitter_context *blitter;
@@ -137,17 +136,12 @@ struct si_context {
void *pstipple_sampler_state;
struct si_screen *screen;
struct si_pm4_state *init_config;
+ struct pipe_fence_handle *last_gfx_fence;
+ struct si_shader_selector *fixed_func_tcs_shader;
union {
struct {
/* The order matters. */
- struct r600_atom *vertex_buffers;
- struct r600_atom *const_buffers[SI_NUM_SHADERS];
- struct r600_atom *rw_buffers[SI_NUM_SHADERS];
- struct r600_atom *sampler_views[SI_NUM_SHADERS];
- struct r600_atom *sampler_states[SI_NUM_SHADERS];
- /* Caches must be flushed after resource descriptors are
- * updated in memory. */
struct r600_atom *cache_flush;
struct r600_atom *streamout_begin;
struct r600_atom *streamout_enable; /* must be after streamout_begin */
@@ -156,6 +150,7 @@ struct si_context {
struct r600_atom *db_render_state;
struct r600_atom *msaa_config;
struct r600_atom *clip_regs;
+ struct r600_atom *shader_userdata;
} s;
struct r600_atom *array[0];
} atoms;
@@ -168,7 +163,10 @@ struct si_context {
struct si_shader_selector *ps_shader;
struct si_shader_selector *gs_shader;
struct si_shader_selector *vs_shader;
+ struct si_shader_selector *tcs_shader;
+ struct si_shader_selector *tes_shader;
struct si_cs_shader_state cs_shader_state;
+ struct si_shader_data shader_userdata;
/* shader information */
unsigned sprite_coord_enable;
bool flatshade;
@@ -194,13 +192,16 @@ struct si_context {
/* With rasterizer discard, there doesn't have to be a pixel shader.
* In that case, we bind this one: */
void *dummy_pixel_shader;
- struct si_pm4_state *gs_on;
- struct si_pm4_state *gs_off;
- struct si_pm4_state *gs_rings;
struct r600_atom cache_flush;
struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+
+ /* VGT states. */
+ struct si_pm4_state *vgt_shader_config[4];
+ struct si_pm4_state *gs_rings;
struct pipe_resource *esgs_ring;
struct pipe_resource *gsvs_ring;
+ struct si_pm4_state *tf_state;
+ struct pipe_resource *tf_ring;
LLVMTargetMachineRef tm;
@@ -218,7 +219,7 @@ struct si_context {
bool db_depth_disable_expclear;
unsigned ps_db_shader_control;
- /* Draw state. */
+ /* Emitted draw state. */
int last_base_vertex;
int last_start_instance;
int last_sh_base_reg;
@@ -227,6 +228,7 @@ struct si_context {
int last_gs_out_prim;
int last_prim;
int last_multi_vgt_param;
+ int last_ls_hs_config;
int last_rast_prim;
unsigned last_sc_line_stipple;
int current_rast_prim; /* primitive type after TES, GS */
@@ -235,6 +237,12 @@ struct si_context {
boolean emit_scratch_reloc;
unsigned scratch_waves;
unsigned spi_tmpring_size;
+
+ /* Emitted derived tessellation state. */
+ struct si_shader *last_ls; /* local shader (VS) */
+ struct si_shader_selector *last_tcs;
+ int last_num_tcs_input_cp;
+ int last_tes_sh_base;
};
/* cik_sdma.c */
@@ -260,6 +268,13 @@ void si_resource_copy_region(struct pipe_context *ctx,
unsigned src_level,
const struct pipe_box *src_box);
+/* si_cp_dma.c */
+void si_copy_buffer(struct si_context *sctx,
+ struct pipe_resource *dst, struct pipe_resource *src,
+ uint64_t dst_offset, uint64_t src_offset, unsigned size,
+ bool is_framebuffer);
+void si_init_cp_dma_functions(struct si_context *sctx);
+
/* si_dma.c */
void si_dma_copy(struct pipe_context *ctx,
struct pipe_resource *dst,
@@ -293,7 +308,7 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
* common helpers
*/
-static INLINE struct r600_resource *
+static inline struct r600_resource *
si_resource_create_custom(struct pipe_screen *screen,
unsigned usage, unsigned size)
{
@@ -302,7 +317,7 @@ si_resource_create_custom(struct pipe_screen *screen,
PIPE_BIND_CUSTOM, usage, size));
}
-static INLINE void
+static inline void
si_invalidate_draw_sh_constants(struct si_context *sctx)
{
sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
@@ -310,4 +325,18 @@ si_invalidate_draw_sh_constants(struct si_context *sctx)
sctx->last_sh_base_reg = -1; /* reset to an unknown value */
}
+/**
+ * Write the dirty flag of a state atom.
+ *
+ * \param sctx   context argument; not read by this helper
+ * \param atom   atom whose flag is written
+ * \param dirty  new value of the flag
+ */
+static inline void
+si_set_atom_dirty(struct si_context *sctx, struct r600_atom *atom,
+		  bool dirty)
+{
+	(void)sctx; /* only part of the interface */
+	atom->dirty = dirty;
+}
+
+/**
+ * Flag a state atom as dirty.
+ * Shorthand for si_set_atom_dirty() with dirty == true.
+ */
+static inline void
+si_mark_atom_dirty(struct si_context *sctx, struct r600_atom *atom)
+{
+	const bool dirty = true;
+
+	si_set_atom_dirty(sctx, atom, dirty);
+}
+
#endif
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 47e5f96cbed..4288e9b2ab1 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -31,6 +31,7 @@
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_flow.h"
#include "radeon/r600_cs.h"
#include "radeon/radeon_llvm.h"
@@ -71,18 +72,25 @@ struct si_shader_context
int param_streamout_write_index;
int param_streamout_offset[4];
int param_vertex_id;
+ int param_rel_auto_id;
+ int param_vs_prim_id;
int param_instance_id;
+ int param_tes_u;
+ int param_tes_v;
+ int param_tes_rel_patch_id;
+ int param_tes_patch_id;
+ int param_es2gs_offset;
LLVMTargetMachineRef tm;
LLVMValueRef const_md;
LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
- LLVMValueRef ddxy_lds;
+ LLVMValueRef lds;
LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
LLVMValueRef so_buffers[4];
LLVMValueRef esgs_ring;
- LLVMValueRef gsvs_ring;
- LLVMValueRef gs_next_vertex;
+ LLVMValueRef gsvs_ring[4];
+ LLVMValueRef gs_next_vertex[4];
};
static struct si_shader_context * si_shader_context(
@@ -129,12 +137,29 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
assert(index <= 1);
return 2 + index;
case TGSI_SEMANTIC_GENERIC:
- assert(index <= 63-4);
- return 4 + index;
+ if (index <= 63-4)
+ return 4 + index;
+ else
+ /* same explanation as in the default statement,
+ * the only user hitting this is st/nine.
+ */
+ return 0;
+
+ /* patch indices are completely separate and thus start from 0 */
+ case TGSI_SEMANTIC_TESSOUTER:
+ return 0;
+ case TGSI_SEMANTIC_TESSINNER:
+ return 1;
+ case TGSI_SEMANTIC_PATCH:
+ return 2 + index;
default:
- assert(0);
- return 63;
+ /* Don't fail here. The result of this function is only used
+ * for LS, TCS, TES, and GS, where legacy GL semantics can't
+ * occur, but this function is called for all vertex shaders
+ * before it's known whether LS will be compiled or not.
+ */
+ return 0;
}
}
@@ -205,6 +230,136 @@ static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
return value;
}
+/**
+ * Return the relative patch ID for the shader being compiled.
+ *
+ * Tessellation control shaders unpack it from SI_PARAM_REL_IDS; tessellation
+ * evaluation shaders read the function parameter recorded in
+ * param_tes_rel_patch_id. Any other shader type asserts and returns NULL.
+ */
+static LLVMValueRef get_rel_patch_id(struct si_shader_context *si_shader_ctx)
+{
+	if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+		return unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 0, 8);
+
+	if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
+		LLVMValueRef main_fn = si_shader_ctx->radeon_bld.main_fn;
+
+		return LLVMGetParam(main_fn,
+				    si_shader_ctx->param_tes_rel_patch_id);
+	}
+
+	assert(0);
+	return NULL;
+}
+
+/* Tessellation shaders pass outputs to the next shader using LDS.
+ *
+ * LS outputs = TCS inputs
+ * TCS outputs = TES inputs
+ *
+ * The LDS layout is:
+ * - TCS inputs for patch 0
+ * - TCS inputs for patch 1
+ * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
+ * - ...
+ * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
+ * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
+ * - TCS outputs for patch 1
+ * - Per-patch TCS outputs for patch 1
+ * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
+ * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
+ * - ...
+ *
+ * All three shaders VS(LS), TCS, TES share the same LDS space.
+ */
+
+/**
+ * Return the stride between the TCS inputs of consecutive patches in LDS.
+ *
+ * The value is unpacked from SI_PARAM_LS_OUT_LAYOUT for vertex shaders and
+ * from SI_PARAM_TCS_IN_LAYOUT for tessellation control shaders. Other shader
+ * types assert and return NULL.
+ */
+static LLVMValueRef
+get_tcs_in_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+	switch (si_shader_ctx->type) {
+	case TGSI_PROCESSOR_VERTEX:
+		return unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
+
+	case TGSI_PROCESSOR_TESS_CTRL:
+		return unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
+
+	default:
+		assert(0);
+		return NULL;
+	}
+}
+
+/* Unpack the TCS output patch stride from SI_PARAM_TCS_OUT_LAYOUT. */
+static LLVMValueRef
+get_tcs_out_patch_stride(struct si_shader_context *si_shader_ctx)
+{
+	LLVMValueRef stride;
+
+	stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 0, 13);
+	return stride;
+}
+
+/**
+ * Return the offset of the TCS outputs of patch 0: the value unpacked from
+ * SI_PARAM_TCS_OUT_OFFSETS with arguments (0, 16), multiplied by 4.
+ */
+static LLVMValueRef
+get_tcs_out_patch0_offset(struct si_shader_context *si_shader_ctx)
+{
+	struct lp_build_context *uint_bld =
+		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+	LLVMValueRef packed;
+
+	packed = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_OFFSETS, 0, 16);
+	return lp_build_mul_imm(uint_bld, packed, 4);
+}
+
+/**
+ * Return the offset of the per-patch TCS outputs of patch 0: the value
+ * unpacked from SI_PARAM_TCS_OUT_OFFSETS with arguments (16, 16),
+ * multiplied by 4.
+ */
+static LLVMValueRef
+get_tcs_out_patch0_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+	struct lp_build_context *uint_bld =
+		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
+	LLVMValueRef packed;
+
+	packed = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_OFFSETS, 16, 16);
+	return lp_build_mul_imm(uint_bld, packed, 4);
+}
+
+/**
+ * Return the offset of the TCS inputs of the current patch:
+ * input patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_in_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+	LLVMBuilderRef builder = si_shader_ctx->radeon_bld.gallivm.builder;
+	LLVMValueRef stride, patch;
+
+	stride = get_tcs_in_patch_stride(si_shader_ctx);
+	patch = get_rel_patch_id(si_shader_ctx);
+
+	return LLVMBuildMul(builder, stride, patch, "");
+}
+
+/**
+ * Return the offset of the TCS outputs of the current patch:
+ * patch 0 output offset + output patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_out_current_patch_offset(struct si_shader_context *si_shader_ctx)
+{
+	LLVMBuilderRef builder = si_shader_ctx->radeon_bld.gallivm.builder;
+	LLVMValueRef base, stride, patch, scaled;
+
+	base = get_tcs_out_patch0_offset(si_shader_ctx);
+	stride = get_tcs_out_patch_stride(si_shader_ctx);
+	patch = get_rel_patch_id(si_shader_ctx);
+
+	scaled = LLVMBuildMul(builder, stride, patch, "");
+	return LLVMBuildAdd(builder, base, scaled, "");
+}
+
+/**
+ * Return the offset of the per-patch TCS outputs of the current patch:
+ * patch 0 per-patch data offset + output patch stride * relative patch ID.
+ */
+static LLVMValueRef
+get_tcs_out_current_patch_data_offset(struct si_shader_context *si_shader_ctx)
+{
+	LLVMBuilderRef builder = si_shader_ctx->radeon_bld.gallivm.builder;
+	LLVMValueRef base, stride, patch, scaled;
+
+	base = get_tcs_out_patch0_patch_data_offset(si_shader_ctx);
+	stride = get_tcs_out_patch_stride(si_shader_ctx);
+	patch = get_rel_patch_id(si_shader_ctx);
+
+	scaled = LLVMBuildMul(builder, stride, patch, "");
+	return LLVMBuildAdd(builder, base, scaled, "");
+}
+
+/**
+ * Build an LLVM bytecode indexed store using LLVMBuildGEP + LLVMBuildStore.
+ * The GEP uses the index pair {0, index} on \p base_ptr, and \p value is
+ * stored through the resulting pointer.
+ */
+static void build_indexed_store(struct si_shader_context *si_shader_ctx,
+				LLVMValueRef base_ptr, LLVMValueRef index,
+				LLVMValueRef value)
+{
+	struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMValueRef gep_indices[2] = { bld_base->uint_bld.zero, index };
+	LLVMValueRef element_ptr;
+
+	element_ptr = LLVMBuildGEP(builder, base_ptr, gep_indices, 2, "");
+	LLVMBuildStore(builder, value, element_ptr);
+}
+
/**
* Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
* It's equivalent to doing a load from &base_ptr[index].
@@ -308,7 +463,7 @@ static void declare_input_vs(
args[0] = t_list;
args[1] = attribute_offset;
args[2] = buffer_index;
- input = build_intrinsic(gallivm->builder,
+ input = lp_build_intrinsic(gallivm->builder,
"llvm.SI.vs.load.input", vec4_type, args, 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -323,6 +478,285 @@ static void declare_input_vs(
}
}
+/**
+ * Return the primitive ID for the shader being compiled.
+ *
+ * Only channel 0 carries the ID; every other swizzle yields zero. The ID is
+ * read from a shader-type-specific function parameter. Unsupported shader
+ * types assert and yield zero.
+ */
+static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
+				     unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	unsigned param;
+
+	if (swizzle > 0)
+		return bld_base->uint_bld.zero;
+
+	/* Pick the function parameter that holds the ID. */
+	switch (si_shader_ctx->type) {
+	case TGSI_PROCESSOR_VERTEX:
+		param = si_shader_ctx->param_vs_prim_id;
+		break;
+	case TGSI_PROCESSOR_TESS_CTRL:
+		param = SI_PARAM_PATCH_ID;
+		break;
+	case TGSI_PROCESSOR_TESS_EVAL:
+		param = si_shader_ctx->param_tes_patch_id;
+		break;
+	case TGSI_PROCESSOR_GEOMETRY:
+		param = SI_PARAM_PRIMITIVE_ID;
+		break;
+	default:
+		assert(0);
+		return bld_base->uint_bld.zero;
+	}
+
+	return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param);
+}
+
+/**
+ * Return the value of tgsi_ind_register for indexing.
+ *
+ * The address register selected by \p ind is loaded and the constant
+ * \p rel_index is added to it.
+ */
+static LLVMValueRef get_indirect_index(struct si_shader_context *si_shader_ctx,
+				       const struct tgsi_ind_register *ind,
+				       int rel_index)
+{
+	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef addr_ptr, addr_val, offset;
+
+	addr_ptr = si_shader_ctx->radeon_bld.soa.addr[ind->Index][ind->Swizzle];
+	addr_val = LLVMBuildLoad(builder, addr_ptr, "");
+	offset = lp_build_const_int32(gallivm, rel_index);
+
+	return LLVMBuildAdd(builder, addr_val, offset, "");
+}
+
+/**
+ * Calculate a dword address given an input or output register and a stride.
+ *
+ * The register is described by \p src when it is non-NULL, otherwise by
+ * \p dst.
+ *
+ * \param dst               destination register, used when \p src is NULL
+ * \param src               source register, or NULL
+ * \param vertex_dw_stride  multiplied by the vertex index for 2-dimensional
+ *                          registers; not read for other registers
+ * \param base_addr         address the register offset is added to
+ * \return the resulting address, or NULL (after asserting) if the register
+ *         file is neither INPUT nor OUTPUT
+ */
+static LLVMValueRef get_dw_address(struct si_shader_context *si_shader_ctx,
+				   const struct tgsi_full_dst_register *dst,
+				   const struct tgsi_full_src_register *src,
+				   LLVMValueRef vertex_dw_stride,
+				   LLVMValueRef base_addr)
+{
+	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
+	struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
+	ubyte *name, *index, *array_first;
+	int first, param;
+	struct tgsi_full_dst_register reg;
+
+	/* Set the register description. The address computation is the same
+	 * for sources and destinations. */
+	if (src) {
+		reg.Register.File = src->Register.File;
+		reg.Register.Index = src->Register.Index;
+		reg.Register.Indirect = src->Register.Indirect;
+		reg.Register.Dimension = src->Register.Dimension;
+		reg.Indirect = src->Indirect;
+		reg.Dimension = src->Dimension;
+		reg.DimIndirect = src->DimIndirect;
+	} else
+		reg = *dst;
+
+	/* If the register is 2-dimensional (e.g. an array of vertices
+	 * in a primitive), calculate the base address of the vertex. */
+	if (reg.Register.Dimension) {
+		/* Named differently from the semantic-index table "index"
+		 * above so that it does not shadow it. */
+		LLVMValueRef vertex_index;
+
+		if (reg.Dimension.Indirect)
+			vertex_index = get_indirect_index(si_shader_ctx, &reg.DimIndirect,
+							  reg.Dimension.Index);
+		else
+			vertex_index = lp_build_const_int32(gallivm, reg.Dimension.Index);
+
+		base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+					 LLVMBuildMul(gallivm->builder, vertex_index,
+						      vertex_dw_stride, ""), "");
+	}
+
+	/* Get information about the register. */
+	if (reg.Register.File == TGSI_FILE_INPUT) {
+		name = info->input_semantic_name;
+		index = info->input_semantic_index;
+		array_first = info->input_array_first;
+	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
+		name = info->output_semantic_name;
+		index = info->output_semantic_index;
+		array_first = info->output_array_first;
+	} else {
+		assert(0);
+		return NULL;
+	}
+
+	if (reg.Register.Indirect) {
+		/* Add the relative address of the element. */
+		LLVMValueRef ind_index;
+
+		/* With an array ID the offset is relative to the first
+		 * register of that array; otherwise to the register itself. */
+		if (reg.Indirect.ArrayID)
+			first = array_first[reg.Indirect.ArrayID];
+		else
+			first = reg.Register.Index;
+
+		ind_index = get_indirect_index(si_shader_ctx, &reg.Indirect,
+					       reg.Register.Index - first);
+
+		/* Each register element spans 4 dwords. */
+		base_addr = LLVMBuildAdd(gallivm->builder, base_addr,
+					 LLVMBuildMul(gallivm->builder, ind_index,
+						      lp_build_const_int32(gallivm, 4), ""), "");
+
+		param = si_shader_io_get_unique_index(name[first], index[first]);
+	} else {
+		param = si_shader_io_get_unique_index(name[reg.Register.Index],
+						      index[reg.Register.Index]);
+	}
+
+	/* Add the base address of the element. */
+	return LLVMBuildAdd(gallivm->builder, base_addr,
+			    lp_build_const_int32(gallivm, param * 4), "");
+}
+
+/**
+ * Load from LDS.
+ *
+ * \param type		output value type
+ * \param swizzle	offset (typically 0..3); it can be ~0, which loads a vec4
+ * \param dw_addr	address in dwords
+ * \return the loaded dword bitcast to \p type, or for swizzle == ~0 the
+ *         four channel values gathered together
+ */
+static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
+			     enum tgsi_opcode_type type, unsigned swizzle,
+			     LLVMValueRef dw_addr)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMValueRef channels[TGSI_NUM_CHANNELS];
+	unsigned i;
+
+	if (swizzle != ~0) {
+		/* Single channel: load one dword at dw_addr + swizzle. */
+		LLVMValueRef addr, dword;
+
+		addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+				    lp_build_const_int32(gallivm, swizzle));
+		dword = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, addr);
+
+		return LLVMBuildBitCast(gallivm->builder, dword,
+					tgsi2llvmtype(bld_base, type), "");
+	}
+
+	/* All channels: load each one separately and gather them. */
+	for (i = 0; i < TGSI_NUM_CHANNELS; i++)
+		channels[i] = lds_load(bld_base, type, i, dw_addr);
+
+	return lp_build_gather_values(gallivm, channels, TGSI_NUM_CHANNELS);
+}
+
+/**
+ * Store to LDS.
+ *
+ * The value is bitcast to a 32-bit integer and written at
+ * \p dw_addr + \p swizzle.
+ *
+ * \param swizzle	offset (typically 0..3)
+ * \param dw_addr	address in dwords
+ * \param value		value to store
+ */
+static void lds_store(struct lp_build_tgsi_context * bld_base,
+		      unsigned swizzle, LLVMValueRef dw_addr,
+		      LLVMValueRef value)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
+	LLVMValueRef addr, dword;
+
+	addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+			    lp_build_const_int32(gallivm, swizzle));
+	dword = LLVMBuildBitCast(gallivm->builder, value, i32, "");
+
+	build_indexed_store(si_shader_ctx, si_shader_ctx->lds, addr, dword);
+}
+
+/**
+ * Fetch a TCS input from LDS.
+ *
+ * The address is the input offset of the current patch plus the register
+ * offset computed by get_dw_address(), with the vertex stride unpacked from
+ * SI_PARAM_TCS_IN_LAYOUT.
+ */
+static LLVMValueRef fetch_input_tcs(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef vertex_stride, patch_base, addr;
+
+	vertex_stride = unpack_param(si_shader_ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+	patch_base = get_tcs_in_current_patch_offset(si_shader_ctx);
+	addr = get_dw_address(si_shader_ctx, NULL, reg, vertex_stride, patch_base);
+
+	return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Read back a TCS output from LDS.
+ *
+ * Registers without a dimension are addressed relative to the per-patch
+ * data of the current patch; 2-dimensional registers are addressed relative
+ * to the outputs of the current patch, using the vertex stride unpacked from
+ * SI_PARAM_TCS_OUT_LAYOUT.
+ */
+static LLVMValueRef fetch_output_tcs(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef addr;
+
+	if (!reg->Register.Dimension) {
+		addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, addr);
+	} else {
+		LLVMValueRef vertex_stride =
+			unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+
+		addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, NULL, reg, vertex_stride, addr);
+	}
+
+	return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Fetch a TES input from LDS.
+ *
+ * Registers without a dimension are addressed relative to the per-patch
+ * data of the current patch; 2-dimensional registers are addressed relative
+ * to the TCS outputs of the current patch, using the vertex stride unpacked
+ * from SI_PARAM_TCS_OUT_LAYOUT.
+ */
+static LLVMValueRef fetch_input_tes(
+	struct lp_build_tgsi_context *bld_base,
+	const struct tgsi_full_src_register *reg,
+	enum tgsi_opcode_type type, unsigned swizzle)
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	LLVMValueRef addr;
+
+	if (!reg->Register.Dimension) {
+		addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, NULL, reg, NULL, addr);
+	} else {
+		LLVMValueRef vertex_stride =
+			unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+
+		addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, NULL, reg, vertex_stride, addr);
+	}
+
+	return lds_load(bld_base, type, swizzle, addr);
+}
+
+/**
+ * Store the result of a TCS instruction.
+ *
+ * Writes to TGSI_FILE_OUTPUT go to LDS, one dword per enabled channel of
+ * the first destination. Everything else, including vector-typed values,
+ * is forwarded to radeon_llvm_emit_store().
+ */
+static void store_output_tcs(struct lp_build_tgsi_context * bld_base,
+			     const struct tgsi_full_instruction * inst,
+			     const struct tgsi_opcode_info * info,
+			     LLVMValueRef dst[4])
+{
+	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+	unsigned chan_index;
+	LLVMValueRef addr;
+
+	/* Only per-patch and per-vertex outputs are handled here. */
+	if (reg->Register.File != TGSI_FILE_OUTPUT) {
+		radeon_llvm_emit_store(bld_base, inst, info, dst);
+		return;
+	}
+
+	/* Vectors will be lowered to scalars and this function will be
+	 * called again. */
+	if (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind) {
+		radeon_llvm_emit_store(bld_base, inst, info, dst);
+		return;
+	}
+
+	if (!reg->Register.Dimension) {
+		addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, reg, NULL, NULL, addr);
+	} else {
+		LLVMValueRef vertex_stride =
+			unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8);
+
+		addr = get_tcs_out_current_patch_offset(si_shader_ctx);
+		addr = get_dw_address(si_shader_ctx, reg, NULL, vertex_stride, addr);
+	}
+
+	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+		LLVMValueRef chan_value = dst[chan_index];
+
+		if (inst->Instruction.Saturate)
+			chan_value = radeon_llvm_saturate(bld_base, chan_value);
+
+		lds_store(bld_base, chan_index, addr, chan_value);
+	}
+}
+
static LLVMValueRef fetch_input_gs(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
@@ -342,13 +776,8 @@ static LLVMValueRef fetch_input_gs(
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
- if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID) {
- if (swizzle == 0)
- return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_PRIMITIVE_ID);
- else
- return uint->zero;
- }
+ if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
+ return get_primitive_id(bld_base, swizzle);
if (!reg->Register.Dimension)
return NULL;
@@ -380,7 +809,7 @@ static LLVMValueRef fetch_input_gs(
args[1] = vtx_offset;
args[2] = lp_build_const_int32(gallivm,
(get_param_index(semantic_name, semantic_index,
- shader->selector->gs_used_inputs) * 4 +
+ shader->selector->inputs_read) * 4 +
swizzle) * 256);
args[3] = uint->zero;
args[4] = uint->one; /* OFFEN */
@@ -390,13 +819,42 @@ static LLVMValueRef fetch_input_gs(
args[8] = uint->zero; /* TFE */
return LLVMBuildBitCast(gallivm->builder,
- build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(gallivm->builder,
"llvm.SI.buffer.load.dword.i32.i32",
i32, args, 9,
LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
tgsi2llvmtype(bld_base, type), "");
}
+/**
+ * Map a TGSI interpolation mode and location to a shader parameter index.
+ *
+ * \return 0 for constant interpolation, the SI_PARAM_LINEAR_* or
+ *         SI_PARAM_PERSP_* index matching \p location otherwise, or -1
+ *         (after printing a warning) for an unhandled interpolation mode
+ */
+static int lookup_interp_param_index(unsigned interpolate, unsigned location)
+{
+	if (interpolate == TGSI_INTERPOLATE_CONSTANT)
+		return 0;
+
+	if (interpolate == TGSI_INTERPOLATE_LINEAR) {
+		if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
+			return SI_PARAM_LINEAR_SAMPLE;
+		if (location == TGSI_INTERPOLATE_LOC_CENTROID)
+			return SI_PARAM_LINEAR_CENTROID;
+		return SI_PARAM_LINEAR_CENTER;
+	}
+
+	if (interpolate == TGSI_INTERPOLATE_COLOR ||
+	    interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
+		if (location == TGSI_INTERPOLATE_LOC_SAMPLE)
+			return SI_PARAM_PERSP_SAMPLE;
+		if (location == TGSI_INTERPOLATE_LOC_CENTROID)
+			return SI_PARAM_PERSP_CENTROID;
+		return SI_PARAM_PERSP_CENTER;
+	}
+
+	fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+	return -1;
+}
+
static void declare_input_fs(
struct radeon_llvm_context *radeon_bld,
unsigned input_index,
@@ -411,7 +869,8 @@ static void declare_input_fs(
LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
LLVMValueRef main_fn = radeon_bld->main_fn;
- LLVMValueRef interp_param;
+ LLVMValueRef interp_param = NULL;
+ int interp_param_idx;
const char * intr_name;
/* This value is:
@@ -460,31 +919,13 @@ static void declare_input_fs(
attr_number = lp_build_const_int32(gallivm,
shader->ps_input_param_offset[input_index]);
- switch (decl->Interp.Interpolate) {
- case TGSI_INTERPOLATE_CONSTANT:
- interp_param = 0;
- break;
- case TGSI_INTERPOLATE_LINEAR:
- if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_SAMPLE);
- else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID);
- else
- interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
- break;
- case TGSI_INTERPOLATE_COLOR:
- case TGSI_INTERPOLATE_PERSPECTIVE:
- if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_SAMPLE);
- else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
- else
- interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
- break;
- default:
- fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
+ shader->ps_input_interpolate[input_index] = decl->Interp.Interpolate;
+ interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate,
+ decl->Interp.Location);
+ if (interp_param_idx == -1)
return;
- }
+ else if (interp_param_idx)
+ interp_param = LLVMGetParam(main_fn, interp_param_idx);
/* fs.constant returns the param from the middle vertex, so it's not
* really useful for flat shading. It's meant to be used for custom
@@ -522,12 +963,12 @@ static void declare_input_fs(
args[0] = llvm_chan;
args[1] = attr_number;
- front = build_intrinsic(gallivm->builder, intr_name,
+ front = lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
args[1] = back_attr_number;
- back = build_intrinsic(gallivm->builder, intr_name,
+ back = lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -548,7 +989,7 @@ static void declare_input_fs(
args[2] = params;
args[3] = interp_param;
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
- build_intrinsic(gallivm->builder, intr_name,
+ lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
@@ -566,7 +1007,7 @@ static void declare_input_fs(
args[2] = params;
args[3] = interp_param;
radeon_bld->inputs[soa_index] =
- build_intrinsic(gallivm->builder, intr_name,
+ lp_build_intrinsic(gallivm->builder, intr_name,
input_type, args, args[3] ? 4 : 3,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
}
@@ -587,10 +1028,35 @@ static LLVMValueRef buffer_load_const(LLVMBuilderRef builder, LLVMValueRef resou
{
LLVMValueRef args[2] = {resource, offset};
- return build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
+ return lp_build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
}
+/**
+ * Load the position of a sample from the driver-state constant buffer
+ * (SI_DRIVER_STATE_CONST_BUF).
+ *
+ * \param sample_id  index of the sample
+ * \return a 4-component value (pos.x, pos.y, 0, 0)
+ */
+static LLVMValueRef load_sample_position(struct radeon_llvm_context *radeon_bld, LLVMValueRef sample_id)
+{
+	struct si_shader_context *si_shader_ctx =
+		si_shader_context(&radeon_bld->soa.bld_base);
+	struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
+	struct gallivm_state *gallivm = &radeon_bld->gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMTypeRef elem_type = radeon_bld->soa.bld_base.base.elem_type;
+	LLVMValueRef desc, buf_index, resource;
+	LLVMValueRef x_offset, y_offset;
+	LLVMValueRef pos[4];
+
+	desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+	buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
+	resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
+
+	/* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
+	x_offset = lp_build_mul_imm(uint_bld, sample_id, 8);
+	y_offset = LLVMBuildAdd(builder, x_offset, lp_build_const_int32(gallivm, 4), "");
+
+	pos[0] = buffer_load_const(builder, resource, x_offset, elem_type);
+	pos[1] = buffer_load_const(builder, resource, y_offset, elem_type);
+	pos[2] = lp_build_const_float(gallivm, 0);
+	pos[3] = lp_build_const_float(gallivm, 0);
+
+	return lp_build_gather_values(gallivm, pos, 4);
+}
+
static void declare_system_value(
struct radeon_llvm_context * radeon_bld,
unsigned index,
@@ -598,6 +1064,7 @@ static void declare_system_value(
{
struct si_shader_context *si_shader_ctx =
si_shader_context(&radeon_bld->soa.bld_base);
+ struct lp_build_context *bld = &radeon_bld->soa.bld_base.base;
struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
struct gallivm_state *gallivm = &radeon_bld->gallivm;
LLVMValueRef value = 0;
@@ -626,30 +1093,23 @@ static void declare_system_value(
SI_PARAM_BASE_VERTEX);
break;
+ case TGSI_SEMANTIC_INVOCATIONID:
+ if (si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL)
+ value = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+ else if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY)
+ value = LLVMGetParam(radeon_bld->main_fn,
+ SI_PARAM_GS_INSTANCE_ID);
+ else
+ assert(!"INVOCATIONID not implemented");
+ break;
+
case TGSI_SEMANTIC_SAMPLEID:
value = get_sample_id(radeon_bld);
break;
case TGSI_SEMANTIC_SAMPLEPOS:
- {
- LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
- LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
- LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
-
- /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
- LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, get_sample_id(radeon_bld), 8);
- LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");
-
- LLVMValueRef pos[4] = {
- buffer_load_const(builder, resource, offset0, radeon_bld->soa.bld_base.base.elem_type),
- buffer_load_const(builder, resource, offset1, radeon_bld->soa.bld_base.base.elem_type),
- lp_build_const_float(gallivm, 0),
- lp_build_const_float(gallivm, 0)
- };
- value = lp_build_gather_values(gallivm, pos, 4);
+ value = load_sample_position(radeon_bld, get_sample_id(radeon_bld));
break;
- }
case TGSI_SEMANTIC_SAMPLEMASK:
/* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
@@ -660,6 +1120,48 @@ static void declare_system_value(
value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
break;
+ case TGSI_SEMANTIC_TESSCOORD:
+ {
+ LLVMValueRef coord[4] = {
+ LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_u),
+ LLVMGetParam(radeon_bld->main_fn, si_shader_ctx->param_tes_v),
+ bld->zero,
+ bld->zero
+ };
+
+ /* For triangles, the vector should be (u, v, 1-u-v). */
+ if (si_shader_ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
+ PIPE_PRIM_TRIANGLES)
+ coord[2] = lp_build_sub(bld, bld->one,
+ lp_build_add(bld, coord[0], coord[1]));
+
+ value = lp_build_gather_values(gallivm, coord, 4);
+ break;
+ }
+
+ case TGSI_SEMANTIC_VERTICESIN:
+ value = unpack_param(si_shader_ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
+ break;
+
+ case TGSI_SEMANTIC_TESSINNER:
+ case TGSI_SEMANTIC_TESSOUTER:
+ {
+ LLVMValueRef dw_addr;
+ int param = si_shader_io_get_unique_index(decl->Semantic.Name, 0);
+
+ dw_addr = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ dw_addr = LLVMBuildAdd(gallivm->builder, dw_addr,
+ lp_build_const_int32(gallivm, param * 4), "");
+
+ value = lds_load(&radeon_bld->soa.bld_base, TGSI_TYPE_FLOAT,
+ ~0, dw_addr);
+ break;
+ }
+
+ case TGSI_SEMANTIC_PRIMID:
+ value = get_primitive_id(&radeon_bld->soa.bld_base, 0);
+ break;
+
default:
assert(!"unknown system value");
return;
@@ -679,7 +1181,7 @@ static LLVMValueRef fetch_constant(
const struct tgsi_ind_register *ireg = ®->Indirect;
unsigned buf, idx;
- LLVMValueRef addr;
+ LLVMValueRef addr, bufp;
LLVMValueRef result;
if (swizzle == LP_CHAN_ALL) {
@@ -694,8 +1196,24 @@ static LLVMValueRef fetch_constant(
buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
idx = reg->Register.Index * 4 + swizzle;
- if (!reg->Register.Indirect)
- return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
+ if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
+ if (type != TGSI_TYPE_DOUBLE)
+ return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
+ else {
+ return radeon_llvm_emit_fetch_double(bld_base,
+ si_shader_ctx->constants[buf][idx],
+ si_shader_ctx->constants[buf][idx + 1]);
+ }
+ }
+
+ if (reg->Register.Dimension && reg->Dimension.Indirect) {
+ LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+ LLVMValueRef index;
+ index = get_indirect_index(si_shader_ctx, ®->DimIndirect,
+ reg->Dimension.Index);
+ bufp = build_indexed_load_const(si_shader_ctx, ptr, index);
+ } else
+ bufp = si_shader_ctx->const_resource[buf];
addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
@@ -703,10 +1221,26 @@ static LLVMValueRef fetch_constant(
addr = lp_build_add(&bld_base->uint_bld, addr,
lp_build_const_int32(base->gallivm, idx * 4));
- result = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
- addr, base->elem_type);
+ result = buffer_load_const(base->gallivm->builder, bufp,
+ addr, bld_base->base.elem_type);
- return bitcast(bld_base, type, result);
+ if (type != TGSI_TYPE_DOUBLE)
+ result = bitcast(bld_base, type, result);
+ else {
+ LLVMValueRef addr2, result2;
+ addr2 = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
+ addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
+ addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
+ addr2 = lp_build_add(&bld_base->uint_bld, addr2,
+ lp_build_const_int32(base->gallivm, idx * 4));
+
+ result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
+ addr2, bld_base->base.elem_type);
+
+ result = radeon_llvm_emit_fetch_double(bld_base,
+ result, result2);
+ }
+ return result;
}
/* Initialize arguments for the shader export intrinsic */
@@ -745,7 +1279,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
args[0] = values[2 * chan];
args[1] = values[2 * chan + 1];
args[chan + 5] =
- build_intrinsic(base->gallivm->builder,
+ lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.packf16",
LLVMInt32TypeInContext(base->gallivm->context),
args, 2,
@@ -827,12 +1361,12 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
lp_build_const_float(gallivm, 1.0f),
lp_build_const_float(gallivm, -1.0f));
- build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(gallivm->builder,
"llvm.AMDGPU.kill",
LLVMVoidTypeInContext(gallivm->context),
&arg, 1, 0);
} else {
- build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(gallivm->builder,
"llvm.AMDGPU.kilp",
LLVMVoidTypeInContext(gallivm->context),
NULL, 0, 0);
@@ -853,7 +1387,7 @@ static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base
SI_PARAM_SAMPLE_COVERAGE);
coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
- coverage = build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
+ coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
bld_base->int_bld.elem_type,
&coverage, 1, LLVMReadNoneAttribute);
@@ -983,16 +1517,16 @@ static void build_tbuffer_store(struct si_shader_context *shader,
lp_build_intrinsic(gallivm->builder, name,
LLVMVoidTypeInContext(gallivm->context),
- args, Elements(args));
+ args, Elements(args), 0);
}
-static void build_streamout_store(struct si_shader_context *shader,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef vaddr,
- LLVMValueRef soffset,
- unsigned inst_offset)
+static void build_tbuffer_store_dwords(struct si_shader_context *shader,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ unsigned num_channels,
+ LLVMValueRef vaddr,
+ LLVMValueRef soffset,
+ unsigned inst_offset)
{
static unsigned dfmt[] = {
V_008F0C_BUF_DATA_FORMAT_32,
@@ -1025,13 +1559,16 @@ static void si_llvm_emit_streamout(struct si_shader_context *shader,
LLVMValueRef so_vtx_count =
unpack_param(shader, shader->param_streamout_config, 16, 7);
- LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
+ LLVMValueRef tid = lp_build_intrinsic(builder, "llvm.SI.tid", i32,
NULL, 0, LLVMReadNoneAttribute);
/* can_emit = tid < so_vtx_count; */
LLVMValueRef can_emit =
LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
+ LLVMValueRef stream_id =
+ unpack_param(shader, shader->param_streamout_config, 24, 2);
+
/* Emit the streamout code conditionally. This actually avoids
* out-of-bounds buffer access. The hw tells us via the SGPR
* (so_vtx_count) which threads are allowed to emit streamout data. */
@@ -1071,7 +1608,9 @@ static void si_llvm_emit_streamout(struct si_shader_context *shader,
unsigned reg = so->output[i].register_index;
unsigned start = so->output[i].start_component;
unsigned num_comps = so->output[i].num_components;
+ unsigned stream = so->output[i].stream;
LLVMValueRef out[4];
+ struct lp_build_if_state if_ctx_stream;
assert(num_comps && num_comps <= 4);
if (!num_comps || num_comps > 4)
@@ -1105,11 +1644,18 @@ static void si_llvm_emit_streamout(struct si_shader_context *shader,
break;
}
- build_streamout_store(shader, shader->so_buffers[buf_idx],
- vdata, num_comps,
- so_write_offset[buf_idx],
- LLVMConstInt(i32, 0, 0),
- so->output[i].dst_offset*4);
+ LLVMValueRef can_emit_stream =
+ LLVMBuildICmp(builder, LLVMIntEQ,
+ stream_id,
+ lp_build_const_int32(gallivm, stream), "");
+
+ lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
+ build_tbuffer_store_dwords(shader, shader->so_buffers[buf_idx],
+ vdata, num_comps,
+ so_write_offset[buf_idx],
+ LLVMConstInt(i32, 0, 0),
+ so->output[i].dst_offset*4);
+ lp_build_endif(&if_ctx_stream);
}
}
lp_build_endif(&if_ctx);
@@ -1128,7 +1674,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
LLVMValueRef args[9];
LLVMValueRef pos_args[4][9] = { { 0 } };
- LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL;
+ LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
unsigned semantic_name, semantic_index;
unsigned target;
unsigned param_count = 0;
@@ -1154,7 +1700,12 @@ handle_semantic:
continue;
case TGSI_SEMANTIC_LAYER:
layer_value = outputs[i].values[0];
- continue;
+ semantic_name = TGSI_SEMANTIC_GENERIC;
+ goto handle_semantic;
+ case TGSI_SEMANTIC_VIEWPORT_INDEX:
+ viewport_index_value = outputs[i].values[0];
+ semantic_name = TGSI_SEMANTIC_GENERIC;
+ goto handle_semantic;
case TGSI_SEMANTIC_POSITION:
target = V_008DFC_SQ_EXP_POS;
break;
@@ -1195,7 +1746,7 @@ handle_semantic:
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
+ args, 9, 0);
}
if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
@@ -1204,6 +1755,8 @@ handle_semantic:
}
}
+ shader->nr_param_exports = param_count;
+
/* We need to add the position output manually if it's missing. */
if (!pos_args[0][0]) {
pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
@@ -1220,11 +1773,13 @@ handle_semantic:
/* Write the misc vector (point size, edgeflag, layer, viewport). */
if (shader->selector->info.writes_psize ||
shader->selector->info.writes_edgeflag ||
+ shader->selector->info.writes_viewport_index ||
shader->selector->info.writes_layer) {
pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
shader->selector->info.writes_psize |
(shader->selector->info.writes_edgeflag << 1) |
- (shader->selector->info.writes_layer << 2));
+ (shader->selector->info.writes_layer << 2) |
+ (shader->selector->info.writes_viewport_index << 3));
pos_args[1][1] = uint->zero; /* EXEC mask */
pos_args[1][2] = uint->zero; /* last export? */
pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
@@ -1255,6 +1810,9 @@ handle_semantic:
if (shader->selector->info.writes_layer)
pos_args[1][7] = layer_value;
+
+ if (shader->selector->info.writes_viewport_index)
+ pos_args[1][8] = viewport_index_value;
}
for (i = 0; i < 4; i++)
@@ -1276,7 +1834,133 @@ handle_semantic:
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- pos_args[i], 9);
+ pos_args[i], 9, 0);
+ }
+}
+
+/* This only writes the tessellation factor levels. */
+static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct si_shader *shader = si_shader_ctx->shader;
+ unsigned tess_inner_index, tess_outer_index;
+ LLVMValueRef lds_base, lds_inner, lds_outer;
+ LLVMValueRef tf_base, rel_patch_id, byteoffset, buffer, rw_buffers;
+ LLVMValueRef out[6], vec0, vec1, invocation_id;
+ unsigned stride, outer_comps, inner_comps, i;
+ struct lp_build_if_state if_ctx;
+
+ invocation_id = unpack_param(si_shader_ctx, SI_PARAM_REL_IDS, 8, 5);
+
+ /* Do this only for invocation 0, because the tess levels are per-patch,
+ * not per-vertex.
+ *
+ * This can't jump, because invocation 0 executes this. It should
+ * at least mask out the loads and stores for other invocations.
+ */
+ lp_build_if(&if_ctx, gallivm,
+ LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
+ invocation_id, bld_base->uint_bld.zero, ""));
+
+ /* Determine the layout of one tess factor element in the buffer. */
+ switch (shader->key.tcs.prim_mode) {
+ case PIPE_PRIM_LINES:
+ stride = 2; /* 2 dwords, 1 vec2 store */
+ outer_comps = 2;
+ inner_comps = 0;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ stride = 4; /* 4 dwords, 1 vec4 store */
+ outer_comps = 3;
+ inner_comps = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
+ outer_comps = 4;
+ inner_comps = 2;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ /* Load tess_inner and tess_outer from LDS.
+ * Any invocation can write them, so we can't get them from a temporary.
+ */
+ tess_inner_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSINNER, 0);
+ tess_outer_index = si_shader_io_get_unique_index(TGSI_SEMANTIC_TESSOUTER, 0);
+
+ lds_base = get_tcs_out_current_patch_data_offset(si_shader_ctx);
+ lds_inner = LLVMBuildAdd(gallivm->builder, lds_base,
+ lp_build_const_int32(gallivm,
+ tess_inner_index * 4), "");
+ lds_outer = LLVMBuildAdd(gallivm->builder, lds_base,
+ lp_build_const_int32(gallivm,
+ tess_outer_index * 4), "");
+
+ for (i = 0; i < outer_comps; i++)
+ out[i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
+ for (i = 0; i < inner_comps; i++)
+ out[outer_comps+i] = lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+
+ /* Convert the outputs to vectors for stores. */
+ vec0 = lp_build_gather_values(gallivm, out, MIN2(stride, 4));
+ vec1 = NULL;
+
+ if (stride > 4)
+ vec1 = lp_build_gather_values(gallivm, out+4, stride - 4);
+
+ /* Get the buffer. */
+ rw_buffers = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_RW_BUFFERS);
+ buffer = build_indexed_load_const(si_shader_ctx, rw_buffers,
+ lp_build_const_int32(gallivm, SI_RING_TESS_FACTOR));
+
+ /* Get the offset. */
+ tf_base = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ SI_PARAM_TESS_FACTOR_OFFSET);
+ rel_patch_id = get_rel_patch_id(si_shader_ctx);
+ byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id,
+ lp_build_const_int32(gallivm, 4 * stride), "");
+
+ /* Store the outputs. */
+ build_tbuffer_store_dwords(si_shader_ctx, buffer, vec0,
+ MIN2(stride, 4), byteoffset, tf_base, 0);
+ if (vec1)
+ build_tbuffer_store_dwords(si_shader_ctx, buffer, vec1,
+ stride - 4, byteoffset, tf_base, 16);
+ lp_build_endif(&if_ctx);
+}
+
+static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context * bld_base)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct si_shader *shader = si_shader_ctx->shader;
+ struct tgsi_shader_info *info = &shader->selector->info;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ unsigned i, chan;
+ LLVMValueRef vertex_id = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+ si_shader_ctx->param_rel_auto_id);
+ LLVMValueRef vertex_dw_stride =
+ unpack_param(si_shader_ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
+ LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
+ vertex_dw_stride, "");
+
+ /* Write outputs to LDS. The next shader (TCS aka HS) will read
+ * its inputs from it. */
+ for (i = 0; i < info->num_outputs; i++) {
+ LLVMValueRef *out_ptr = si_shader_ctx->radeon_bld.soa.outputs[i];
+ unsigned name = info->output_semantic_name[i];
+ unsigned index = info->output_semantic_index[i];
+ int param = si_shader_io_get_unique_index(name, index);
+ LLVMValueRef dw_addr = LLVMBuildAdd(gallivm->builder, base_dw_addr,
+ lp_build_const_int32(gallivm, param * 4), "");
+
+ for (chan = 0; chan < 4; chan++) {
+ lds_store(bld_base, chan, dw_addr,
+ LLVMBuildLoad(gallivm->builder, out_ptr[chan], ""));
+ }
}
}
@@ -1288,17 +1972,25 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
struct tgsi_shader_info *info = &es->selector->info;
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
- SI_PARAM_ES2GS_OFFSET);
+ si_shader_ctx->param_es2gs_offset);
+ uint64_t enabled_outputs = si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL ?
+ es->key.tes.es_enabled_outputs :
+ es->key.vs.es_enabled_outputs;
unsigned chan;
int i;
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
si_shader_ctx->radeon_bld.soa.outputs[i];
- int param_index = get_param_index(info->output_semantic_name[i],
- info->output_semantic_index[i],
- es->key.vs.gs_used_inputs);
+ int param_index;
+ if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
+ info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
+ continue;
+
+ param_index = get_param_index(info->output_semantic_name[i],
+ info->output_semantic_index[i],
+ enabled_outputs);
if (param_index < 0)
continue;
@@ -1326,7 +2018,7 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE);
args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
- build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+ lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
LLVMVoidTypeInContext(gallivm->context), args, 2,
LLVMNoUnwindAttribute);
}
@@ -1339,7 +2031,7 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
struct si_shader_output_values *outputs = NULL;
int i,j;
- outputs = MALLOC(info->num_outputs * sizeof(outputs[0]));
+ outputs = MALLOC((info->num_outputs + 1) * sizeof(outputs[0]));
for (i = 0; i < info->num_outputs; i++) {
outputs[i].name = info->output_semantic_name[i];
@@ -1352,7 +2044,19 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
"");
}
- si_llvm_export_vs(bld_base, outputs, info->num_outputs);
+ /* Export PrimitiveID when PS needs it. */
+ if (si_vs_exports_prim_id(si_shader_ctx->shader)) {
+ outputs[i].name = TGSI_SEMANTIC_PRIMID;
+ outputs[i].sid = 0;
+ outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
+ get_primitive_id(bld_base, 0));
+ outputs[i].values[1] = bld_base->base.undef;
+ outputs[i].values[2] = bld_base->base.undef;
+ outputs[i].values[3] = bld_base->base.undef;
+ i++;
+ }
+
+ si_llvm_export_vs(bld_base, outputs, i);
FREE(outputs);
}
@@ -1417,7 +2121,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- last_args, 9);
+ last_args, 9, 0);
}
/* This instruction will be emitted at the end of the shader. */
@@ -1434,14 +2138,14 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
+ args, 9, 0);
}
}
} else {
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
+ args, 9, 0);
}
}
@@ -1503,7 +2207,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- args, 9);
+ args, 9, 0);
else
memcpy(last_args, args, sizeof(args));
}
@@ -1534,7 +2238,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
lp_build_intrinsic(base->gallivm->builder,
"llvm.SI.export",
LLVMVoidTypeInContext(base->gallivm->context),
- last_args, 9);
+ last_args, 9, 0);
}
static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
@@ -1563,15 +2267,36 @@ static void tex_fetch_args(
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned opcode = inst->Instruction.Opcode;
unsigned target = inst->Texture.Texture;
- LLVMValueRef coords[5];
+ LLVMValueRef coords[5], derivs[6];
LLVMValueRef address[16];
int ref_pos;
unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
unsigned count = 0;
unsigned chan;
- unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
- unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+ unsigned sampler_src;
+ unsigned sampler_index;
+ unsigned num_deriv_channels = 0;
bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
+ LLVMValueRef res_ptr, samp_ptr;
+
+ sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
+ sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
+
+ if (emit_data->inst->Src[sampler_src].Register.Indirect) {
+ const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
+ LLVMValueRef ind_index;
+
+ ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
+
+ res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+
+ samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+ samp_ptr = build_indexed_load_const(si_shader_ctx, samp_ptr, ind_index);
+ } else {
+ res_ptr = si_shader_ctx->resources[sampler_index];
+ samp_ptr = si_shader_ctx->samplers[sampler_index];
+ }
if (target == TGSI_TEXTURE_BUFFER) {
LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
@@ -1580,7 +2305,7 @@ static void tex_fetch_args(
LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
/* Bitcast and truncate v8i32 to v16i8. */
- LLVMValueRef res = si_shader_ctx->resources[sampler_index];
+ LLVMValueRef res = res_ptr;
res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
@@ -1649,18 +2374,13 @@ static void tex_fetch_args(
}
}
- if (target == TGSI_TEXTURE_CUBE ||
- target == TGSI_TEXTURE_CUBE_ARRAY ||
- target == TGSI_TEXTURE_SHADOWCUBE ||
- target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
- radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
-
/* Pack user derivatives */
if (opcode == TGSI_OPCODE_TXD) {
- int num_deriv_channels, param;
+ int param, num_src_deriv_channels;
switch (target) {
case TGSI_TEXTURE_3D:
+ num_src_deriv_channels = 3;
num_deriv_channels = 3;
break;
case TGSI_TEXTURE_2D:
@@ -1669,27 +2389,44 @@ static void tex_fetch_args(
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ num_src_deriv_channels = 2;
+ num_deriv_channels = 2;
+ break;
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_CUBE_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ /* Cube derivatives will be converted to 2D. */
+ num_src_deriv_channels = 3;
num_deriv_channels = 2;
break;
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ num_src_deriv_channels = 1;
num_deriv_channels = 1;
break;
default:
assert(0); /* no other targets are valid here */
}
- for (param = 1; param <= 2; param++)
- for (chan = 0; chan < num_deriv_channels; chan++)
- address[count++] = lp_build_emit_fetch(bld_base, inst, param, chan);
+ for (param = 0; param < 2; param++)
+ for (chan = 0; chan < num_src_deriv_channels; chan++)
+ derivs[param * num_src_deriv_channels + chan] =
+ lp_build_emit_fetch(bld_base, inst, param+1, chan);
}
+ if (target == TGSI_TEXTURE_CUBE ||
+ target == TGSI_TEXTURE_CUBE_ARRAY ||
+ target == TGSI_TEXTURE_SHADOWCUBE ||
+ target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
+ radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords, derivs);
+
+ if (opcode == TGSI_OPCODE_TXD)
+ for (int i = 0; i < num_deriv_channels * 2; i++)
+ address[count++] = derivs[i];
+
/* Pack texture coordinates */
address[count++] = coords[0];
if (num_coords > 1)
@@ -1806,7 +2543,7 @@ static void tex_fetch_args(
}
/* Resource */
- emit_data->args[1] = si_shader_ctx->resources[sampler_index];
+ emit_data->args[1] = res_ptr;
if (opcode == TGSI_OPCODE_TXF) {
/* add tex offsets */
@@ -1889,7 +2626,7 @@ static void tex_fetch_args(
dmask = 1 << gather_comp;
}
- emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+ emit_data->args[2] = samp_ptr;
emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
@@ -1905,7 +2642,7 @@ static void tex_fetch_args(
LLVMFloatTypeInContext(gallivm->context),
4);
} else {
- emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
+ emit_data->args[2] = samp_ptr;
emit_data->args[3] = lp_build_const_int32(gallivm, target);
emit_data->arg_count = 4;
@@ -1940,7 +2677,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
emit_data->inst->Texture.NumOffsets > 0 : false;
if (target == TGSI_TEXTURE_BUFFER) {
- emit_data->output[emit_data->chan] = build_intrinsic(
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder,
"llvm.SI.vs.load.input", emit_data->dst_type,
emit_data->args, emit_data->arg_count,
@@ -1989,7 +2726,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
is_shadow ? ".c" : "", infix, has_offset ? ".o" : "",
LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
- emit_data->output[emit_data->chan] = build_intrinsic(
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder, intr_name, emit_data->dst_type,
emit_data->args, emit_data->arg_count,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -2036,7 +2773,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
sprintf(intr_name, "%s.v%ui32", name,
LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
- emit_data->output[emit_data->chan] = build_intrinsic(
+ emit_data->output[emit_data->chan] = lp_build_intrinsic(
base->gallivm->builder, intr_name, emit_data->dst_type,
emit_data->args, emit_data->arg_count,
LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
@@ -2050,17 +2787,47 @@ static void txq_fetch_args(
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
const struct tgsi_full_instruction *inst = emit_data->inst;
struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
unsigned target = inst->Texture.Texture;
+ LLVMValueRef res_ptr;
+
+ if (inst->Src[1].Register.Indirect) {
+ const struct tgsi_full_src_register *reg = &inst->Src[1];
+ LLVMValueRef ind_index;
+
+ ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
+
+ res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+ res_ptr = build_indexed_load_const(si_shader_ctx, res_ptr,
+ ind_index);
+ } else
+ res_ptr = si_shader_ctx->resources[inst->Src[1].Register.Index];
if (target == TGSI_TEXTURE_BUFFER) {
LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
/* Read the size from the buffer descriptor directly. */
- LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index];
- size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
- size = LLVMBuildExtractElement(gallivm->builder, size,
- lp_build_const_int32(gallivm, 6), "");
+ LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+ LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+ lp_build_const_int32(gallivm, 6), "");
+
+ if (si_shader_ctx->screen->b.chip_class >= VI) {
+ /* On VI, the descriptor contains the size in bytes,
+ * but TXQ must return the size in elements.
+ * The stride is always non-zero for resources using TXQ.
+ */
+ LLVMValueRef stride =
+ LLVMBuildExtractElement(builder, res,
+ lp_build_const_int32(gallivm, 5), "");
+ stride = LLVMBuildLShr(builder, stride,
+ lp_build_const_int32(gallivm, 16), "");
+ stride = LLVMBuildAnd(builder, stride,
+ lp_build_const_int32(gallivm, 0x3FFF), "");
+
+ size = LLVMBuildUDiv(builder, size, stride, "");
+ }
+
emit_data->args[0] = size;
return;
}
@@ -2069,7 +2836,7 @@ static void txq_fetch_args(
emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
/* Resource */
- emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index];
+ emit_data->args[1] = res_ptr;
/* Texture target */
if (target == TGSI_TEXTURE_CUBE_ARRAY ||
@@ -2116,6 +2883,35 @@ static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
}
}
+/*
+ * SI implements derivatives using the local data store (LDS).
+ * All writes to the LDS happen in all executing threads at
+ * the same time. TID is the Thread ID for the current
+ * thread and is a value between 0 and 63, representing
+ * the thread's position in the wavefront.
+ *
+ * In the pixel shader, threads are grouped into quads of four pixels.
+ * The TIDs of the pixels of a quad are:
+ *
+ * +------+------+
+ * |4n + 0|4n + 1|
+ * +------+------+
+ * |4n + 2|4n + 3|
+ * +------+------+
+ *
+ * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
+ * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
+ * the current pixel's column, and masking with 0xfffffffe yields the TID
+ * of the left pixel of the current pixel's row.
+ *
+ * Adding 1 yields the TID of the pixel to the right of the left pixel, and
+ * adding 2 yields the TID of the pixel below the top pixel.
+ */
+/* masks for thread ID. */
+#define TID_MASK_TOP_LEFT 0xfffffffc
+#define TID_MASK_TOP 0xfffffffd
+#define TID_MASK_LEFT 0xfffffffe
+
static void si_llvm_emit_ddxy(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -2132,25 +2928,34 @@ static void si_llvm_emit_ddxy(
LLVMTypeRef i32;
unsigned swizzle[4];
unsigned c;
+ int idx;
+ unsigned mask;
i32 = LLVMInt32TypeInContext(gallivm->context);
indices[0] = bld_base->uint_bld.zero;
- indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
+ indices[1] = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
NULL, 0, LLVMReadNoneAttribute);
- store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
+ if (opcode == TGSI_OPCODE_DDX_FINE)
+ mask = TID_MASK_LEFT;
+ else if (opcode == TGSI_OPCODE_DDY_FINE)
+ mask = TID_MASK_TOP;
+ else
+ mask = TID_MASK_TOP_LEFT;
+
indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm, 0xfffffffc), "");
- load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ lp_build_const_int32(gallivm, mask), "");
+ load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
+ /* For DDX we want the next X pixel, for DDY the next Y pixel. */
+ idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 : 2;
indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
- lp_build_const_int32(gallivm,
- opcode == TGSI_OPCODE_DDX ? 1 : 2),
- "");
- load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
+ lp_build_const_int32(gallivm, idx), "");
+ load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
indices, 2, "");
for (c = 0; c < 4; ++c) {
@@ -2184,6 +2989,247 @@ static void si_llvm_emit_ddxy(
emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
}
+/*
+ * This takes an I,J coordinate pair
+ * and works out the X and Y derivatives.
+ * It returns DDX(I), DDX(J), DDY(I), DDY(J).
+ */
+static LLVMValueRef si_llvm_emit_ddxy_interp(
+ struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef interp_ij)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_context *base = &bld_base->base;
+ LLVMValueRef indices[2];
+ LLVMValueRef store_ptr, load_ptr_x, load_ptr_y, load_ptr_ddx, load_ptr_ddy, temp, temp2;
+ LLVMValueRef tl, tr, bl, result[4];
+ LLVMTypeRef i32;
+ unsigned c;
+
+ i32 = LLVMInt32TypeInContext(gallivm->context);
+
+ indices[0] = bld_base->uint_bld.zero;
+ indices[1] = lp_build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
+ NULL, 0, LLVMReadNoneAttribute);
+ store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ temp = LLVMBuildAnd(gallivm->builder, indices[1],
+ lp_build_const_int32(gallivm, TID_MASK_LEFT), "");
+
+ temp2 = LLVMBuildAnd(gallivm->builder, indices[1],
+ lp_build_const_int32(gallivm, TID_MASK_TOP), "");
+
+ indices[1] = temp;
+ load_ptr_x = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ indices[1] = temp2;
+ load_ptr_y = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ indices[1] = LLVMBuildAdd(gallivm->builder, temp,
+ lp_build_const_int32(gallivm, 1), "");
+ load_ptr_ddx = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ indices[1] = LLVMBuildAdd(gallivm->builder, temp2,
+ lp_build_const_int32(gallivm, 2), "");
+ load_ptr_ddy = LLVMBuildGEP(gallivm->builder, si_shader_ctx->lds,
+ indices, 2, "");
+
+ for (c = 0; c < 2; ++c) {
+ LLVMValueRef store_val;
+ LLVMValueRef c_ll = lp_build_const_int32(gallivm, c);
+
+ store_val = LLVMBuildExtractElement(gallivm->builder,
+ interp_ij, c_ll, "");
+ LLVMBuildStore(gallivm->builder,
+ store_val,
+ store_ptr);
+
+ tl = LLVMBuildLoad(gallivm->builder, load_ptr_x, "");
+ tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
+
+ tr = LLVMBuildLoad(gallivm->builder, load_ptr_ddx, "");
+ tr = LLVMBuildBitCast(gallivm->builder, tr, base->elem_type, "");
+
+ result[c] = LLVMBuildFSub(gallivm->builder, tr, tl, "");
+
+ tl = LLVMBuildLoad(gallivm->builder, load_ptr_y, "");
+ tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
+
+ bl = LLVMBuildLoad(gallivm->builder, load_ptr_ddy, "");
+ bl = LLVMBuildBitCast(gallivm->builder, bl, base->elem_type, "");
+
+ result[c + 2] = LLVMBuildFSub(gallivm->builder, bl, tl, "");
+ }
+
+ return lp_build_gather_values(gallivm, result, 4);
+}
+
+static void interp_fetch_args(
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
+ /* offset is in second src, first two channels */
+ emit_data->args[0] = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 1,
+ 0);
+ emit_data->args[1] = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 1,
+ 1);
+ emit_data->arg_count = 2;
+ } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ LLVMValueRef sample_position;
+ LLVMValueRef sample_id;
+ LLVMValueRef halfval = lp_build_const_float(gallivm, 0.5f);
+
+ /* fetch sample ID, then fetch its sample position,
+ * and place into first two channels.
+ */
+ sample_id = lp_build_emit_fetch(bld_base,
+ emit_data->inst, 1, 0);
+ sample_id = LLVMBuildBitCast(gallivm->builder, sample_id,
+ LLVMInt32TypeInContext(gallivm->context),
+ "");
+ sample_position = load_sample_position(&si_shader_ctx->radeon_bld, sample_id);
+
+ emit_data->args[0] = LLVMBuildExtractElement(gallivm->builder,
+ sample_position,
+ lp_build_const_int32(gallivm, 0), "");
+
+ emit_data->args[0] = LLVMBuildFSub(gallivm->builder, emit_data->args[0], halfval, "");
+ emit_data->args[1] = LLVMBuildExtractElement(gallivm->builder,
+ sample_position,
+ lp_build_const_int32(gallivm, 1), "");
+ emit_data->args[1] = LLVMBuildFSub(gallivm->builder, emit_data->args[1], halfval, "");
+ emit_data->arg_count = 2;
+ }
+}
+
+static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct si_shader *shader = si_shader_ctx->shader;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMValueRef interp_param;
+ const struct tgsi_full_instruction *inst = emit_data->inst;
+ const char *intr_name;
+ int input_index;
+ int chan;
+ int i;
+ LLVMValueRef attr_number;
+ LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
+ LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
+ int interp_param_idx;
+ unsigned location;
+
+ assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
+ input_index = inst->Src[0].Register.Index;
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE)
+ location = TGSI_INTERPOLATE_LOC_CENTER;
+ else
+ location = TGSI_INTERPOLATE_LOC_CENTROID;
+
+ interp_param_idx = lookup_interp_param_index(shader->ps_input_interpolate[input_index],
+ location);
+ if (interp_param_idx == -1)
+ return;
+ else if (interp_param_idx)
+ interp_param = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, interp_param_idx);
+ else
+ interp_param = NULL;
+
+ attr_number = lp_build_const_int32(gallivm,
+ shader->ps_input_param_offset[input_index]);
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+ inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+ LLVMValueRef ij_out[2];
+ LLVMValueRef ddxy_out = si_llvm_emit_ddxy_interp(bld_base, interp_param);
+
+ /*
+ * take the I then J parameters, and the DDX/Y for it, and
+ * calculate the IJ inputs for the interpolator.
+ * temp1 = ddx * offset/sample.x + I;
+ * interp_param.I = ddy * offset/sample.y + temp1;
+ * temp1 = ddx * offset/sample.x + J;
+ * interp_param.J = ddy * offset/sample.y + temp1;
+ */
+ for (i = 0; i < 2; i++) {
+ LLVMValueRef ix_ll = lp_build_const_int32(gallivm, i);
+ LLVMValueRef iy_ll = lp_build_const_int32(gallivm, i + 2);
+ LLVMValueRef ddx_el = LLVMBuildExtractElement(gallivm->builder,
+ ddxy_out, ix_ll, "");
+ LLVMValueRef ddy_el = LLVMBuildExtractElement(gallivm->builder,
+ ddxy_out, iy_ll, "");
+ LLVMValueRef interp_el = LLVMBuildExtractElement(gallivm->builder,
+ interp_param, ix_ll, "");
+ LLVMValueRef temp1, temp2;
+
+ interp_el = LLVMBuildBitCast(gallivm->builder, interp_el,
+ LLVMFloatTypeInContext(gallivm->context), "");
+
+ temp1 = LLVMBuildFMul(gallivm->builder, ddx_el, emit_data->args[0], "");
+
+ temp1 = LLVMBuildFAdd(gallivm->builder, temp1, interp_el, "");
+
+ temp2 = LLVMBuildFMul(gallivm->builder, ddy_el, emit_data->args[1], "");
+
+ temp2 = LLVMBuildFAdd(gallivm->builder, temp2, temp1, "");
+
+ ij_out[i] = LLVMBuildBitCast(gallivm->builder,
+ temp2,
+ LLVMIntTypeInContext(gallivm->context, 32), "");
+ }
+ interp_param = lp_build_gather_values(bld_base->base.gallivm, ij_out, 2);
+ }
+
+ intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
+ for (chan = 0; chan < 2; chan++) {
+ LLVMValueRef args[4];
+ LLVMValueRef llvm_chan;
+ unsigned schan;
+
+ schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);
+ llvm_chan = lp_build_const_int32(gallivm, schan);
+
+ args[0] = llvm_chan;
+ args[1] = attr_number;
+ args[2] = params;
+ args[3] = interp_param;
+
+ emit_data->output[chan] =
+ lp_build_intrinsic(gallivm->builder, intr_name,
+ input_type, args, args[3] ? 4 : 3,
+ LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
+ }
+}
+
+static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
+ struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
+ unsigned stream;
+
+ assert(src0.File == TGSI_FILE_IMMEDIATE);
+
+ stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 0x3;
+ return stream;
+}
+
/* Emit one vertex from the geometry shader */
static void si_llvm_emit_vertex(
const struct lp_build_tgsi_action *action,
@@ -2203,9 +3249,14 @@ static void si_llvm_emit_vertex(
LLVMValueRef args[2];
unsigned chan;
int i;
+ unsigned stream;
+
+ stream = si_llvm_get_stream(bld_base, emit_data);
/* Write vertex attribute values to GSVS ring */
- gs_next_vertex = LLVMBuildLoad(gallivm->builder, si_shader_ctx->gs_next_vertex, "");
+ gs_next_vertex = LLVMBuildLoad(gallivm->builder,
+ si_shader_ctx->gs_next_vertex[stream],
+ "");
/* If this thread has already emitted the declared maximum number of
* vertices, kill it: excessive vertex emissions are not supposed to
@@ -2218,8 +3269,9 @@ static void si_llvm_emit_vertex(
kill = lp_build_select(&bld_base->base, can_emit,
lp_build_const_float(gallivm, 1.0f),
lp_build_const_float(gallivm, -1.0f));
- build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
- LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
+
+ lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+ LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
for (i = 0; i < info->num_outputs; i++) {
LLVMValueRef *out_ptr =
@@ -2237,7 +3289,7 @@ static void si_llvm_emit_vertex(
out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
build_tbuffer_store(si_shader_ctx,
- si_shader_ctx->gsvs_ring,
+ si_shader_ctx->gsvs_ring[stream],
out_val, 1,
voffset, soffset, 0,
V_008F0C_BUF_DATA_FORMAT_32,
@@ -2247,12 +3299,13 @@ static void si_llvm_emit_vertex(
}
gs_next_vertex = lp_build_add(uint, gs_next_vertex,
lp_build_const_int32(gallivm, 1));
- LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex);
+
+ LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex[stream]);
/* Signal vertex emission */
- args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS);
+ args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS | (stream << 8));
args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
- build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+ lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
LLVMVoidTypeInContext(gallivm->context), args, 2,
LLVMNoUnwindAttribute);
}
@@ -2266,15 +3319,28 @@ static void si_llvm_emit_primitive(
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMValueRef args[2];
+ unsigned stream;
/* Signal primitive cut */
- args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS);
+ stream = si_llvm_get_stream(bld_base, emit_data);
+ args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS | (stream << 8));
args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
- build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
+ lp_build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
LLVMVoidTypeInContext(gallivm->context), args, 2,
LLVMNoUnwindAttribute);
}
+static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+
+ lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.barrier.local",
+ LLVMVoidTypeInContext(gallivm->context), NULL, 0,
+ LLVMNoUnwindAttribute);
+}
+
static const struct lp_build_tgsi_action tex_action = {
.fetch_args = tex_fetch_args,
.emit = build_tex_intrinsic,
@@ -2286,6 +3352,11 @@ static const struct lp_build_tgsi_action txq_action = {
.intr_name = "llvm.SI.resinfo"
};
+static const struct lp_build_tgsi_action interp_action = {
+ .fetch_args = interp_fetch_args,
+ .emit = build_interp_intrinsic,
+};
+
static void create_meta_data(struct si_shader_context *si_shader_ctx)
{
struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
@@ -2304,6 +3375,27 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
CONST_ADDR_SPACE);
}
+static void declare_streamout_params(struct si_shader_context *si_shader_ctx,
+ struct pipe_stream_output_info *so,
+ LLVMTypeRef *params, LLVMTypeRef i32,
+ unsigned *num_params)
+{
+ int i;
+
+ /* Streamout SGPRs. */
+ if (so->num_outputs) {
+ params[si_shader_ctx->param_streamout_config = (*num_params)++] = i32;
+ params[si_shader_ctx->param_streamout_write_index = (*num_params)++] = i32;
+ }
+ /* A streamout buffer offset is loaded if the stride is non-zero. */
+ for (i = 0; i < 4; i++) {
+ if (!so->stride[i])
+ continue;
+
+ params[si_shader_ctx->param_streamout_offset[i] = (*num_params)++] = i32;
+ }
+}
+
static void create_function(struct si_shader_context *si_shader_ctx)
{
struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
@@ -2336,8 +3428,10 @@ static void create_function(struct si_shader_context *si_shader_ctx)
num_params = SI_PARAM_START_INSTANCE+1;
if (shader->key.vs.as_es) {
- params[SI_PARAM_ES2GS_OFFSET] = i32;
- num_params++;
+ params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+ } else if (shader->key.vs.as_ls) {
+ params[SI_PARAM_LS_OUT_LAYOUT] = i32;
+ num_params = SI_PARAM_LS_OUT_LAYOUT+1;
} else {
if (shader->is_gs_copy_shader) {
last_array_pointer = SI_PARAM_CONST;
@@ -2345,30 +3439,52 @@ static void create_function(struct si_shader_context *si_shader_ctx)
}
/* The locations of the other parameters are assigned dynamically. */
-
- /* Streamout SGPRs. */
- if (shader->selector->so.num_outputs) {
- params[si_shader_ctx->param_streamout_config = num_params++] = i32;
- params[si_shader_ctx->param_streamout_write_index = num_params++] = i32;
- }
- /* A streamout buffer offset is loaded if the stride is non-zero. */
- for (i = 0; i < 4; i++) {
- if (!shader->selector->so.stride[i])
- continue;
-
- params[si_shader_ctx->param_streamout_offset[i] = num_params++] = i32;
- }
+ declare_streamout_params(si_shader_ctx, &shader->selector->so,
+ params, i32, &num_params);
}
last_sgpr = num_params-1;
/* VGPRs */
params[si_shader_ctx->param_vertex_id = num_params++] = i32;
- params[num_params++] = i32; /* unused*/
- params[num_params++] = i32; /* unused */
+ params[si_shader_ctx->param_rel_auto_id = num_params++] = i32;
+ params[si_shader_ctx->param_vs_prim_id = num_params++] = i32;
params[si_shader_ctx->param_instance_id = num_params++] = i32;
break;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+ params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+ params[SI_PARAM_TCS_IN_LAYOUT] = i32;
+ params[SI_PARAM_TESS_FACTOR_OFFSET] = i32;
+ last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET;
+
+ /* VGPRs */
+ params[SI_PARAM_PATCH_ID] = i32;
+ params[SI_PARAM_REL_IDS] = i32;
+ num_params = SI_PARAM_REL_IDS+1;
+ break;
+
+ case TGSI_PROCESSOR_TESS_EVAL:
+ params[SI_PARAM_TCS_OUT_OFFSETS] = i32;
+ params[SI_PARAM_TCS_OUT_LAYOUT] = i32;
+ num_params = SI_PARAM_TCS_OUT_LAYOUT+1;
+
+ if (shader->key.tes.as_es) {
+ params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
+ } else {
+ declare_streamout_params(si_shader_ctx, &shader->selector->so,
+ params, i32, &num_params);
+ }
+ last_sgpr = num_params - 1;
+
+ /* VGPRs */
+ params[si_shader_ctx->param_tes_u = num_params++] = f32;
+ params[si_shader_ctx->param_tes_v = num_params++] = f32;
+ params[si_shader_ctx->param_tes_rel_patch_id = num_params++] = i32;
+ params[si_shader_ctx->param_tes_patch_id = num_params++] = i32;
+ break;
+
case TGSI_PROCESSOR_GEOMETRY:
params[SI_PARAM_GS2VS_OFFSET] = i32;
params[SI_PARAM_GS_WAVE_ID] = i32;
@@ -2435,12 +3551,35 @@ static void create_function(struct si_shader_context *si_shader_ctx)
if (bld_base->info &&
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
- bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
- si_shader_ctx->ddxy_lds =
+ bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
+ bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
+ si_shader_ctx->lds =
LLVMAddGlobalInAddressSpace(gallivm->module,
LLVMArrayType(i32, 64),
"ddxy_lds",
LOCAL_ADDR_SPACE);
+
+ if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) ||
+ si_shader_ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
+ si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
+ /* This is the upper bound, maximum is 32 inputs times 32 vertices */
+ unsigned vertex_data_dw_size = 32*32*4;
+ unsigned patch_data_dw_size = 32*4;
+ /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */
+ unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size;
+ unsigned lds_dwords = patch_dw_size;
+
+ /* The actual size is computed outside of the shader to reduce
+ * the number of shader variants. */
+ si_shader_ctx->lds =
+ LLVMAddGlobalInAddressSpace(gallivm->module,
+ LLVMArrayType(i32, lds_dwords),
+ "tess_lds",
+ LOCAL_ADDR_SPACE);
+ }
}
static void preload_constants(struct si_shader_context *si_shader_ctx)
@@ -2517,9 +3656,13 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
struct gallivm_state * gallivm = bld_base->base.gallivm;
unsigned i;
- if (si_shader_ctx->type != TGSI_PROCESSOR_VERTEX ||
- si_shader_ctx->shader->key.vs.as_es ||
- !si_shader_ctx->shader->selector->so.num_outputs)
+ /* Streamout can only be used if the shader is compiled as VS. */
+ if (!si_shader_ctx->shader->selector->so.num_outputs ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
+ (si_shader_ctx->shader->key.vs.as_es ||
+ si_shader_ctx->shader->key.vs.as_ls)) ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+ si_shader_ctx->shader->key.tes.as_es))
return;
LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
@@ -2550,6 +3693,8 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
si_shader_ctx->shader->key.vs.as_es) ||
+ (si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL &&
+ si_shader_ctx->shader->key.tes.as_es) ||
si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_ESGS);
@@ -2557,13 +3702,21 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
}
- if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY ||
- si_shader_ctx->shader->is_gs_copy_shader) {
+ if (si_shader_ctx->shader->is_gs_copy_shader) {
LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
- si_shader_ctx->gsvs_ring =
+ si_shader_ctx->gsvs_ring[0] =
build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
}
+ if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
+ int i;
+ for (i = 0; i < 4; i++) {
+ LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS + i);
+
+ si_shader_ctx->gsvs_ring[i] =
+ build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
+ }
+ }
}
void si_shader_binary_read_config(const struct si_screen *sscreen,
@@ -2637,26 +3790,54 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
}
}
-int si_shader_binary_read(struct si_screen *sscreen,
- struct si_shader *shader,
- const struct radeon_shader_binary *binary)
+int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
{
-
- unsigned i;
- unsigned code_size;
+ const struct radeon_shader_binary *binary = &shader->binary;
+ unsigned code_size = binary->code_size + binary->rodata_size;
unsigned char *ptr;
+
+ r600_resource_reference(&shader->bo, NULL);
+ shader->bo = si_resource_create_custom(&sscreen->b.b,
+ PIPE_USAGE_IMMUTABLE,
+ code_size);
+ if (!shader->bo)
+ return -ENOMEM;
+
+ ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
+ PIPE_TRANSFER_READ_WRITE);
+ util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
+ if (binary->rodata_size > 0) {
+ ptr += binary->code_size;
+ util_memcpy_cpu_to_le32(ptr, binary->rodata,
+ binary->rodata_size);
+ }
+
+ sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
+ return 0;
+}
+
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
+{
+ const struct radeon_shader_binary *binary = &shader->binary;
+ unsigned i;
bool dump = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : NULL);
si_shader_binary_read_config(sscreen, shader, 0);
+ if (si_shader_binary_upload(sscreen, shader)) return -ENOMEM; /* BO alloc failed */
if (dump) {
- if (!binary->disassembled) {
- fprintf(stderr, "SI CODE:\n");
- for (i = 0; i < binary->code_size; i+=4 ) {
- fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
- binary->code[i + 2], binary->code[i + 1],
- binary->code[i]);
+ if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
+ if (binary->disasm_string) {
+ fprintf(stderr, "\nShader Disassembly:\n\n");
+ fprintf(stderr, "%s\n", binary->disasm_string);
+ } else {
+ fprintf(stderr, "SI CODE:\n");
+ for (i = 0; i < binary->code_size; i+=4 ) {
+ fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
+ binary->code[i + 2], binary->code[i + 1],
+ binary->code[i]);
+ }
}
}
@@ -2666,26 +3847,6 @@ int si_shader_binary_read(struct si_screen *sscreen,
shader->num_sgprs, shader->num_vgprs, binary->code_size,
shader->lds_size, shader->scratch_bytes_per_wave);
}
-
- /* copy new shader */
- code_size = binary->code_size + binary->rodata_size;
- r600_resource_reference(&shader->bo, NULL);
- shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE,
- code_size);
- if (shader->bo == NULL) {
- return -ENOMEM;
- }
-
-
- ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE);
- util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
- if (binary->rodata_size > 0) {
- ptr += binary->code_size;
- util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
- }
-
- sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
-
return 0;
}
@@ -2693,15 +3854,16 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
LLVMTargetMachineRef tm, LLVMModuleRef mod)
{
int r = 0;
- bool dump = r600_can_dump_shader(&sscreen->b,
- shader->selector ? shader->selector->tokens : NULL);
- r = radeon_llvm_compile(mod, &shader->binary,
- r600_get_llvm_processor_name(sscreen->b.family), dump, tm);
+ bool dump_asm = r600_can_dump_shader(&sscreen->b,
+ shader->selector ? shader->selector->tokens : NULL);
+ bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
- if (r) {
+ r = radeon_llvm_compile(mod, &shader->binary,
+ r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
+ if (r)
return r;
- }
- r = si_shader_binary_read(sscreen, shader, &shader->binary);
+
+ r = si_shader_binary_read(sscreen, shader);
FREE(shader->binary.config);
FREE(shader->binary.rodata);
@@ -2709,7 +3871,8 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
if (shader->scratch_bytes_per_wave == 0) {
FREE(shader->binary.code);
FREE(shader->binary.relocs);
- memset(&shader->binary, 0, sizeof(shader->binary));
+ memset(&shader->binary, 0,
+ offsetof(struct radeon_shader_binary, disasm_string));
}
return r;
}
@@ -2741,7 +3904,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
preload_streamout_buffers(si_shader_ctx);
preload_ring_buffers(si_shader_ctx);
- args[0] = si_shader_ctx->gsvs_ring;
+ args[0] = si_shader_ctx->gsvs_ring[0];
args[1] = lp_build_mul_imm(uint,
LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
si_shader_ctx->param_vertex_id),
@@ -2767,7 +3930,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
outputs[i].values[chan] =
LLVMBuildBitCast(gallivm->builder,
- build_intrinsic(gallivm->builder,
+ lp_build_intrinsic(gallivm->builder,
"llvm.SI.buffer.load.dword.i32.i32",
LLVMInt32TypeInContext(gallivm->context),
args, 9,
@@ -2807,9 +3970,21 @@ static void si_dump_key(unsigned shader, union si_shader_key *key)
fprintf(stderr, "}\n");
if (key->vs.as_es)
- fprintf(stderr, " gs_used_inputs = 0x%"PRIx64"\n",
- key->vs.gs_used_inputs);
+ fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n",
+ key->vs.es_enabled_outputs);
fprintf(stderr, " as_es = %u\n", key->vs.as_es);
+ fprintf(stderr, " as_ls = %u\n", key->vs.as_ls);
+ break;
+
+ case PIPE_SHADER_TESS_CTRL:
+ fprintf(stderr, " prim_mode = %u\n", key->tcs.prim_mode);
+ break;
+
+ case PIPE_SHADER_TESS_EVAL:
+ if (key->tes.as_es)
+ fprintf(stderr, " es_enabled_outputs = 0x%"PRIx64"\n",
+ key->tes.es_enabled_outputs);
+ fprintf(stderr, " as_es = %u\n", key->tes.as_es);
break;
case PIPE_SHADER_GEOMETRY:
@@ -2851,7 +4026,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
- if (dump) {
+ if (dump && !(sscreen->b.debug_flags & DBG_NO_TGSI)) {
si_dump_key(sel->type, &shader->key);
tgsi_dump(tokens, 0);
si_dump_streamout(&sel->so);
@@ -2873,6 +4048,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
+ bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
+ bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
+ bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
+
bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
@@ -2888,9 +4067,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
+ bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
+ bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
+ bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
if (HAVE_LLVM >= 0x0306) {
bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
@@ -2908,11 +4090,25 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
switch (si_shader_ctx.type) {
case TGSI_PROCESSOR_VERTEX:
si_shader_ctx.radeon_bld.load_input = declare_input_vs;
- if (shader->key.vs.as_es) {
+ if (shader->key.vs.as_ls)
+ bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
+ else if (shader->key.vs.as_es)
bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- } else {
+ else
+ bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
+ break;
+ case TGSI_PROCESSOR_TESS_CTRL:
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
+ bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
+ bld_base->emit_store = store_output_tcs;
+ bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
+ break;
+ case TGSI_PROCESSOR_TESS_EVAL:
+ bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
+ if (shader->key.tes.as_es)
+ bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
+ else
bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
- }
break;
case TGSI_PROCESSOR_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
@@ -2946,9 +4142,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
preload_ring_buffers(&si_shader_ctx);
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
- si_shader_ctx.gs_next_vertex =
- lp_build_alloca(bld_base->base.gallivm,
- bld_base->uint_bld.elem_type, "");
+ int i;
+ for (i = 0; i < 4; i++) {
+ si_shader_ctx.gs_next_vertex[i] =
+ lp_build_alloca(bld_base->base.gallivm,
+ bld_base->uint_bld.elem_type, "");
+ }
}
if (!lp_build_tgsi_llvm(bld_base, tokens)) {
@@ -3000,4 +4199,5 @@ void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
FREE(shader->binary.code);
FREE(shader->binary.relocs);
+ FREE(shader->binary.disasm_string);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 51055afe36a..cd845c12e64 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -26,6 +26,46 @@
* Christian König
*/
+/* How linking tessellation shader inputs and outputs works.
+ *
+ * Inputs and outputs between shaders are stored in a buffer. This buffer
+ * lives in LDS (typical case for tessellation), but it can also live
+ * in memory. Each input or output has a fixed location within a vertex.
+ * The highest used input or output determines the stride between vertices.
+ *
+ * Since tessellation is only enabled in the OpenGL core profile,
+ * only these semantics are valid for per-vertex data:
+ *
+ * Name Location
+ *
+ * POSITION 0
+ * PSIZE 1
+ * CLIPDIST0..1 2..3
+ * CULLDIST0..1 (not implemented)
+ * GENERIC0..31 4..35
+ *
+ * For example, a shader only writing GENERIC0 has the output stride of 5.
+ *
+ * Only these semantics are valid for per-patch data:
+ *
+ * Name Location
+ *
+ * TESSOUTER 0
+ * TESSINNER 1
+ * PATCH0..29 2..31
+ *
+ * That's how independent shaders agree on input and output locations.
+ * The si_shader_io_get_unique_index function assigns the locations.
+ *
+ * Other required information for calculating the input and output addresses
+ * like the vertex stride, the patch stride, and the offsets where per-vertex
+ * and per-patch data start, is passed to the shader via user data SGPRs.
+ * The offsets and strides are calculated at draw time and aren't available
+ * at compile time.
+ *
+ * The same approach should be used for linking ES->GS in the future.
+ */
+
#ifndef SI_SHADER_H
#define SI_SHADER_H
@@ -43,9 +83,16 @@ struct radeon_shader_reloc;
#define SI_SGPR_VERTEX_BUFFER 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
+#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */
+#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */
+#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
+#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
#define SI_SGPR_ALPHA_REF 8 /* PS only */
#define SI_VS_NUM_USER_SGPR 12
+#define SI_LS_NUM_USER_SGPR 13
+#define SI_TCS_NUM_USER_SGPR 11
+#define SI_TES_NUM_USER_SGPR 10
#define SI_GS_NUM_USER_SGPR 8
#define SI_GSCOPY_NUM_USER_SGPR 4
#define SI_PS_NUM_USER_SGPR 9
@@ -62,8 +109,30 @@ struct radeon_shader_reloc;
#define SI_PARAM_START_INSTANCE 6
/* the other VS parameters are assigned dynamically */
-/* ES only parameters */
-#define SI_PARAM_ES2GS_OFFSET 7
+/* Offsets where TCS outputs and TCS patch outputs live in LDS:
+ * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
+ * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32 + 32*32
+ */
+#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
+
+/* Layout of TCS outputs / TES inputs:
+ * [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
+ * [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
+ * [26:31] = gl_PatchVerticesIn, max = 32
+ */
+#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */
+
+/* Layout of LS outputs / TCS inputs
+ * [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
+ * [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
+ */
+#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */
+#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */
+
+/* TCS only parameters. */
+#define SI_PARAM_TESS_FACTOR_OFFSET 7
+#define SI_PARAM_PATCH_ID 8
+#define SI_PARAM_REL_IDS 9
/* GS only parameters */
#define SI_PARAM_GS2VS_OFFSET 4
@@ -115,9 +184,25 @@ struct si_shader_selector {
unsigned gs_output_prim;
unsigned gs_max_out_vertices;
- uint64_t gs_used_inputs; /* mask of "get_unique_index" bits */
+ unsigned gs_num_invocations;
+
+ /* masks of "get_unique_index" bits */
+ uint64_t inputs_read;
+ uint64_t outputs_written;
+ uint32_t patch_outputs_written;
};
+/* Valid shader configurations:
+ *
+ * API shaders VS | TCS | TES | GS |pass| PS
+ * are compiled as: | | | |thru|
+ * | | | | |
+ * Only VS & PS: VS | -- | -- | -- | -- | PS
+ * With GS: ES | -- | -- | GS | VS | PS
+ * With Tessel.: LS | HS | VS | -- | -- | PS
+ * With both: LS | HS | ES | GS | VS | PS
+ */
+
union si_shader_key {
struct {
unsigned export_16bpc:8;
@@ -130,11 +215,25 @@ union si_shader_key {
} ps;
struct {
unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS];
- /* The mask of "get_unique_index" bits, needed for ES,
- * it describes how the ES->GS ring buffer is laid out. */
- uint64_t gs_used_inputs;
- unsigned as_es:1;
+ /* Mask of "get_unique_index" bits - which outputs are read
+ * by the next stage (needed by ES).
+ * This describes how outputs are laid out in memory. */
+ uint64_t es_enabled_outputs;
+ unsigned as_es:1; /* export shader */
+ unsigned as_ls:1; /* local shader */
+ unsigned export_prim_id; /* when PS needs it and GS is disabled */
} vs;
+ struct {
+ unsigned prim_mode:3;
+ } tcs; /* tessellation control shader */
+ struct {
+ /* Mask of "get_unique_index" bits - which outputs are read
+ * by the next stage (needed by ES).
+ * This describes how outputs are laid out in memory. */
+ uint64_t es_enabled_outputs;
+ unsigned as_es:1; /* export shader */
+ unsigned export_prim_id; /* when PS needs it and GS is disabled */
+ } tes; /* tessellation evaluation shader */
};
struct si_shader {
@@ -161,27 +260,47 @@ struct si_shader {
unsigned nparam;
unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS];
unsigned ps_input_param_offset[PIPE_MAX_SHADER_INPUTS];
-
+ unsigned ps_input_interpolate[PIPE_MAX_SHADER_INPUTS];
bool uses_instanceid;
unsigned nr_pos_exports;
+ unsigned nr_param_exports;
bool is_gs_copy_shader;
bool dx10_clamp_mode; /* convert NaNs to 0 */
+
+ unsigned ls_rsrc1;
+ unsigned ls_rsrc2;
};
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
{
- return sctx->gs_shader ? &sctx->gs_shader->info
- : &sctx->vs_shader->info;
+ if (sctx->gs_shader)
+ return &sctx->gs_shader->info;
+ else if (sctx->tes_shader)
+ return &sctx->tes_shader->info;
+ else
+ return &sctx->vs_shader->info;
}
static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
{
if (sctx->gs_shader)
return sctx->gs_shader->current->gs_copy_shader;
+ else if (sctx->tes_shader)
+ return sctx->tes_shader->current;
else
return sctx->vs_shader->current;
}
+static inline bool si_vs_exports_prim_id(struct si_shader *shader)
+{
+ if (shader->selector->type == PIPE_SHADER_VERTEX)
+ return shader->key.vs.export_prim_id;
+ else if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+ return shader->key.tes.export_prim_id;
+ else
+ return false;
+}
+
/* radeonsi_shader.c */
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader);
@@ -189,8 +308,8 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
LLVMTargetMachineRef tm, LLVMModuleRef mod);
void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
-int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
- const struct radeon_shader_binary *binary);
+int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
uint64_t scratch_va);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6c18836d189..c923ea7e154 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -61,7 +61,7 @@ unsigned si_array_mode(unsigned mode)
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
{
- if (sscreen->b.chip_class == CIK &&
+ if (sscreen->b.chip_class >= CIK &&
sscreen->b.info.cik_macrotile_mode_array_valid) {
unsigned index, tileb;
@@ -489,11 +489,14 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
S_02881C_USE_VTX_POINT_SIZE(info->writes_psize) |
S_02881C_USE_VTX_EDGE_FLAG(info->writes_edgeflag) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(info->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(info->writes_viewport_index) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((clipdist_mask & 0x0F) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((clipdist_mask & 0xF0) != 0) |
S_02881C_VS_OUT_MISC_VEC_ENA(info->writes_psize ||
info->writes_edgeflag ||
- info->writes_layer) |
+ info->writes_layer ||
+ info->writes_viewport_index) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) |
(sctx->queued.named.rasterizer->clip_plane_enable &
clipdist_mask));
r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -509,20 +512,26 @@ static void si_set_scissor_states(struct pipe_context *ctx,
const struct pipe_scissor_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_scissor *scissor = CALLOC_STRUCT(si_state_scissor);
- struct si_pm4_state *pm4 = &scissor->pm4;
+ struct si_state_scissor *scissor;
+ struct si_pm4_state *pm4;
+ int i;
- if (scissor == NULL)
- return;
+ for (i = start_slot; i < start_slot + num_scissors; i++) {
+ int idx = i - start_slot;
+ int offset = i * 4 * 2;
- scissor->scissor = *state;
- si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL,
- S_028250_TL_X(state->minx) | S_028250_TL_Y(state->miny) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR,
- S_028254_BR_X(state->maxx) | S_028254_BR_Y(state->maxy));
-
- si_pm4_set_state(sctx, scissor, scissor);
+ scissor = CALLOC_STRUCT(si_state_scissor);
+ if (scissor == NULL)
+ return;
+ pm4 = &scissor->pm4;
+ scissor->scissor = state[idx];
+ si_pm4_set_reg(pm4, R_028250_PA_SC_VPORT_SCISSOR_0_TL + offset,
+ S_028250_TL_X(state[idx].minx) | S_028250_TL_Y(state[idx].miny) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028254_PA_SC_VPORT_SCISSOR_0_BR + offset,
+ S_028254_BR_X(state[idx].maxx) | S_028254_BR_Y(state[idx].maxy));
+ si_pm4_set_state(sctx, scissor[i], scissor);
+ }
}
static void si_set_viewport_states(struct pipe_context *ctx,
@@ -531,21 +540,29 @@ static void si_set_viewport_states(struct pipe_context *ctx,
const struct pipe_viewport_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_state_viewport *viewport = CALLOC_STRUCT(si_state_viewport);
- struct si_pm4_state *pm4 = &viewport->pm4;
+ struct si_state_viewport *viewport;
+ struct si_pm4_state *pm4;
+ int i;
- if (viewport == NULL)
- return;
+ for (i = start_slot; i < start_slot + num_viewports; i++) {
+ int idx = i - start_slot;
+ int offset = i * 4 * 6;
- viewport->viewport = *state;
- si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE_0, fui(state->scale[0]));
- si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET_0, fui(state->translate[0]));
- si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE_0, fui(state->scale[1]));
- si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET_0, fui(state->translate[1]));
- si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE_0, fui(state->scale[2]));
- si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET_0, fui(state->translate[2]));
+ viewport = CALLOC_STRUCT(si_state_viewport);
+ if (!viewport)
+ return;
+ pm4 = &viewport->pm4;
- si_pm4_set_state(sctx, viewport, viewport);
+ viewport->viewport = state[idx];
+ si_pm4_set_reg(pm4, R_02843C_PA_CL_VPORT_XSCALE + offset, fui(state[idx].scale[0]));
+ si_pm4_set_reg(pm4, R_028440_PA_CL_VPORT_XOFFSET + offset, fui(state[idx].translate[0]));
+ si_pm4_set_reg(pm4, R_028444_PA_CL_VPORT_YSCALE + offset, fui(state[idx].scale[1]));
+ si_pm4_set_reg(pm4, R_028448_PA_CL_VPORT_YOFFSET + offset, fui(state[idx].translate[1]));
+ si_pm4_set_reg(pm4, R_02844C_PA_CL_VPORT_ZSCALE + offset, fui(state[idx].scale[2]));
+ si_pm4_set_reg(pm4, R_028450_PA_CL_VPORT_ZOFFSET + offset, fui(state[idx].translate[2]));
+
+ si_pm4_set_state(sctx, viewport[i], viewport);
+ }
}
/*
@@ -649,7 +666,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
/* offset */
rs->offset_units = state->offset_units;
- rs->offset_scale = state->offset_scale * 12.0f;
+ rs->offset_scale = state->offset_scale * 16.0f;
si_pm4_set_reg(pm4, R_0286D4_SPI_INTERP_CONTROL_0,
S_0286D4_FLAT_SHADE_ENA(1) |
@@ -718,12 +735,12 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
if (sctx->framebuffer.nr_samples > 1 &&
(!old_rs || old_rs->multisample_enable != rs->multisample_enable))
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
si_pm4_bind_state(sctx, rasterizer, rs);
si_update_fb_rs_state(sctx);
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
}
static void si_delete_rs_state(struct pipe_context *ctx, void *state)
@@ -821,7 +838,8 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
db_depth_control = S_028800_Z_ENABLE(state->depth.enabled) |
S_028800_Z_WRITE_ENABLE(state->depth.writemask) |
- S_028800_ZFUNC(state->depth.func);
+ S_028800_ZFUNC(state->depth.func) |
+ S_028800_DEPTH_BOUNDS_ENABLE(state->depth.bounds_test);
/* stencil */
if (state->stencil[0].enabled) {
@@ -850,9 +868,12 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
dsa->alpha_func = PIPE_FUNC_ALWAYS;
}
- /* misc */
si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control);
si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
+ if (state->depth.bounds_test) {
+ si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth.bounds_min));
+ si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth.bounds_max));
+ }
return dsa;
}
@@ -888,7 +909,7 @@ static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
{
struct si_context *sctx = (struct si_context*)ctx;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
@@ -1157,7 +1178,9 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
int first_non_void)
{
struct si_screen *sscreen = (struct si_screen*)screen;
- bool enable_s3tc = sscreen->b.info.drm_minor >= 31;
+ bool enable_compressed_formats = (sscreen->b.info.drm_major == 2 &&
+ sscreen->b.info.drm_minor >= 31) ||
+ sscreen->b.info.drm_major == 3;
boolean uniform = TRUE;
int i;
@@ -1200,7 +1223,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
- if (!enable_s3tc)
+ if (!enable_compressed_formats)
goto out_unknown;
switch (format) {
@@ -1220,7 +1243,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
- if (!enable_s3tc)
+ if (!enable_compressed_formats)
goto out_unknown;
switch (format) {
@@ -1249,8 +1272,7 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
}
if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
-
- if (!enable_s3tc)
+ if (!enable_compressed_formats)
goto out_unknown;
if (!util_format_s3tc_enabled) {
@@ -1606,7 +1628,6 @@ boolean si_is_format_supported(struct pipe_screen *screen,
unsigned sample_count,
unsigned usage)
{
- struct si_screen *sscreen = (struct si_screen *)screen;
unsigned retval = 0;
if (target >= PIPE_MAX_TEXTURE_TYPES) {
@@ -1618,8 +1639,7 @@ boolean si_is_format_supported(struct pipe_screen *screen,
return FALSE;
if (sample_count > 1) {
- /* 2D tiling on CIK is supported since DRM 2.35.0 */
- if (sscreen->b.chip_class >= CIK && sscreen->b.info.drm_minor < 35)
+ if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
return FALSE;
switch (sample_count) {
@@ -1826,6 +1846,9 @@ static void si_initialize_color_surface(struct si_context *sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;
+ if (sctx->b.chip_class >= VI)
+ surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
+
if (rtex->fmask.size) {
surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
@@ -2023,7 +2046,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
util_format_is_pure_integer(state->cbufs[0]->format);
if (sctx->framebuffer.cb0_is_integer != old_cb0_is_integer)
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
@@ -2043,6 +2066,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (rtex->fmask.size && rtex->cmask.size) {
sctx->framebuffer.compressed_cb_mask |= 1 << i;
}
+ r600_context_add_resource_size(ctx, surf->base.texture);
}
/* Set the 16BPC export for possible dual-src blending. */
if (i == 1 && surf && surf->export_16bpc) {
@@ -2057,20 +2081,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
}
+ r600_context_add_resource_size(ctx, surf->base.texture);
}
si_update_fb_rs_state(sctx);
si_update_fb_blend_state(sctx);
- sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
+ sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
- sctx->framebuffer.atom.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->framebuffer.nr_samples != old_nr_samples) {
- sctx->msaa_config.dirty = true;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
/* Set sample locations as fragment shader constants. */
switch (sctx->framebuffer.nr_samples) {
@@ -2107,7 +2132,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
old_nr_samples != SI_NUM_SMOOTH_AA_SAMPLES) &&
(sctx->framebuffer.nr_samples != SI_NUM_SMOOTH_AA_SAMPLES ||
old_nr_samples != 1))
- sctx->msaa_sample_locs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs);
}
}
@@ -2141,20 +2166,24 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
RADEON_PRIO_COLOR_META);
}
- r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
+ r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
+ sctx->b.chip_class >= VI ? 14 : 13);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */
radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */
radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */
radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */
- radeon_emit(cs, 0); /* R_028C78 unused */
+ radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */
radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */
radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */
radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */
radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */
radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
+
+ if (sctx->b.chip_class >= VI)
+ radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0]) {
@@ -2249,22 +2278,35 @@ static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
sctx->ps_iter_samples = min_samples;
if (sctx->framebuffer.nr_samples > 1)
- sctx->msaa_config.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
/*
* Samplers
*/
-static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *state)
+/**
+ * Create a sampler view.
+ *
+ * @param ctx context
+ * @param texture texture
+ * @param state sampler view template
+ * @param width0 width0 override (for compressed textures as int)
+ * @param height0 height0 override (for compressed textures as int)
+ * @param force_level if non-zero, use this mip level as the base address and expose it as level 0; must equal the view's first and last level (for compressed textures)
+ */
+struct pipe_sampler_view *
+si_create_sampler_view_custom(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *state,
+ unsigned width0, unsigned height0,
+ unsigned force_level)
{
struct si_context *sctx = (struct si_context*)ctx;
struct si_sampler_view *view = CALLOC_STRUCT(si_sampler_view);
struct r600_texture *tmp = (struct r600_texture*)texture;
const struct util_format_description *desc;
- unsigned format, num_format;
+ unsigned format, num_format, base_level, first_level, last_level;
uint32_t pitch = 0;
unsigned char state_swizzle[4], swizzle[4];
unsigned height, depth, width;
@@ -2297,7 +2339,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
/* Buffer resource. */
if (texture->target == PIPE_BUFFER) {
- unsigned stride;
+ unsigned stride, num_records;
desc = util_format_description(state->format);
first_non_void = util_format_get_first_non_void_channel(state->format);
@@ -2306,10 +2348,16 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
+ num_records = state->u.buf.last_element + 1 - state->u.buf.first_element;
+ num_records = MIN2(num_records, texture->width0 / stride);
+
+ if (sctx->b.chip_class >= VI)
+ num_records *= stride;
+
view->state[4] = va;
view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride);
- view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element;
+ view->state[6] = num_records;
view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
@@ -2437,13 +2485,25 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
format = 0;
}
- /* not supported any more */
- //endian = si_colorformat_endian_swap(format);
+ base_level = 0;
+ first_level = state->u.tex.first_level;
+ last_level = state->u.tex.last_level;
+ width = width0;
+ height = height0;
+ depth = texture->depth0;
- width = surflevel[0].npix_x;
- height = surflevel[0].npix_y;
- depth = surflevel[0].npix_z;
- pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
+ if (force_level) {
+ assert(force_level == first_level &&
+ force_level == last_level);
+ base_level = force_level;
+ first_level = 0;
+ last_level = 0;
+ width = u_minify(width, force_level);
+ height = u_minify(height, force_level);
+ depth = u_minify(depth, force_level);
+ }
+
+ pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);
if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
@@ -2453,8 +2513,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
depth = texture->array_size / 6;
- va = tmp->resource.gpu_address + surflevel[0].offset;
- va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size;
+ va = tmp->resource.gpu_address + surflevel[base_level].offset;
view->state[0] = va >> 8;
view->state[1] = (S_008F14_BASE_ADDRESS_HI(va >> 40) |
@@ -2467,11 +2526,11 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) |
S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) |
S_008F1C_BASE_LEVEL(texture->nr_samples > 1 ?
- 0 : state->u.tex.first_level - tmp->mipmap_shift) |
+ 0 : first_level) |
S_008F1C_LAST_LEVEL(texture->nr_samples > 1 ?
util_logbase2(texture->nr_samples) :
- state->u.tex.last_level - tmp->mipmap_shift) |
- S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, 0, false)) |
+ last_level) |
+ S_008F1C_TILING_INDEX(si_tile_mode_index(tmp, base_level, false)) |
S_008F1C_POW2_PAD(texture->last_level > 0) |
S_008F1C_TYPE(si_tex_dim(texture->target, texture->nr_samples)));
view->state[4] = (S_008F20_DEPTH(depth - 1) | S_008F20_PITCH(pitch - 1));
@@ -2523,6 +2582,16 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
return &view->base;
}
+static struct pipe_sampler_view * /* thin wrapper around si_create_sampler_view_custom() */
+si_create_sampler_view(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *state)
+{
+ return si_create_sampler_view_custom(ctx, texture, state,
+ texture ? texture->width0 : 0, /* no size override: the texture's own width0, or 0 when texture is NULL */
+ texture ? texture->height0 : 0, 0); /* same for height0; force_level is passed as 0 */
+}
+
static void si_sampler_view_destroy(struct pipe_context *ctx,
struct pipe_sampler_view *state)
{
@@ -2765,6 +2834,7 @@ static void si_set_vertex_buffers(struct pipe_context *ctx,
pipe_resource_reference(&dsti->buffer, src->buffer);
dsti->buffer_offset = src->buffer_offset;
dsti->stride = src->stride;
+ r600_context_add_resource_size(ctx, src->buffer);
}
} else {
for (i = 0; i < count; i++) {
@@ -2782,6 +2852,7 @@ static void si_set_index_buffer(struct pipe_context *ctx,
if (ib) {
pipe_resource_reference(&sctx->index_buffer.buffer, ib->buffer);
memcpy(&sctx->index_buffer, ib, sizeof(*ib));
+ r600_context_add_resource_size(ctx, ib->buffer);
} else {
pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
}
@@ -2845,6 +2916,30 @@ static void si_set_polygon_stipple(struct pipe_context *ctx,
}
}
+static void si_set_tess_state(struct pipe_context *ctx,
+ const float default_outer_level[4],
+ const float default_inner_level[2])
+{ /* Upload the default tessellation levels and bind them as a driver constant buffer for the TESS_CTRL stage. */
+ struct si_context *sctx = (struct si_context *)ctx;
+ struct pipe_constant_buffer cb;
+ float array[8] = {0}; /* 4 outer + 2 inner levels; zero the 2 padding floats, since all sizeof(array) bytes are uploaded */
+
+ memcpy(array, default_outer_level, sizeof(float) * 4); /* array[0..3] = outer levels */
+ memcpy(array+4, default_inner_level, sizeof(float) * 2); /* array[4..5] = inner levels */
+
+ cb.buffer = NULL; /* handed to si_upload_const_buffer() below via &cb.buffer */
+ cb.user_buffer = NULL;
+ cb.buffer_size = sizeof(array);
+
+ si_upload_const_buffer(sctx, (struct r600_resource**)&cb.buffer,
+ (void*)array, sizeof(array),
+ &cb.buffer_offset);
+
+ ctx->set_constant_buffer(ctx, PIPE_SHADER_TESS_CTRL,
+ SI_DRIVER_STATE_CONST_BUF, &cb);
+ pipe_resource_reference(&cb.buffer, NULL); /* drop the local reference to the uploaded buffer */
+}
+
static void si_texture_barrier(struct pipe_context *ctx)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -2870,6 +2965,8 @@ static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
}
+static void si_init_config(struct si_context *sctx);
+
void si_init_state_functions(struct si_context *sctx)
{
si_init_atom(&sctx->framebuffer.atom, &sctx->atoms.s.framebuffer, si_emit_framebuffer_state, 0);
@@ -2920,6 +3017,7 @@ void si_init_state_functions(struct si_context *sctx)
sctx->b.b.texture_barrier = si_texture_barrier;
sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
sctx->b.b.set_min_samples = si_set_min_samples;
+ sctx->b.b.set_tess_state = si_set_tess_state;
sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
@@ -2931,24 +3029,31 @@ void si_init_state_functions(struct si_context *sctx)
} else {
sctx->b.dma_copy = si_dma_copy;
}
+
+ si_init_config(sctx);
}
static void
si_write_harvested_raster_configs(struct si_context *sctx,
struct si_pm4_state *pm4,
- unsigned raster_config)
+ unsigned raster_config,
+ unsigned raster_config_1)
{
unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
- unsigned num_rb = sctx->screen->b.info.r600_num_backends;
- unsigned rb_per_pkr = num_rb / num_se / sh_per_se;
+ unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+ unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
unsigned rb_per_se = num_rb / num_se;
- unsigned se0_mask = (1 << rb_per_se) - 1;
- unsigned se1_mask = se0_mask << rb_per_se;
+ unsigned se_mask[4];
unsigned se;
- assert(num_se == 1 || num_se == 2);
+ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+ assert(num_se == 1 || num_se == 2 || num_se == 4);
assert(sh_per_se == 1 || sh_per_se == 2);
assert(rb_per_pkr == 1 || rb_per_pkr == 2);
@@ -2956,17 +3061,16 @@ si_write_harvested_raster_configs(struct si_context *sctx,
* fields are for, so I'm leaving them as their default
* values. */
- se0_mask &= rb_mask;
- se1_mask &= rb_mask;
- if (num_se == 2 && (!se0_mask || !se1_mask)) {
- raster_config &= C_028350_SE_MAP;
+ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
+ (!se_mask[2] && !se_mask[3]))) {
+ raster_config_1 &= C_028354_SE_PAIR_MAP;
- if (!se0_mask) {
- raster_config |=
- S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+ if (!se_mask[0] && !se_mask[1]) {
+ raster_config_1 |=
+ S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
} else {
- raster_config |=
- S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+ raster_config_1 |=
+ S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
}
}
@@ -2974,10 +3078,23 @@ si_write_harvested_raster_configs(struct si_context *sctx,
unsigned raster_config_se = raster_config;
unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+ int idx = (se / 2) * 2;
+
+ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+ raster_config_se &= C_028350_SE_MAP;
+
+ if (!se_mask[idx]) {
+ raster_config_se |=
+ S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+ } else {
+ raster_config_se |=
+ S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+ }
+ }
pkr0_mask &= rb_mask;
pkr1_mask &= rb_mask;
- if (sh_per_se == 2 && (!pkr0_mask || !pkr1_mask)) {
+ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
raster_config_se &= C_028350_PKR_MAP;
if (!pkr0_mask) {
@@ -2989,7 +3106,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
}
}
- if (rb_per_pkr == 2) {
+ if (rb_per_se >= 2) {
unsigned rb0_mask = 1 << (se * rb_per_se);
unsigned rb1_mask = rb0_mask << 1;
@@ -3007,7 +3124,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
}
}
- if (sh_per_se == 2) {
+ if (rb_per_se > 2) {
rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
rb1_mask = rb0_mask << 1;
rb0_mask &= rb_mask;
@@ -3026,19 +3143,28 @@ si_write_harvested_raster_configs(struct si_context *sctx,
}
}
- si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
- SE_INDEX(se) | SH_BROADCAST_WRITES |
- INSTANCE_BROADCAST_WRITES);
+ /* GRBM_GFX_INDEX is privileged on VI */
+ if (sctx->b.chip_class <= CIK)
+ si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+ SE_INDEX(se) | SH_BROADCAST_WRITES |
+ INSTANCE_BROADCAST_WRITES);
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se);
+ if (sctx->b.chip_class >= CIK)
+ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
}
- si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
- SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
- INSTANCE_BROADCAST_WRITES);
+ /* GRBM_GFX_INDEX is privileged on VI */
+ if (sctx->b.chip_class <= CIK)
+ si_pm4_set_reg(pm4, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
+ INSTANCE_BROADCAST_WRITES);
}
-void si_init_config(struct si_context *sctx)
+static void si_init_config(struct si_context *sctx)
{
+ unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+ unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+ unsigned raster_config, raster_config_1;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
@@ -3046,24 +3172,18 @@ void si_init_config(struct si_context *sctx)
si_cmd_context_control(pm4);
- si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0);
- si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0);
+ si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+ si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
/* FIXME calculate these values somehow ??? */
si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, 0x80);
si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
- si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0x0);
si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
- si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
- si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
- si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
- si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT, 0);
-
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
@@ -3076,62 +3196,78 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
- if (sctx->b.chip_class >= CIK) {
- switch (sctx->screen->b.family) {
- case CHIP_BONAIRE:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
- si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
- break;
- case CHIP_HAWAII:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
- si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
- break;
- case CHIP_KAVERI:
- /* XXX todo */
- case CHIP_KABINI:
- /* XXX todo */
- case CHIP_MULLINS:
- /* XXX todo */
- default:
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0);
- si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0);
- break;
- }
+ switch (sctx->screen->b.family) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ raster_config = 0x2a00126a;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_VERDE:
+ raster_config = 0x0000124a;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_OLAND:
+ raster_config = 0x00000082;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_HAINAN:
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_BONAIRE:
+ raster_config = 0x16000012;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_HAWAII:
+ raster_config = 0x3a00161a;
+ raster_config_1 = 0x0000002e;
+ break;
+ case CHIP_FIJI:
+ /* Fiji should be the same as Hawaii, but that causes corruption in some cases */
+ raster_config = 0x16000012; /* 0x3a00161a */
+ raster_config_1 = 0x0000002a; /* 0x0000002e */
+ break;
+ case CHIP_TONGA:
+ raster_config = 0x16000012;
+ raster_config_1 = 0x0000002a;
+ break;
+ case CHIP_ICELAND:
+ raster_config = 0x00000002;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_CARRIZO:
+ raster_config = 0x00000002;
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_KAVERI:
+ /* KV should be 0x00000002, but that causes problems with radeon */
+ raster_config = 0x00000000; /* 0x00000002 */
+ raster_config_1 = 0x00000000;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
+ break;
+ default:
+ fprintf(stderr,
+ "radeonsi: Unknown GPU, using 0 for raster_config\n");
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
+ break;
+ }
+
+ /* Always use the default config when all backends are enabled
+ * (or when we failed to determine the enabled backends).
+ */
+ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
+ raster_config);
+ if (sctx->b.chip_class >= CIK)
+ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
+ raster_config_1);
} else {
- unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
- unsigned num_rb = sctx->screen->b.info.r600_num_backends;
- unsigned raster_config;
-
- switch (sctx->screen->b.family) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- raster_config = 0x2a00126a;
- break;
- case CHIP_VERDE:
- raster_config = 0x0000124a;
- break;
- case CHIP_OLAND:
- raster_config = 0x00000082;
- break;
- case CHIP_HAINAN:
- raster_config = 0;
- break;
- default:
- fprintf(stderr,
- "radeonsi: Unknown GPU, using 0 for raster_config\n");
- raster_config = 0;
- break;
- }
-
- /* Always use the default config when all backends are enabled
- * (or when we failed to determine the enabled backends).
- */
- if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
- si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
- raster_config);
- } else {
- si_write_harvested_raster_configs(sctx, pm4, raster_config);
- }
+ si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
}
si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
@@ -3153,8 +3289,6 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
- si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, 0);
- si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, 0);
si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
@@ -3173,10 +3307,21 @@ void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
if (sctx->b.chip_class >= CIK) {
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffc));
+ si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
+ si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xfffe));
+ si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(0));
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
}
+ if (sctx->b.chip_class >= VI) {
+ si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
+ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
+ si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
+ }
+
sctx->init_config = pm4;
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 5e68b162137..b8f63c5dd36 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -30,6 +30,8 @@
#include "si_pm4.h"
#include "radeon/r600_pipe_common.h"
+#define SI_NUM_SHADERS (PIPE_SHADER_TESS_EVAL+1)
+
struct si_screen;
struct si_shader;
@@ -92,18 +94,21 @@ union si_state {
struct si_pm4_state *blend_color;
struct si_pm4_state *clip;
struct si_state_sample_mask *sample_mask;
- struct si_state_scissor *scissor;
- struct si_state_viewport *viewport;
+ struct si_state_scissor *scissor[16];
+ struct si_state_viewport *viewport[16];
struct si_state_rasterizer *rasterizer;
struct si_state_dsa *dsa;
struct si_pm4_state *fb_rs;
struct si_pm4_state *fb_blend;
struct si_pm4_state *dsa_stencil_ref;
struct si_pm4_state *ta_bordercolor_base;
+ struct si_pm4_state *ls;
+ struct si_pm4_state *hs;
struct si_pm4_state *es;
struct si_pm4_state *gs;
struct si_pm4_state *gs_rings;
- struct si_pm4_state *gs_onoff;
+ struct si_pm4_state *tf_ring;
+ struct si_pm4_state *vgt_shader_config;
struct si_pm4_state *vs;
struct si_pm4_state *ps;
struct si_pm4_state *spi;
@@ -111,6 +116,11 @@ union si_state {
struct si_pm4_state *array[0];
};
+struct si_shader_data { /* an r600_atom plus one 32-bit sh_base value per shader stage */
+ struct r600_atom atom;
+ uint32_t sh_base[SI_NUM_SHADERS]; /* SI_NUM_SHADERS is PIPE_SHADER_TESS_EVAL+1, so presumably indexed by PIPE_SHADER_* — confirm */
+};
+
#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */
#define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS
#define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1)
@@ -135,68 +145,61 @@ union si_state {
- * Ring buffers: 0..1
- * Streamout buffers: 2..5
+ * Ring buffers: 0..4
+ * Streamout buffers: 5..8
*/
-#define SI_RING_ESGS 0
-#define SI_RING_GSVS 1
-#define SI_NUM_RING_BUFFERS 2
+#define SI_RING_TESS_FACTOR 0 /* for HS (TCS) */
+#define SI_RING_ESGS 0 /* for ES, GS */
+#define SI_RING_GSVS 1 /* for GS, VS */
+#define SI_RING_GSVS_1 2 /* 1, 2, 3 for GS */
+#define SI_RING_GSVS_2 3
+#define SI_RING_GSVS_3 4
+#define SI_NUM_RING_BUFFERS 5
#define SI_SO_BUF_OFFSET SI_NUM_RING_BUFFERS
#define SI_NUM_RW_BUFFERS (SI_SO_BUF_OFFSET + 4)
#define SI_NUM_VERTEX_BUFFERS 16
-/* This represents resource descriptors in memory, such as buffer resources,
+/* This represents descriptors in memory, such as buffer resources,
* image resources, and sampler states.
*/
struct si_descriptors {
- struct r600_atom atom;
-
- /* The size of one resource descriptor. */
+ /* The list of descriptors in malloc'd memory. */
+ uint32_t *list;
+ /* The size of one descriptor. */
unsigned element_dw_size;
- /* The maximum number of resource descriptors. */
+ /* The maximum number of descriptors. */
unsigned num_elements;
+ /* Whether the list has been changed and should be re-uploaded. */
+ bool list_dirty;
- /* The buffer where resource descriptors are stored. */
+ /* The buffer where the descriptors have been uploaded. */
struct r600_resource *buffer;
unsigned buffer_offset;
- /* The i-th bit is set if that element is dirty (changed but not emitted). */
- uint64_t dirty_mask;
/* The i-th bit is set if that element is enabled (non-NULL resource). */
uint64_t enabled_mask;
- /* We can't update descriptors directly because the GPU might be
- * reading them at the same time, so we have to update them
- * in a copy-on-write manner. Each such copy is called a context,
- * which is just another array descriptors in the same buffer. */
- unsigned current_context_id;
- /* The size of a context, should be equal to 4*element_dw_size*num_elements. */
- unsigned context_size;
-
- /* The shader userdata register where the 64-bit pointer to the descriptor
+ /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
* array will be stored. */
- unsigned shader_userdata_reg;
+ unsigned shader_userdata_offset;
+ /* Whether the pointer should be re-emitted. */
+ bool pointer_dirty;
};
struct si_sampler_views {
struct si_descriptors desc;
struct pipe_sampler_view *views[SI_NUM_SAMPLER_VIEWS];
- uint32_t *desc_data[SI_NUM_SAMPLER_VIEWS];
};
struct si_sampler_states {
struct si_descriptors desc;
- uint32_t *desc_data[SI_NUM_SAMPLER_STATES];
void *saved_states[2]; /* saved for u_blitter */
};
struct si_buffer_resources {
struct si_descriptors desc;
- unsigned num_buffers;
enum radeon_bo_usage shader_usage; /* READ, WRITE, or READWRITE */
enum radeon_bo_priority priority;
struct pipe_resource **buffers; /* this has num_buffers elements */
- uint32_t *desc_storage; /* this has num_buffers*4 elements */
- uint32_t **desc_data; /* an array of pointers pointing to desc_storage */
};
#define si_pm4_block_idx(member) \
@@ -232,20 +235,18 @@ struct si_buffer_resources {
/* si_descriptors.c */
void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
unsigned start, unsigned count, void **states);
-void si_update_vertex_buffers(struct si_context *sctx);
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
struct pipe_resource *buffer,
unsigned stride, unsigned num_records,
bool add_tid, bool swizzle,
- unsigned element_size, unsigned index_stride);
+ unsigned element_size, unsigned index_stride, uint64_t offset);
void si_init_all_descriptors(struct si_context *sctx);
+bool si_upload_shader_descriptors(struct si_context *sctx);
void si_release_all_descriptors(struct si_context *sctx);
void si_all_descriptors_begin_new_cs(struct si_context *sctx);
-void si_copy_buffer(struct si_context *sctx,
- struct pipe_resource *dst, struct pipe_resource *src,
- uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
const uint8_t *ptr, unsigned size, uint32_t *const_offset);
+void si_shader_change_notify(struct si_context *sctx);
/* si_state.c */
struct si_shader_selector;
@@ -256,7 +257,6 @@ boolean si_is_format_supported(struct pipe_screen *screen,
unsigned sample_count,
unsigned usage);
void si_init_state_functions(struct si_context *sctx);
-void si_init_config(struct si_context *sctx);
unsigned cik_bank_wh(unsigned bankwh);
unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode);
unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect);
@@ -264,6 +264,12 @@ unsigned cik_tile_split(unsigned tile_split);
unsigned si_array_mode(unsigned mode);
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex);
unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil);
+struct pipe_sampler_view *
+si_create_sampler_view_custom(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *state,
+ unsigned width0, unsigned height0,
+ unsigned force_level);
/* si_state_shader.c */
void si_update_shaders(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2e77d85a80d..4c21655596c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -31,6 +31,7 @@
#include "util/u_index_modify.h"
#include "util/u_upload_mgr.h"
+#include "util/u_prim.h"
static void si_decompress_textures(struct si_context *sctx)
{
@@ -64,6 +65,7 @@ static unsigned si_conv_pipe_prim(unsigned mode)
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_008958_DI_PT_LINESTRIP_ADJ,
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_008958_DI_PT_TRILIST_ADJ,
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_008958_DI_PT_TRISTRIP_ADJ,
+ [PIPE_PRIM_PATCHES] = V_008958_DI_PT_PATCH,
[R600_PRIM_RECTANGLE_LIST] = V_008958_DI_PT_RECTLIST
};
assert(mode < Elements(prim_conv));
@@ -87,6 +89,7 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+ [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
[R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP
};
assert(mode < Elements(prim_conv));
@@ -94,8 +97,128 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
return prim_conv[mode];
}
+/**
+ * This calculates the LDS size for tessellation shaders (VS, TCS, TES).
+ * LS.LDS_SIZE is shared by all 3 shader stages.
+ *
+ * The information about LDS and other non-compile-time parameters is then
+ * written to userdata SGPRs.
+ *
+ * Nothing is emitted when the current LS variant, the TCS selector, the TES
+ * userdata base and the number of input control points all match the values
+ * remembered in sctx (last_ls, last_tcs, last_tes_sh_base,
+ * last_num_tcs_input_cp) from the previous emission.
+ *
+ * \param sctx         context supplying the bound shaders and the gfx CS
+ * \param info         draw info; only vertices_per_patch is read here
+ * \param num_patches  output; always set, currently to the constant 1
+ */
+static void si_emit_derived_tess_state(struct si_context *sctx,
+ const struct pipe_draw_info *info,
+ unsigned *num_patches)
+{
+ struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+ struct si_shader_selector *ls = sctx->vs_shader;
+ /* The TES pointer will only be used for sctx->last_tcs.
+ * It would be wrong to think that TCS = TES. */
+ struct si_shader_selector *tcs =
+ sctx->tcs_shader ? sctx->tcs_shader : sctx->tes_shader;
+ unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
+ unsigned num_tcs_input_cp = info->vertices_per_patch;
+ unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
+ unsigned num_tcs_patch_outputs;
+ unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
+ unsigned input_patch_size, output_patch_size, output_patch0_offset;
+ unsigned perpatch_output_offset, lds_size, ls_rsrc2;
+ unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
+
+ /* Written before the early return so the caller always gets a value. */
+ *num_patches = 1; /* TODO: calculate this */
+
+ /* Nothing that feeds the derived state changed: keep what was emitted. */
+ if (sctx->last_ls == ls->current &&
+ sctx->last_tcs == tcs &&
+ sctx->last_tes_sh_base == tes_sh_base &&
+ sctx->last_num_tcs_input_cp == num_tcs_input_cp)
+ return;
+
+ /* Remember the inputs used for this emission. */
+ sctx->last_ls = ls->current;
+ sctx->last_tcs = tcs;
+ sctx->last_tes_sh_base = tes_sh_base;
+ sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+
+ /* This calculates how shader inputs and outputs among VS, TCS, and TES
+ * are laid out in LDS. */
+ num_tcs_inputs = util_last_bit64(ls->outputs_written);
+
+ if (sctx->tcs_shader) {
+ num_tcs_outputs = util_last_bit64(tcs->outputs_written);
+ num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+ num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
+ } else {
+ /* No TCS. Route varyings from LS to TES. */
+ num_tcs_outputs = num_tcs_inputs;
+ num_tcs_output_cp = num_tcs_input_cp;
+ num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
+ }
+
+ /* Every input/output slot is given 16 bytes (presumably one vec4 of
+ * dwords - confirm against the shader-side LDS addressing). */
+ input_vertex_size = num_tcs_inputs * 16;
+ output_vertex_size = num_tcs_outputs * 16;
+
+ input_patch_size = num_tcs_input_cp * input_vertex_size;
+
+ /* An output patch is its per-vertex data followed by per-patch data. */
+ pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size;
+ output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16;
+
+ /* With a TCS bound, output patches start after all input patches;
+ * without one, they start at offset 0. */
+ output_patch0_offset = sctx->tcs_shader ? input_patch_size * *num_patches : 0;
+ perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
+
+ lds_size = output_patch0_offset + output_patch_size * *num_patches;
+ ls_rsrc2 = ls->current->ls_rsrc2;
+
+ /* LDS_SIZE is written as a count of 512-byte blocks on CIK and newer
+ * and of 256-byte blocks on older chips, rounded up. */
+ if (sctx->b.chip_class >= CIK) {
+ assert(lds_size <= 65536);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+ } else {
+ assert(lds_size <= 32768);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+ }
+
+ /* Due to a hw bug, RSRC2_LS must be written twice with another
+ * LS register written in between. */
+ /* Only the single write on the next line is conditional; the
+ * RSRC1_LS/RSRC2_LS pair that follows is always emitted. */
+ if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
+ si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
+ si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_emit(cs, ls->current->ls_rsrc1);
+ radeon_emit(cs, ls_rsrc2);
+
+ /* Compute userdata SGPRs. */
+ /* The values are packed into bitfields below: vertex sizes in dwords
+ * must fit in 8 bits, patch sizes in dwords in 13 bits, and offsets in
+ * 16-byte units in 16 bits. */
+ assert(((input_vertex_size / 4) & ~0xff) == 0);
+ assert(((output_vertex_size / 4) & ~0xff) == 0);
+ assert(((input_patch_size / 4) & ~0x1fff) == 0);
+ assert(((output_patch_size / 4) & ~0x1fff) == 0);
+ assert(((output_patch0_offset / 16) & ~0xffff) == 0);
+ assert(((perpatch_output_offset / 16) & ~0xffff) == 0);
+ assert(num_tcs_input_cp <= 32);
+ assert(num_tcs_output_cp <= 32);
+
+ /* Layout words: patch size in dwords in the low bits, vertex size in
+ * dwords starting at bit 13. A control-point count is OR'ed in at
+ * bit 26 when the word is emitted for TCS and TES below. */
+ tcs_in_layout = (input_patch_size / 4) |
+ ((input_vertex_size / 4) << 13);
+ tcs_out_layout = (output_patch_size / 4) |
+ ((output_vertex_size / 4) << 13);
+ /* Offsets word: first output patch in the low 16 bits, per-patch
+ * outputs of that patch in the high 16 bits, both in 16-byte units. */
+ tcs_out_offsets = (output_patch0_offset / 16) |
+ ((perpatch_output_offset / 16) << 16);
+
+ /* Set them for LS. */
+ si_write_sh_reg(cs,
+ R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4,
+ tcs_in_layout);
+
+ /* Set them for TCS. */
+ /* Note that the TCS copy of the output layout carries the number of
+ * *input* control points. */
+ si_write_sh_reg_seq(cs,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OUT_OFFSETS * 4, 3);
+ radeon_emit(cs, tcs_out_offsets);
+ radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26));
+ radeon_emit(cs, tcs_in_layout);
+
+ /* Set them for TES. */
+ /* The TES copy of the output layout carries the number of *output*
+ * control points instead. */
+ si_write_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OUT_OFFSETS * 4, 2);
+ radeon_emit(cs, tcs_out_offsets);
+ radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
+}
+
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
- const struct pipe_draw_info *info)
+ const struct pipe_draw_info *info,
+ unsigned num_patches)
{
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned prim = info->mode;
@@ -104,11 +227,41 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
bool ia_switch_on_eop = false;
+ bool ia_switch_on_eoi = false;
bool partial_vs_wave = false;
+ bool partial_es_wave = false;
if (sctx->gs_shader)
primgroup_size = 64; /* recommended with a GS */
+ if (sctx->tes_shader) {
+ unsigned num_cp_out =
+ sctx->tcs_shader ?
+ sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] :
+ info->vertices_per_patch;
+ unsigned max_size = 256 / MAX2(info->vertices_per_patch, num_cp_out);
+
+ primgroup_size = MIN2(primgroup_size, max_size);
+
+ /* primgroup_size must be set to a multiple of NUM_PATCHES */
+ primgroup_size = (primgroup_size / num_patches) * num_patches;
+
+ /* SWITCH_ON_EOI must be set if PrimID is used.
+ * If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ if ((sctx->tcs_shader && sctx->tcs_shader->info.uses_primid) ||
+ sctx->tes_shader->info.uses_primid) {
+ ia_switch_on_eoi = true;
+ partial_es_wave = true;
+ }
+
+ /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+ if ((sctx->b.family == CHIP_TAHITI ||
+ sctx->b.family == CHIP_PITCAIRN ||
+ sctx->b.family == CHIP_BONAIRE) &&
+ sctx->gs_shader)
+ partial_vs_wave = true;
+ }
+
/* This is a hardware requirement. */
if ((rs && rs->line_stipple_enable) ||
(sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) {
@@ -139,14 +292,52 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
(info->indirect || info->instance_count > 1))
wd_switch_on_eop = true;
+ /* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */
+ if (info->count_from_stream_output)
+ wd_switch_on_eop = true;
+
/* If the WD switch is false, the IA switch must be false too. */
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
+ /* Hw bug with single-primitive instances and SWITCH_ON_EOI
+ * on multi-SE chips. */
+ if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
+ (info->indirect ||
+ (info->instance_count > 1 &&
+ u_prims_for_vertices(info->mode, info->count) <= 1)))
+ sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
+
+ /* Instancing bug on 2 SE chips. */
+ if (sctx->b.screen->info.max_se == 2 && ia_switch_on_eoi &&
+ (info->indirect || info->instance_count > 1))
+ partial_vs_wave = true;
+
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
+ S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
+ S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
- S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0);
+ S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
+}
+
+/**
+ * Build the VGT_LS_HS_CONFIG register value for a draw.
+ *
+ * Returns 0 when no tessellation evaluation shader is bound. Otherwise the
+ * value combines the number of patches, the number of input control points
+ * (info->vertices_per_patch) and the number of output control points. The
+ * latter is taken from the TCS when one is bound and equals the input
+ * count otherwise.
+ */
+static unsigned si_get_ls_hs_config(struct si_context *sctx,
+				    const struct pipe_draw_info *info,
+				    unsigned num_patches)
+{
+	unsigned cp_in, cp_out;
+
+	/* Tessellation is off: there is nothing to configure. */
+	if (!sctx->tes_shader)
+		return 0;
+
+	cp_in = info->vertices_per_patch;
+
+	/* Without a TCS, patches pass through with the same vertex count. */
+	cp_out = cp_in;
+	if (sctx->tcs_shader)
+		cp_out = sctx->tcs_shader->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+
+	return S_028B58_HS_NUM_OUTPUT_CP(cp_out) |
+	       S_028B58_HS_NUM_INPUT_CP(cp_in) |
+	       S_028B58_NUM_PATCHES(num_patches);
}
static void si_emit_scratch_reloc(struct si_context *sctx)
@@ -202,22 +393,31 @@ static void si_emit_draw_registers(struct si_context *sctx,
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
unsigned prim = si_conv_pipe_prim(info->mode);
unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
- unsigned ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info);
+ unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
+
+ if (sctx->tes_shader)
+ si_emit_derived_tess_state(sctx, info, &num_patches);
+
+ ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
+ ls_hs_config = si_get_ls_hs_config(sctx, info, num_patches);
/* Draw state. */
if (prim != sctx->last_prim ||
- ia_multi_vgt_param != sctx->last_multi_vgt_param) {
+ ia_multi_vgt_param != sctx->last_multi_vgt_param ||
+ ls_hs_config != sctx->last_ls_hs_config) {
if (sctx->b.chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DRAW_PREAMBLE, 2, 0));
radeon_emit(cs, prim); /* VGT_PRIMITIVE_TYPE */
radeon_emit(cs, ia_multi_vgt_param); /* IA_MULTI_VGT_PARAM */
- radeon_emit(cs, 0); /* VGT_LS_HS_CONFIG */
+ radeon_emit(cs, ls_hs_config); /* VGT_LS_HS_CONFIG */
} else {
r600_write_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
r600_write_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+ r600_write_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
}
sctx->last_prim = prim;
sctx->last_multi_vgt_param = ia_multi_vgt_param;
+ sctx->last_ls_hs_config = ls_hs_config;
}
if (gs_out_prim != sctx->last_gs_out_prim) {
@@ -245,8 +445,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
const struct pipe_index_buffer *ib)
{
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
- unsigned sh_base_reg = (sctx->gs_shader ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
- R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
if (info->count_from_stream_output) {
struct r600_so_target *t =
@@ -275,12 +474,24 @@ static void si_emit_draw_packets(struct si_context *sctx,
if (info->indexed) {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
- if (ib->index_size == 4) {
- radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ?
- V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
- } else {
- radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ?
- V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+ /* index type */
+ switch (ib->index_size) {
+ case 1:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_8);
+ break;
+ case 2:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_16 |
+ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+ V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+ break;
+ case 4:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_32 |
+ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+ V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
+ break;
+ default:
+ assert(!"unreachable");
+ return;
}
}
@@ -406,9 +617,14 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
if (sctx->flags & SI_CONTEXT_INV_TC_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_TC_L2)
+ if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+ /* TODO: this might not be needed. */
+ if (sctx->chip_class >= VI)
+ cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+ }
+
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
@@ -520,8 +736,14 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
(info->indexed || !info->count_from_stream_output))
return;
- if (!sctx->ps_shader || !sctx->vs_shader)
+ if (!sctx->ps_shader || !sctx->vs_shader) {
+ assert(0);
return;
+ }
+ if (!!sctx->tes_shader != (info->mode == PIPE_PRIM_PATCHES)) {
+ assert(0);
+ return;
+ }
si_decompress_textures(sctx);
@@ -532,15 +754,15 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
* current_rast_prim for this draw_vbo call. */
if (sctx->gs_shader)
sctx->current_rast_prim = sctx->gs_shader->gs_output_prim;
+ else if (sctx->tes_shader)
+ sctx->current_rast_prim =
+ sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
else
sctx->current_rast_prim = info->mode;
si_update_shaders(sctx);
-
- if (sctx->vertex_buffers_dirty) {
- si_update_vertex_buffers(sctx);
- sctx->vertex_buffers_dirty = false;
- }
+ if (!si_upload_shader_descriptors(sctx))
+ return;
if (info->indexed) {
/* Initialize the index buffer struct. */
@@ -550,7 +772,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
ib.offset = sctx->index_buffer.offset;
/* Translate or upload, if needed. */
- if (ib.index_size == 1) {
+ /* 8-bit indices are supported on VI. */
+ if (sctx->b.chip_class <= CIK && ib.index_size == 1) {
struct pipe_resource *out_buffer = NULL;
unsigned out_offset, start, count, start_offset;
void *ptr;
@@ -585,6 +808,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
}
}
+ /* TODO: VI should read index buffers through TC, so this shouldn't be
+ * needed on VI. */
if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
r600_resource(ib.buffer)->TC_L2_dirty = false;
@@ -592,7 +817,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
/* Check flush flags. */
if (sctx->b.flags)
- sctx->atoms.s.cache_flush->dirty = true;
+ si_mark_atom_dirty(sctx, sctx->atoms.s.cache_flush);
si_need_cs_space(sctx, 0, TRUE);
@@ -618,7 +843,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
/* Workaround for a VGT hang when streamout is enabled.
* It must be done after drawing. */
- if (sctx->b.family == CHIP_HAWAII &&
+ if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
(sctx->b.streamout.streamout_enabled ||
sctx->b.streamout.prims_gen_query_enabled)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 208c8523ef1..0347014948d 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -30,9 +30,135 @@
#include "sid.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
#include "util/u_memory.h"
#include "util/u_simple_shaders.h"
+/**
+ * Add the VGT_TF_PARAM register write to \p pm4 based on the TGSI
+ * properties of the tessellation evaluation shader behind \p shader.
+ *
+ * The register is left untouched (after a debug assertion) when the
+ * primitive mode or the spacing property has an unexpected value.
+ */
+static void si_set_tesseval_regs(struct si_shader *shader,
+				 struct si_pm4_state *pm4)
+{
+	const struct tgsi_shader_info *tinfo = &shader->selector->info;
+	const unsigned prim_mode = tinfo->properties[TGSI_PROPERTY_TES_PRIM_MODE];
+	const unsigned spacing = tinfo->properties[TGSI_PROPERTY_TES_SPACING];
+	unsigned tf_type, tf_partitioning, tf_topology;
+
+	/* Tessellation domain. */
+	if (prim_mode == PIPE_PRIM_LINES) {
+		tf_type = V_028B6C_TESS_ISOLINE;
+	} else if (prim_mode == PIPE_PRIM_TRIANGLES) {
+		tf_type = V_028B6C_TESS_TRIANGLE;
+	} else if (prim_mode == PIPE_PRIM_QUADS) {
+		tf_type = V_028B6C_TESS_QUAD;
+	} else {
+		assert(0);
+		return;
+	}
+
+	/* Edge partitioning. */
+	if (spacing == PIPE_TESS_SPACING_FRACTIONAL_ODD) {
+		tf_partitioning = V_028B6C_PART_FRAC_ODD;
+	} else if (spacing == PIPE_TESS_SPACING_FRACTIONAL_EVEN) {
+		tf_partitioning = V_028B6C_PART_FRAC_EVEN;
+	} else if (spacing == PIPE_TESS_SPACING_EQUAL) {
+		tf_partitioning = V_028B6C_PART_INTEGER;
+	} else {
+		assert(0);
+		return;
+	}
+
+	/* Output topology: point mode wins, then isolines, then winding. */
+	if (tinfo->properties[TGSI_PROPERTY_TES_POINT_MODE]) {
+		tf_topology = V_028B6C_OUTPUT_POINT;
+	} else if (prim_mode == PIPE_PRIM_LINES) {
+		tf_topology = V_028B6C_OUTPUT_LINE;
+	} else if (tinfo->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]) {
+		/* for some reason, this must be the other way around */
+		tf_topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
+	} else {
+		tf_topology = V_028B6C_OUTPUT_TRIANGLE_CW;
+	}
+
+	si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
+		       S_028B6C_TYPE(tf_type) |
+		       S_028B6C_PARTITIONING(tf_partitioning) |
+		       S_028B6C_TOPOLOGY(tf_topology));
+}
+
+/**
+ * Create the PM4 state for a vertex shader compiled as LS.
+ *
+ * The program address is recorded in the PM4 state, while the RSRC1/RSRC2
+ * values are only stored in shader->ls_rsrc1 / shader->ls_rsrc2.
+ * If the PM4 state cannot be allocated, shader->pm4 is NULL and nothing
+ * else is written.
+ */
+static void si_shader_ls(struct si_shader *shader)
+{
+	const unsigned user_sgprs = SI_LS_NUM_USER_SGPR;
+	struct si_pm4_state *state = CALLOC_STRUCT(si_pm4_state);
+	unsigned sgpr_count, comp_cnt;
+	uint64_t code_va;
+
+	shader->pm4 = state;
+	if (state == NULL)
+		return;
+
+	code_va = shader->bo->gpu_address;
+	si_pm4_add_bo(state, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+
+	/* We need at least 2 components for LS.
+	 * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID). */
+	comp_cnt = 1;
+	if (shader->uses_instanceid)
+		comp_cnt = 3;
+
+	sgpr_count = shader->num_sgprs;
+	if (user_sgprs > sgpr_count) {
+		/* Last 2 reserved SGPRs are used for VCC */
+		sgpr_count = user_sgprs + 2;
+	}
+	assert(sgpr_count <= 104);
+
+	si_pm4_set_reg(state, R_00B520_SPI_SHADER_PGM_LO_LS, code_va >> 8);
+	si_pm4_set_reg(state, R_00B524_SPI_SHADER_PGM_HI_LS, code_va >> 40);
+
+	shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
+			   S_00B528_SGPRS((sgpr_count - 1) / 8) |
+			   S_00B528_VGPR_COMP_CNT(comp_cnt);
+	shader->ls_rsrc2 = S_00B52C_USER_SGPR(user_sgprs) |
+			   S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
+}
+
+/**
+ * Create the PM4 state for a tessellation control shader (HS): program
+ * address plus the RSRC1_HS/RSRC2_HS registers.
+ *
+ * If the PM4 state cannot be allocated, shader->pm4 is NULL and nothing
+ * else is written.
+ */
+static void si_shader_hs(struct si_shader *shader)
+{
+	const unsigned user_sgprs = SI_TCS_NUM_USER_SGPR;
+	struct si_pm4_state *state = CALLOC_STRUCT(si_pm4_state);
+	unsigned sgpr_count;
+	uint64_t code_va;
+
+	shader->pm4 = state;
+	if (state == NULL)
+		return;
+
+	code_va = shader->bo->gpu_address;
+	si_pm4_add_bo(state, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
+
+	sgpr_count = shader->num_sgprs;
+	/* One SGPR after user SGPRs is pre-loaded with tessellation factor
+	 * buffer offset. */
+	if ((user_sgprs + 1) > sgpr_count) {
+		/* Last 2 reserved SGPRs are used for VCC */
+		sgpr_count = user_sgprs + 1 + 2;
+	}
+	assert(sgpr_count <= 104);
+
+	si_pm4_set_reg(state, R_00B420_SPI_SHADER_PGM_LO_HS, code_va >> 8);
+	si_pm4_set_reg(state, R_00B424_SPI_SHADER_PGM_HI_HS, code_va >> 40);
+	si_pm4_set_reg(state, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
+		       S_00B428_VGPRS((shader->num_vgprs - 1) / 4) |
+		       S_00B428_SGPRS((sgpr_count - 1) / 8));
+	si_pm4_set_reg(state, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
+		       S_00B42C_USER_SGPR(user_sgprs) |
+		       S_00B42C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+}
+
static void si_shader_es(struct si_shader *shader)
{
struct si_pm4_state *pm4;
@@ -48,9 +174,15 @@ static void si_shader_es(struct si_shader *shader)
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ if (shader->selector->type == PIPE_SHADER_VERTEX) {
+ vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+ vgpr_comp_cnt = 3; /* all components are needed for TES */
+ num_user_sgprs = SI_TES_NUM_USER_SGPR;
+ } else
+ assert(0);
- num_user_sgprs = SI_VS_NUM_USER_SGPR;
num_sgprs = shader->num_sgprs;
/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
if ((num_user_sgprs + 1) > num_sgprs) {
@@ -69,17 +201,37 @@ static void si_shader_es(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
S_00B32C_USER_SGPR(num_user_sgprs) |
S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
+
+ if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+ si_set_tesseval_regs(shader, pm4);
+}
+
+/**
+ * Return the highest stream index referenced by any stream output of the
+ * shader's selector, or 0 when it has no stream outputs.
+ */
+static unsigned si_gs_get_max_stream(struct si_shader *shader)
+{
+	const struct pipe_stream_output_info *so_info = &shader->selector->so;
+	unsigned highest = 0;
+	unsigned n = 0;
+
+	/* With zero outputs the loop body never runs and 0 is returned. */
+	while (n < so_info->num_outputs) {
+		unsigned stream = so_info->output[n].stream;
+
+		if (stream > highest)
+			highest = stream;
+		n++;
+	}
+
+	return highest;
}
static void si_shader_gs(struct si_shader *shader)
{
- unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 >> 2);
+ unsigned gs_vert_itemsize = shader->selector->info.num_outputs * 16;
unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
- unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+ unsigned gsvs_itemsize = (gs_vert_itemsize * gs_max_vert_out) >> 2;
+ unsigned gs_num_invocations = shader->selector->gs_num_invocations;
unsigned cut_mode;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
uint64_t va;
+ unsigned max_stream = si_gs_get_max_stream(shader);
/* The GSVS_RING_ITEMSIZE register takes 15 bits */
assert(gsvs_itemsize < (1 << 15));
@@ -107,16 +259,23 @@ static void si_shader_gs(struct si_shader *shader)
S_028A40_GS_WRITE_OPTIMIZE(1));
si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
- si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize);
- si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
+ si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
+ si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2));
- si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
+ util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
+ si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
- si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);
+ si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize >> 2);
+ si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 1) ? gs_vert_itemsize >> 2 : 0);
+ si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, (max_stream >= 2) ? gs_vert_itemsize >> 2 : 0);
+ si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, (max_stream >= 3) ? gs_vert_itemsize >> 2 : 0);
+
+ si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+ S_028B90_ENABLE(gs_num_invocations > 0));
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
@@ -143,19 +302,29 @@ static void si_shader_gs(struct si_shader *shader)
static void si_shader_vs(struct si_shader *shader)
{
- struct tgsi_shader_info *info = &shader->selector->info;
struct si_pm4_state *pm4;
unsigned num_sgprs, num_user_sgprs;
- unsigned nparams, i, vgpr_comp_cnt;
+ unsigned nparams, vgpr_comp_cnt;
uint64_t va;
unsigned window_space =
shader->selector->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+ bool enable_prim_id = si_vs_exports_prim_id(shader);
pm4 = shader->pm4 = CALLOC_STRUCT(si_pm4_state);
if (pm4 == NULL)
return;
+ /* If this is the GS copy shader, the GS state writes this register.
+ * Otherwise, the VS state writes it.
+ */
+ if (!shader->is_gs_copy_shader) {
+ si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
+ S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
+ si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
+ } else
+ si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, 0);
+
va = shader->bo->gpu_address;
si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
@@ -163,8 +332,11 @@ static void si_shader_vs(struct si_shader *shader)
vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */
num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR;
} else if (shader->selector->type == PIPE_SHADER_VERTEX) {
- vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0;
+ vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0);
num_user_sgprs = SI_VS_NUM_USER_SGPR;
+ } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) {
+ vgpr_comp_cnt = 3; /* all components are needed for TES */
+ num_user_sgprs = SI_TES_NUM_USER_SGPR;
} else
assert(0);
@@ -175,28 +347,8 @@ static void si_shader_vs(struct si_shader *shader)
}
assert(num_sgprs <= 104);
- /* Certain attributes (position, psize, etc.) don't count as params.
- * VS is required to export at least one param and r600_shader_from_tgsi()
- * takes care of adding a dummy export.
- */
- for (nparams = 0, i = 0 ; i < info->num_outputs; i++) {
- switch (info->output_semantic_name[i]) {
- case TGSI_SEMANTIC_CLIPVERTEX:
- case TGSI_SEMANTIC_CLIPDIST:
- case TGSI_SEMANTIC_CULLDIST:
- case TGSI_SEMANTIC_POSITION:
- case TGSI_SEMANTIC_PSIZE:
- case TGSI_SEMANTIC_EDGEFLAG:
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- case TGSI_SEMANTIC_LAYER:
- break;
- default:
- nparams++;
- }
- }
- if (nparams < 1)
- nparams = 1;
-
+ /* VS is required to export at least one param. */
+ nparams = MAX2(shader->nr_param_exports, 1);
si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
S_0286C4_VS_EXPORT_COUNT(nparams - 1));
@@ -236,6 +388,9 @@ static void si_shader_vs(struct si_shader *shader)
S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
+
+ if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
+ si_set_tesseval_regs(shader, pm4);
}
static void si_shader_ps(struct si_shader *shader)
@@ -333,7 +488,18 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
- if (shader->key.vs.as_es)
+ if (shader->key.vs.as_ls)
+ si_shader_ls(shader);
+ else if (shader->key.vs.as_es)
+ si_shader_es(shader);
+ else
+ si_shader_vs(shader);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ si_shader_hs(shader);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (shader->key.tes.as_es)
si_shader_es(shader);
else
si_shader_vs(shader);
@@ -351,7 +517,7 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
}
/* Compute the key for the hw shader variant */
-static INLINE void si_shader_selector_key(struct pipe_context *ctx,
+static inline void si_shader_selector_key(struct pipe_context *ctx,
struct si_shader_selector *sel,
union si_shader_key *key)
{
@@ -367,10 +533,27 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
key->vs.instance_divisors[i] =
sctx->vertex_elements->elements[i].instance_divisor;
- if (sctx->gs_shader) {
+ if (sctx->tes_shader)
+ key->vs.as_ls = 1;
+ else if (sctx->gs_shader) {
key->vs.as_es = 1;
- key->vs.gs_used_inputs = sctx->gs_shader->gs_used_inputs;
+ key->vs.es_enabled_outputs = sctx->gs_shader->inputs_read;
}
+
+ if (!sctx->gs_shader && sctx->ps_shader &&
+ sctx->ps_shader->info.uses_primid)
+ key->vs.export_prim_id = 1;
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ key->tcs.prim_mode =
+ sctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (sctx->gs_shader) {
+ key->tes.as_es = 1;
+ key->tes.es_enabled_outputs = sctx->gs_shader->inputs_read;
+ } else if (sctx->ps_shader && sctx->ps_shader->info.uses_primid)
+ key->tes.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
break;
@@ -468,6 +651,7 @@ static int si_shader_select(struct pipe_context *ctx,
}
si_shader_init_pm4_state(shader);
sel->num_shaders++;
+ p_atomic_inc(&sctx->screen->b.num_compilations);
}
return 0;
@@ -485,6 +669,7 @@ static void *si_create_shader_state(struct pipe_context *ctx,
sel->tokens = tgsi_dup_tokens(state->tokens);
sel->so = state->stream_output;
tgsi_scan_shader(state->tokens, &sel->info);
+ p_atomic_inc(&sscreen->b.num_shaders_created);
switch (pipe_shader_type) {
case PIPE_SHADER_GEOMETRY:
@@ -492,6 +677,8 @@ static void *si_create_shader_state(struct pipe_context *ctx,
sel->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
sel->gs_max_out_vertices =
sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+ sel->gs_num_invocations =
+ sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
for (i = 0; i < sel->info.num_inputs; i++) {
unsigned name = sel->info.input_semantic_name[i];
@@ -501,10 +688,31 @@ static void *si_create_shader_state(struct pipe_context *ctx,
case TGSI_SEMANTIC_PRIMID:
break;
default:
- sel->gs_used_inputs |=
+ sel->inputs_read |=
1llu << si_shader_io_get_unique_index(name, index);
}
}
+ break;
+
+ case PIPE_SHADER_VERTEX:
+ case PIPE_SHADER_TESS_CTRL:
+ for (i = 0; i < sel->info.num_outputs; i++) {
+ unsigned name = sel->info.output_semantic_name[i];
+ unsigned index = sel->info.output_semantic_index[i];
+
+ switch (name) {
+ case TGSI_SEMANTIC_TESSINNER:
+ case TGSI_SEMANTIC_TESSOUTER:
+ case TGSI_SEMANTIC_PATCH:
+ sel->patch_outputs_written |=
+ 1llu << si_shader_io_get_unique_index(name, index);
+ break;
+ default:
+ sel->outputs_written |=
+ 1llu << si_shader_io_get_unique_index(name, index);
+ }
+ }
+ break;
}
if (sscreen->b.debug_flags & DBG_PRECOMPILE)
@@ -531,6 +739,18 @@ static void *si_create_vs_state(struct pipe_context *ctx,
return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
}
+/* Create tessellation control shader state: forwards ctx and state to
+ * si_create_shader_state() with PIPE_SHADER_TESS_CTRL and returns its result. */
+static void *si_create_tcs_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
+{
+ return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_CTRL);
+}
+
+/* Create tessellation evaluation shader state: forwards ctx and state to
+ * si_create_shader_state() with PIPE_SHADER_TESS_EVAL and returns its result. */
+static void *si_create_tes_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
+{
+ return si_create_shader_state(ctx, state, PIPE_SHADER_TESS_EVAL);
+}
+
static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
@@ -540,20 +760,58 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
return;
sctx->vs_shader = sel;
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
}
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state;
+ bool enable_changed = !!sctx->gs_shader != !!sel;
if (sctx->gs_shader == sel)
return;
sctx->gs_shader = sel;
- sctx->clip_regs.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->clip_regs);
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+
+ if (enable_changed)
+ si_shader_change_notify(sctx);
+}
+
+/**
+ * pipe_context::bind_tcs_state hook.
+ *
+ * Stores \p state (which may be NULL) as the context's tessellation control
+ * shader selector. Re-binding the current selector is a no-op. When the bind
+ * switches between "no TCS" and "a TCS", last_tcs is cleared to invalidate
+ * derived tess state.
+ */
+static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_shader_selector *new_tcs = state;
+	struct si_shader_selector *old_tcs = sctx->tcs_shader;
+
+	if (old_tcs == new_tcs)
+		return;
+
+	sctx->tcs_shader = new_tcs;
+
+	/* Only a NULL <-> non-NULL transition invalidates derived tess state. */
+	if ((old_tcs != NULL) != (new_tcs != NULL))
+		sctx->last_tcs = NULL;
+}
+
+/**
+ * pipe_context::bind_tes_state hook.
+ *
+ * Stores \p state (which may be NULL) as the context's tessellation
+ * evaluation shader selector. Re-binding the current selector is a no-op.
+ * Any real change dirties the clip registers and resets last_rast_prim;
+ * switching tessellation on or off additionally calls
+ * si_shader_change_notify() and resets last_tes_sh_base.
+ */
+static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_shader_selector *new_tes = state;
+	struct si_shader_selector *old_tes = sctx->tes_shader;
+
+	if (old_tes == new_tes)
+		return;
+
+	sctx->tes_shader = new_tes;
+	si_mark_atom_dirty(sctx, &sctx->clip_regs);
+	sctx->last_rast_prim = -1; /* force the rasterized primitive to be re-evaluated */
+
+	/* Nothing more to do unless the TES went from NULL to non-NULL or back. */
+	if ((old_tes != NULL) == (new_tes != NULL))
+		return;
+
+	si_shader_change_notify(sctx);
+	sctx->last_tes_sh_base = -1; /* invalidate derived tess state */
}
static void si_make_dummy_ps(struct si_context *sctx)
@@ -594,7 +852,18 @@ static void si_delete_shader_selector(struct pipe_context *ctx,
c = p->next_variant;
switch (sel->type) {
case PIPE_SHADER_VERTEX:
- if (p->key.vs.as_es)
+ if (p->key.vs.as_ls)
+ si_pm4_delete_state(sctx, ls, p->pm4);
+ else if (p->key.vs.as_es)
+ si_pm4_delete_state(sctx, es, p->pm4);
+ else
+ si_pm4_delete_state(sctx, vs, p->pm4);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ si_pm4_delete_state(sctx, hs, p->pm4);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ if (p->key.tes.as_es)
si_pm4_delete_state(sctx, es, p->pm4);
else
si_pm4_delete_state(sctx, vs, p->pm4);
@@ -653,6 +922,30 @@ static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
si_delete_shader_selector(ctx, sel);
}
+/**
+ * pipe_context::delete_tcs_state hook.
+ *
+ * Clears the context's TCS binding if it refers to the selector being
+ * deleted, then hands the selector to si_delete_shader_selector().
+ */
+static void si_delete_tcs_shader(struct pipe_context *ctx, void *state)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_shader_selector *doomed = state;
+
+	/* Do not leave the context pointing at the selector being deleted. */
+	if (doomed == sctx->tcs_shader)
+		sctx->tcs_shader = NULL;
+
+	si_delete_shader_selector(ctx, doomed);
+}
+
+/**
+ * pipe_context::delete_tes_state hook.
+ *
+ * Clears the context's TES binding if it refers to the selector being
+ * deleted, then hands the selector to si_delete_shader_selector().
+ */
+static void si_delete_tes_shader(struct pipe_context *ctx, void *state)
+{
+	struct si_context *sctx = (struct si_context *)ctx;
+	struct si_shader_selector *doomed = state;
+
+	/* Do not leave the context pointing at the selector being deleted. */
+	if (doomed == sctx->tes_shader)
+		sctx->tes_shader = NULL;
+
+	si_delete_shader_selector(ctx, doomed);
+}
+
static void si_update_spi_map(struct si_context *sctx)
{
struct si_shader *ps = sctx->ps_shader->current;
@@ -694,7 +987,10 @@ bcolor:
}
}
- if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
+ if (name == TGSI_SEMANTIC_PRIMID)
+ /* PrimID is written after the last output. */
+ tmp |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]);
+ else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(tmp)) {
/* No corresponding output found, load defaults into input.
* Don't set any other bits.
* (FLAT_SHADE=1 completely changes behavior) */
@@ -720,7 +1016,7 @@ bcolor:
static void si_init_gs_rings(struct si_context *sctx)
{
unsigned esgs_ring_size = 128 * 1024;
- unsigned gsvs_ring_size = 64 * 1024 * 1024;
+ unsigned gsvs_ring_size = 60 * 1024 * 1024;
assert(!sctx->gs_rings);
sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
@@ -732,6 +1028,12 @@ static void si_init_gs_rings(struct si_context *sctx)
PIPE_USAGE_DEFAULT, gsvs_ring_size);
if (sctx->b.chip_class >= CIK) {
+ if (sctx->b.chip_class >= VI) {
+ /* The maximum sizes are 63.999 MB on VI, because
+ * the register fields only have 18 bits. */
+ assert(esgs_ring_size / 256 < (1 << 18));
+ assert(gsvs_ring_size / 256 < (1 << 18));
+ }
si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
esgs_ring_size / 256);
si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,
@@ -745,15 +1047,42 @@ static void si_init_gs_rings(struct si_context *sctx)
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
sctx->esgs_ring, 0, esgs_ring_size,
- true, true, 4, 64);
+ true, true, 4, 64, 0);
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
sctx->esgs_ring, 0, esgs_ring_size,
- false, false, 0, 0);
+ false, false, 0, 0, 0);
si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
sctx->gsvs_ring, 0, gsvs_ring_size,
- false, false, 0, 0);
+ false, false, 0, 0, 0);
}
+/**
+ * Set the GSVS ring on the four geometry-shader ring slots
+ * (SI_RING_GSVS and SI_RING_GSVS_1..3).
+ *
+ * All four slots get the same arguments except the last one, which is
+ * slot_index * gsvs_itemsize * 64, where gsvs_itemsize is
+ * (num_outputs * 16) * gs_max_out_vertices of the bound geometry shader.
+ * sctx->gs_shader is dereferenced unconditionally, so a geometry shader
+ * must be bound when this is called.
+ */
+static void si_update_gs_rings(struct si_context *sctx)
+{
+	static const unsigned ring_slots[] = {
+		SI_RING_GSVS,
+		SI_RING_GSVS_1,
+		SI_RING_GSVS_2,
+		SI_RING_GSVS_3,
+	};
+	unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16;
+	unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices;
+	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
+	unsigned n;
+
+	for (n = 0; n < sizeof(ring_slots) / sizeof(ring_slots[0]); n++) {
+		/* The product is formed in unsigned arithmetic and widened
+		 * afterwards; slot 0 yields offset 0. */
+		uint64_t offset = (gsvs_itemsize * n) * 64;
+
+		si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY,
+				   ring_slots[n], sctx->gsvs_ring,
+				   gsvs_itemsize, 64, true, true, 4, 16,
+				   offset);
+	}
+}
/**
* @returns 1 if \p sel has been updated to use a new scratch buffer and 0
* otherwise.
@@ -763,7 +1092,6 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
{
struct si_shader *shader;
uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
- unsigned char *ptr;
if (!sel)
return 0;
@@ -784,12 +1112,7 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
/* Replace the shader bo with a new bo that has the relocs applied. */
- r600_resource_reference(&shader->bo, NULL);
- shader->bo = si_resource_create_custom(&sctx->screen->b.b, PIPE_USAGE_IMMUTABLE,
- shader->binary.code_size);
- ptr = sctx->screen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
- util_memcpy_cpu_to_le32(ptr, shader->binary.code, shader->binary.code_size);
- sctx->screen->b.ws->buffer_unmap(shader->bo->cs_buf);
+ si_shader_binary_upload(sctx->screen, shader);
/* Update the shader state to use the new shader bo. */
si_shader_init_pm4_state(shader);
@@ -818,10 +1141,14 @@ static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_context *sctx,
static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
{
+ unsigned bytes = 0;
- return MAX3(si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader),
- si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader),
- si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tcs_shader));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx, sctx->tes_shader));
+ return bytes;
}
static void si_update_spi_tmpring_size(struct si_context *sctx)
@@ -855,15 +1182,29 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
if (si_update_scratch_buffer(sctx, sctx->gs_shader))
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+ if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
- /* VS can be bound as ES or VS. */
- if (sctx->gs_shader) {
+ /* VS can be bound as LS, ES, or VS. */
+ if (sctx->tes_shader) {
+ if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+ } else if (sctx->gs_shader) {
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
} else {
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
}
+
+ /* TES can be bound as ES or VS. */
+ if (sctx->gs_shader) {
+ if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+ si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+ } else {
+ if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+ }
}
/* The LLVM shader backend should be reporting aligned scratch_sizes. */
@@ -874,60 +1215,187 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
}
+/**
+ * Allocate the tessellation factor ring and the PM4 state that programs it.
+ *
+ * The ring buffer is created with size 32768 * info.max_se and cleared with
+ * fui(0). Its size (width0 / 4) and address (gpu_address >> 8) are written
+ * to VGT_TF_RING_SIZE and VGT_TF_MEMORY_BASE, whose register offsets differ
+ * between CIK and later chips and older ones. The state is then bound as
+ * "tf_ring", the buffer is set on the SI_RING_TESS_FACTOR slot of the
+ * TESS_CTRL stage, and a VGT flush is requested.
+ *
+ * Must only be called while sctx->tf_state is NULL. If either allocation
+ * fails, nothing is bound and sctx->tf_state is left NULL, so a caller that
+ * tests "!sctx->tf_state" will try again on its next pass.
+ */
+static void si_init_tess_factor_ring(struct si_context *sctx)
+{
+	assert(!sctx->tf_state);
+	sctx->tf_state = CALLOC_STRUCT(si_pm4_state);
+	if (!sctx->tf_state)
+		return;
+
+	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
+					   PIPE_USAGE_DEFAULT,
+					   32768 * sctx->screen->b.info.max_se);
+	if (!sctx->tf_ring) {
+		/* Drop the empty state again so that a later call starts
+		 * from scratch (the assert above expects tf_state == NULL). */
+		FREE(sctx->tf_state);
+		sctx->tf_state = NULL;
+		return;
+	}
+
+	sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
+			     sctx->tf_ring->width0, fui(0), false);
+	/* The value written to the SIZE field must not have bits outside
+	 * of that field. */
+	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
+
+	if (sctx->b.chip_class >= CIK) {
+		si_pm4_set_reg(sctx->tf_state, R_030938_VGT_TF_RING_SIZE,
+			       S_030938_SIZE(sctx->tf_ring->width0 / 4));
+		si_pm4_set_reg(sctx->tf_state, R_030940_VGT_TF_MEMORY_BASE,
+			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
+	} else {
+		si_pm4_set_reg(sctx->tf_state, R_008988_VGT_TF_RING_SIZE,
+			       S_008988_SIZE(sctx->tf_ring->width0 / 4));
+		si_pm4_set_reg(sctx->tf_state, R_0089B8_VGT_TF_MEMORY_BASE,
+			       r600_resource(sctx->tf_ring)->gpu_address >> 8);
+	}
+	si_pm4_add_bo(sctx->tf_state, r600_resource(sctx->tf_ring),
+		      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
+	si_pm4_bind_state(sctx, tf_ring, sctx->tf_state);
+
+	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
+			   SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
+			   sctx->tf_ring->width0, false, false, 0, 0, 0);
+
+	sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
+}
+
+/**
+ * Build the tessellation control shader used when no TCS is bound in the
+ * VS->TCS->TES chain. In that case VS passes its outputs to TES directly,
+ * so this fixed-function shader only has to write TESSOUTER and TESSINNER.
+ *
+ * The result is stored in sctx->fixed_func_tcs_shader, which must still be
+ * unset when this is called.
+ */
+static void si_generate_fixed_func_tcs(struct si_context *sctx)
+{
+	struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_TESS_CTRL);
+	struct ureg_src outer_src, inner_src;
+	struct ureg_dst outer_dst, inner_dst;
+
+	if (!ureg)
+		return; /* ureg_create failed; fixed_func_tcs_shader stays unset */
+
+	assert(!sctx->fixed_func_tcs_shader);
+
+	/* Constant 0 of the driver-state constant buffer feeds TESSOUTER,
+	 * constant 1 feeds TESSINNER. */
+	ureg_DECL_constant2D(ureg, 0, 1, SI_DRIVER_STATE_CONST_BUF);
+	outer_src = ureg_src_register(TGSI_FILE_CONSTANT, 0);
+	outer_src = ureg_src_dimension(outer_src, SI_DRIVER_STATE_CONST_BUF);
+	inner_src = ureg_src_register(TGSI_FILE_CONSTANT, 1);
+	inner_src = ureg_src_dimension(inner_src, SI_DRIVER_STATE_CONST_BUF);
+
+	outer_dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
+	inner_dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
+
+	ureg_MOV(ureg, outer_dst, outer_src);
+	ureg_MOV(ureg, inner_dst, inner_src);
+	ureg_END(ureg);
+
+	sctx->fixed_func_tcs_shader =
+		ureg_create_shader_and_destroy(ureg, &sctx->b.b);
+	assert(sctx->fixed_func_tcs_shader);
+}
+
+/**
+ * Bind the VGT_SHADER_STAGES_EN state that matches the bound shaders.
+ *
+ * One PM4 state per shader combination is built on first use and kept in
+ * sctx->vgt_shader_config[], indexed as:
+ *   0 = VS, 1 = VS+GS, 2 = VS+Tess, 3 = VS+Tess+GS
+ */
+static void si_update_vgt_shader_config(struct si_context *sctx)
+{
+	const bool has_tess = sctx->tes_shader != NULL;
+	const bool has_gs = sctx->gs_shader != NULL;
+	unsigned index = 2 * has_tess + has_gs;
+	struct si_pm4_state **slot = &sctx->vgt_shader_config[index];
+
+	if (*slot == NULL) {
+		uint32_t stages = 0;
+
+		*slot = CALLOC_STRUCT(si_pm4_state);
+
+		/* Tessellation turns on the LS and HS stages. */
+		if (has_tess)
+			stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
+				  S_028B54_HS_EN(1);
+
+		if (has_gs) {
+			/* ES is the DS stage with tessellation and the REAL
+			 * stage without; VS runs the copy shader. */
+			if (has_tess)
+				stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
+			else
+				stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
+
+			stages |= S_028B54_GS_EN(1) |
+				  S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+		} else if (has_tess) {
+			stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+		}
+
+		si_pm4_set_reg(*slot, R_028B54_VGT_SHADER_STAGES_EN, stages);
+	}
+	si_pm4_bind_state(sctx, vgt_shader_config, *slot);
+}
+
+/**
+ * Copy the stream-output layout of \p shader into the context's streamout
+ * state.
+ *
+ * For every stream-output entry, bit (stream * 4 + output_buffer) is set in
+ * streamout.enabled_stream_buffers_mask. streamout.stride_in_dw is assigned
+ * from the shader's so.stride.
+ */
+static void si_update_so(struct si_context *sctx, struct si_shader_selector *shader)
+{
+	struct pipe_stream_output_info *so = &shader->so;
+	uint32_t mask = 0;
+	int i;
+
+	for (i = 0; i < so->num_outputs; i++) {
+		/* Each stream owns a group of four bits, one per buffer. */
+		mask |= (1 << so->output[i].output_buffer)
+			<< (so->output[i].stream * 4);
+	}
+
+	sctx->b.streamout.enabled_stream_buffers_mask = mask;
+	sctx->b.streamout.stride_in_dw = so->stride;
+}
+
void si_update_shaders(struct si_context *sctx)
{
struct pipe_context *ctx = (struct pipe_context*)sctx;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+ /* Update stages before GS. */
+ if (sctx->tes_shader) {
+ if (!sctx->tf_state)
+ si_init_tess_factor_ring(sctx);
+
+ /* VS as LS */
+ si_shader_select(ctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
+
+ if (sctx->tcs_shader) {
+ si_shader_select(ctx, sctx->tcs_shader);
+ si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
+ } else {
+ if (!sctx->fixed_func_tcs_shader)
+ si_generate_fixed_func_tcs(sctx);
+ si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+ si_pm4_bind_state(sctx, hs,
+ sctx->fixed_func_tcs_shader->current->pm4);
+ }
+
+ si_shader_select(ctx, sctx->tes_shader);
+ if (sctx->gs_shader) {
+ /* TES as ES */
+ si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
+ } else {
+ /* TES as VS */
+ si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
+ si_update_so(sctx, sctx->tes_shader);
+ }
+ } else if (sctx->gs_shader) {
+ /* VS as ES */
+ si_shader_select(ctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+ } else {
+ /* VS as VS */
+ si_shader_select(ctx, sctx->vs_shader);
+ si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+ si_update_so(sctx, sctx->vs_shader);
+ }
+
+ /* Update GS. */
if (sctx->gs_shader) {
si_shader_select(ctx, sctx->gs_shader);
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
-
- sctx->b.streamout.stride_in_dw = sctx->gs_shader->so.stride;
-
- si_shader_select(ctx, sctx->vs_shader);
- si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
+ si_update_so(sctx, sctx->gs_shader);
if (!sctx->gs_rings)
si_init_gs_rings(sctx);
+
if (sctx->emitted.named.gs_rings != sctx->gs_rings)
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
- si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
- sctx->gsvs_ring,
- sctx->gs_shader->gs_max_out_vertices *
- sctx->gs_shader->info.num_outputs * 16,
- 64, true, true, 4, 16);
-
- if (!sctx->gs_on) {
- sctx->gs_on = CALLOC_STRUCT(si_pm4_state);
-
- si_pm4_set_reg(sctx->gs_on, R_028B54_VGT_SHADER_STAGES_EN,
- S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
- S_028B54_GS_EN(1) |
- S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER));
- }
- si_pm4_bind_state(sctx, gs_onoff, sctx->gs_on);
+ si_update_gs_rings(sctx);
} else {
- si_shader_select(ctx, sctx->vs_shader);
- si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
-
- sctx->b.streamout.stride_in_dw = sctx->vs_shader->so.stride;
-
- if (!sctx->gs_off) {
- sctx->gs_off = CALLOC_STRUCT(si_pm4_state);
-
- si_pm4_set_reg(sctx->gs_off, R_028A40_VGT_GS_MODE, 0);
- si_pm4_set_reg(sctx->gs_off, R_028B54_VGT_SHADER_STAGES_EN, 0);
- }
- si_pm4_bind_state(sctx, gs_onoff, sctx->gs_off);
si_pm4_bind_state(sctx, gs_rings, NULL);
si_pm4_bind_state(sctx, gs, NULL);
si_pm4_bind_state(sctx, es, NULL);
}
+ si_update_vgt_shader_config(sctx);
+
si_shader_select(ctx, sctx->ps_shader);
if (!sctx->ps_shader->current) {
@@ -957,29 +1425,35 @@ void si_update_shaders(struct si_context *sctx)
if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
if (sctx->smoothing_enabled != sctx->ps_shader->current->key.ps.poly_line_smoothing) {
sctx->smoothing_enabled = sctx->ps_shader->current->key.ps.poly_line_smoothing;
- sctx->msaa_config.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
if (sctx->b.chip_class == SI)
- sctx->db_render_state.dirty = true;
+ si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
}
void si_init_shader_functions(struct si_context *sctx)
{
sctx->b.b.create_vs_state = si_create_vs_state;
+ sctx->b.b.create_tcs_state = si_create_tcs_state;
+ sctx->b.b.create_tes_state = si_create_tes_state;
sctx->b.b.create_gs_state = si_create_gs_state;
sctx->b.b.create_fs_state = si_create_fs_state;
sctx->b.b.bind_vs_state = si_bind_vs_shader;
+ sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
+ sctx->b.b.bind_tes_state = si_bind_tes_shader;
sctx->b.b.bind_gs_state = si_bind_gs_shader;
sctx->b.b.bind_fs_state = si_bind_ps_shader;
sctx->b.b.delete_vs_state = si_delete_vs_shader;
+ sctx->b.b.delete_tcs_state = si_delete_tcs_shader;
+ sctx->b.b.delete_tes_state = si_delete_tes_shader;
sctx->b.b.delete_gs_state = si_delete_gs_shader;
sctx->b.b.delete_fs_state = si_delete_ps_shader;
}
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 35d5ee232a0..66fdf35c8af 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -206,6 +206,398 @@
* 6. COMMAND [29:22] | BYTE_COUNT [20:0]
*/
+
+#define R_000E4C_SRBM_STATUS2 0x000E4C
+#define S_000E4C_SDMA_RQ_PENDING(x) (((x) & 0x1) << 0)
+#define G_000E4C_SDMA_RQ_PENDING(x) (((x) >> 0) & 0x1)
+#define C_000E4C_SDMA_RQ_PENDING 0xFFFFFFFE
+#define S_000E4C_TST_RQ_PENDING(x) (((x) & 0x1) << 1)
+#define G_000E4C_TST_RQ_PENDING(x) (((x) >> 1) & 0x1)
+#define C_000E4C_TST_RQ_PENDING 0xFFFFFFFD
+#define S_000E4C_SDMA1_RQ_PENDING(x) (((x) & 0x1) << 2)
+#define G_000E4C_SDMA1_RQ_PENDING(x) (((x) >> 2) & 0x1)
+#define C_000E4C_SDMA1_RQ_PENDING 0xFFFFFFFB
+#define S_000E4C_VCE0_RQ_PENDING(x) (((x) & 0x1) << 3)
+#define G_000E4C_VCE0_RQ_PENDING(x) (((x) >> 3) & 0x1)
+#define C_000E4C_VCE0_RQ_PENDING 0xFFFFFFF7
+#define S_000E4C_VP8_BUSY(x) (((x) & 0x1) << 4)
+#define G_000E4C_VP8_BUSY(x) (((x) >> 4) & 0x1)
+#define C_000E4C_VP8_BUSY 0xFFFFFFEF
+#define S_000E4C_SDMA_BUSY(x) (((x) & 0x1) << 5)
+#define G_000E4C_SDMA_BUSY(x) (((x) >> 5) & 0x1)
+#define C_000E4C_SDMA_BUSY 0xFFFFFFDF
+#define S_000E4C_SDMA1_BUSY(x) (((x) & 0x1) << 6)
+#define G_000E4C_SDMA1_BUSY(x) (((x) >> 6) & 0x1)
+#define C_000E4C_SDMA1_BUSY 0xFFFFFFBF
+#define S_000E4C_VCE0_BUSY(x) (((x) & 0x1) << 7)
+#define G_000E4C_VCE0_BUSY(x) (((x) >> 7) & 0x1)
+#define C_000E4C_VCE0_BUSY 0xFFFFFF7F
+#define S_000E4C_XDMA_BUSY(x) (((x) & 0x1) << 8)
+#define G_000E4C_XDMA_BUSY(x) (((x) >> 8) & 0x1)
+#define C_000E4C_XDMA_BUSY 0xFFFFFEFF
+#define S_000E4C_CHUB_BUSY(x) (((x) & 0x1) << 9)
+#define G_000E4C_CHUB_BUSY(x) (((x) >> 9) & 0x1)
+#define C_000E4C_CHUB_BUSY 0xFFFFFDFF
+#define S_000E4C_SDMA2_BUSY(x) (((x) & 0x1) << 10)
+#define G_000E4C_SDMA2_BUSY(x) (((x) >> 10) & 0x1)
+#define C_000E4C_SDMA2_BUSY 0xFFFFFBFF
+#define S_000E4C_SDMA3_BUSY(x) (((x) & 0x1) << 11)
+#define G_000E4C_SDMA3_BUSY(x) (((x) >> 11) & 0x1)
+#define C_000E4C_SDMA3_BUSY 0xFFFFF7FF
+#define S_000E4C_SAMSCP_BUSY(x) (((x) & 0x1) << 12)
+#define G_000E4C_SAMSCP_BUSY(x) (((x) >> 12) & 0x1)
+#define C_000E4C_SAMSCP_BUSY 0xFFFFEFFF
+#define S_000E4C_ISP_BUSY(x) (((x) & 0x1) << 13)
+#define G_000E4C_ISP_BUSY(x) (((x) >> 13) & 0x1)
+#define C_000E4C_ISP_BUSY 0xFFFFDFFF
+#define S_000E4C_VCE1_BUSY(x) (((x) & 0x1) << 14)
+#define G_000E4C_VCE1_BUSY(x) (((x) >> 14) & 0x1)
+#define C_000E4C_VCE1_BUSY 0xFFFFBFFF
+#define S_000E4C_ODE_BUSY(x) (((x) & 0x1) << 15)
+#define G_000E4C_ODE_BUSY(x) (((x) >> 15) & 0x1)
+#define C_000E4C_ODE_BUSY 0xFFFF7FFF
+#define S_000E4C_SDMA2_RQ_PENDING(x) (((x) & 0x1) << 16)
+#define G_000E4C_SDMA2_RQ_PENDING(x) (((x) >> 16) & 0x1)
+#define C_000E4C_SDMA2_RQ_PENDING 0xFFFEFFFF
+#define S_000E4C_SDMA3_RQ_PENDING(x) (((x) & 0x1) << 17)
+#define G_000E4C_SDMA3_RQ_PENDING(x) (((x) >> 17) & 0x1)
+#define C_000E4C_SDMA3_RQ_PENDING 0xFFFDFFFF
+#define S_000E4C_SAMSCP_RQ_PENDING(x) (((x) & 0x1) << 18)
+#define G_000E4C_SAMSCP_RQ_PENDING(x) (((x) >> 18) & 0x1)
+#define C_000E4C_SAMSCP_RQ_PENDING 0xFFFBFFFF
+#define S_000E4C_ISP_RQ_PENDING(x) (((x) & 0x1) << 19)
+#define G_000E4C_ISP_RQ_PENDING(x) (((x) >> 19) & 0x1)
+#define C_000E4C_ISP_RQ_PENDING 0xFFF7FFFF
+#define S_000E4C_VCE1_RQ_PENDING(x) (((x) & 0x1) << 20)
+#define G_000E4C_VCE1_RQ_PENDING(x) (((x) >> 20) & 0x1)
+#define C_000E4C_VCE1_RQ_PENDING 0xFFEFFFFF
+#define R_000E50_SRBM_STATUS 0x000E50
+#define S_000E50_UVD_RQ_PENDING(x) (((x) & 0x1) << 1)
+#define G_000E50_UVD_RQ_PENDING(x) (((x) >> 1) & 0x1)
+#define C_000E50_UVD_RQ_PENDING 0xFFFFFFFD
+#define S_000E50_SAMMSP_RQ_PENDING(x) (((x) & 0x1) << 2)
+#define G_000E50_SAMMSP_RQ_PENDING(x) (((x) >> 2) & 0x1)
+#define C_000E50_SAMMSP_RQ_PENDING 0xFFFFFFFB
+#define S_000E50_ACP_RQ_PENDING(x) (((x) & 0x1) << 3)
+#define G_000E50_ACP_RQ_PENDING(x) (((x) >> 3) & 0x1)
+#define C_000E50_ACP_RQ_PENDING 0xFFFFFFF7
+#define S_000E50_SMU_RQ_PENDING(x) (((x) & 0x1) << 4)
+#define G_000E50_SMU_RQ_PENDING(x) (((x) >> 4) & 0x1)
+#define C_000E50_SMU_RQ_PENDING 0xFFFFFFEF
+#define S_000E50_GRBM_RQ_PENDING(x) (((x) & 0x1) << 5)
+#define G_000E50_GRBM_RQ_PENDING(x) (((x) >> 5) & 0x1)
+#define C_000E50_GRBM_RQ_PENDING 0xFFFFFFDF
+#define S_000E50_HI_RQ_PENDING(x) (((x) & 0x1) << 6)
+#define G_000E50_HI_RQ_PENDING(x) (((x) >> 6) & 0x1)
+#define C_000E50_HI_RQ_PENDING 0xFFFFFFBF
+#define S_000E50_VMC_BUSY(x) (((x) & 0x1) << 8)
+#define G_000E50_VMC_BUSY(x) (((x) >> 8) & 0x1)
+#define C_000E50_VMC_BUSY 0xFFFFFEFF
+#define S_000E50_MCB_BUSY(x) (((x) & 0x1) << 9)
+#define G_000E50_MCB_BUSY(x) (((x) >> 9) & 0x1)
+#define C_000E50_MCB_BUSY 0xFFFFFDFF
+#define S_000E50_MCB_NON_DISPLAY_BUSY(x) (((x) & 0x1) << 10)
+#define G_000E50_MCB_NON_DISPLAY_BUSY(x) (((x) >> 10) & 0x1)
+#define C_000E50_MCB_NON_DISPLAY_BUSY 0xFFFFFBFF
+#define S_000E50_MCC_BUSY(x) (((x) & 0x1) << 11)
+#define G_000E50_MCC_BUSY(x) (((x) >> 11) & 0x1)
+#define C_000E50_MCC_BUSY 0xFFFFF7FF
+#define S_000E50_MCD_BUSY(x) (((x) & 0x1) << 12)
+#define G_000E50_MCD_BUSY(x) (((x) >> 12) & 0x1)
+#define C_000E50_MCD_BUSY 0xFFFFEFFF
+#define S_000E50_VMC1_BUSY(x) (((x) & 0x1) << 13)
+#define G_000E50_VMC1_BUSY(x) (((x) >> 13) & 0x1)
+#define C_000E50_VMC1_BUSY 0xFFFFDFFF
+#define S_000E50_SEM_BUSY(x) (((x) & 0x1) << 14)
+#define G_000E50_SEM_BUSY(x) (((x) >> 14) & 0x1)
+#define C_000E50_SEM_BUSY 0xFFFFBFFF
+#define S_000E50_ACP_BUSY(x) (((x) & 0x1) << 16)
+#define G_000E50_ACP_BUSY(x) (((x) >> 16) & 0x1)
+#define C_000E50_ACP_BUSY 0xFFFEFFFF
+#define S_000E50_IH_BUSY(x) (((x) & 0x1) << 17)
+#define G_000E50_IH_BUSY(x) (((x) >> 17) & 0x1)
+#define C_000E50_IH_BUSY 0xFFFDFFFF
+#define S_000E50_UVD_BUSY(x) (((x) & 0x1) << 19)
+#define G_000E50_UVD_BUSY(x) (((x) >> 19) & 0x1)
+#define C_000E50_UVD_BUSY 0xFFF7FFFF
+#define S_000E50_SAMMSP_BUSY(x) (((x) & 0x1) << 20)
+#define G_000E50_SAMMSP_BUSY(x) (((x) >> 20) & 0x1)
+#define C_000E50_SAMMSP_BUSY 0xFFEFFFFF
+#define S_000E50_GCATCL2_BUSY(x) (((x) & 0x1) << 21)
+#define G_000E50_GCATCL2_BUSY(x) (((x) >> 21) & 0x1)
+#define C_000E50_GCATCL2_BUSY 0xFFDFFFFF
+#define S_000E50_OSATCL2_BUSY(x) (((x) & 0x1) << 22)
+#define G_000E50_OSATCL2_BUSY(x) (((x) >> 22) & 0x1)
+#define C_000E50_OSATCL2_BUSY 0xFFBFFFFF
+#define S_000E50_BIF_BUSY(x) (((x) & 0x1) << 29)
+#define G_000E50_BIF_BUSY(x) (((x) >> 29) & 0x1)
+#define C_000E50_BIF_BUSY 0xDFFFFFFF
+#define R_000E54_SRBM_STATUS3 0x000E54
+#define S_000E54_MCC0_BUSY(x) (((x) & 0x1) << 0)
+#define G_000E54_MCC0_BUSY(x) (((x) >> 0) & 0x1)
+#define C_000E54_MCC0_BUSY 0xFFFFFFFE
+#define S_000E54_MCC1_BUSY(x) (((x) & 0x1) << 1)
+#define G_000E54_MCC1_BUSY(x) (((x) >> 1) & 0x1)
+#define C_000E54_MCC1_BUSY 0xFFFFFFFD
+#define S_000E54_MCC2_BUSY(x) (((x) & 0x1) << 2)
+#define G_000E54_MCC2_BUSY(x) (((x) >> 2) & 0x1)
+#define C_000E54_MCC2_BUSY 0xFFFFFFFB
+#define S_000E54_MCC3_BUSY(x) (((x) & 0x1) << 3)
+#define G_000E54_MCC3_BUSY(x) (((x) >> 3) & 0x1)
+#define C_000E54_MCC3_BUSY 0xFFFFFFF7
+#define S_000E54_MCC4_BUSY(x) (((x) & 0x1) << 4)
+#define G_000E54_MCC4_BUSY(x) (((x) >> 4) & 0x1)
+#define C_000E54_MCC4_BUSY 0xFFFFFFEF
+#define S_000E54_MCC5_BUSY(x) (((x) & 0x1) << 5)
+#define G_000E54_MCC5_BUSY(x) (((x) >> 5) & 0x1)
+#define C_000E54_MCC5_BUSY 0xFFFFFFDF
+#define S_000E54_MCC6_BUSY(x) (((x) & 0x1) << 6)
+#define G_000E54_MCC6_BUSY(x) (((x) >> 6) & 0x1)
+#define C_000E54_MCC6_BUSY 0xFFFFFFBF
+#define S_000E54_MCC7_BUSY(x) (((x) & 0x1) << 7)
+#define G_000E54_MCC7_BUSY(x) (((x) >> 7) & 0x1)
+#define C_000E54_MCC7_BUSY 0xFFFFFF7F
+#define S_000E54_MCD0_BUSY(x) (((x) & 0x1) << 8)
+#define G_000E54_MCD0_BUSY(x) (((x) >> 8) & 0x1)
+#define C_000E54_MCD0_BUSY 0xFFFFFEFF
+#define S_000E54_MCD1_BUSY(x) (((x) & 0x1) << 9)
+#define G_000E54_MCD1_BUSY(x) (((x) >> 9) & 0x1)
+#define C_000E54_MCD1_BUSY 0xFFFFFDFF
+#define S_000E54_MCD2_BUSY(x) (((x) & 0x1) << 10)
+#define G_000E54_MCD2_BUSY(x) (((x) >> 10) & 0x1)
+#define C_000E54_MCD2_BUSY 0xFFFFFBFF
+#define S_000E54_MCD3_BUSY(x) (((x) & 0x1) << 11)
+#define G_000E54_MCD3_BUSY(x) (((x) >> 11) & 0x1)
+#define C_000E54_MCD3_BUSY 0xFFFFF7FF
+#define S_000E54_MCD4_BUSY(x) (((x) & 0x1) << 12)
+#define G_000E54_MCD4_BUSY(x) (((x) >> 12) & 0x1)
+#define C_000E54_MCD4_BUSY 0xFFFFEFFF
+#define S_000E54_MCD5_BUSY(x) (((x) & 0x1) << 13)
+#define G_000E54_MCD5_BUSY(x) (((x) >> 13) & 0x1)
+#define C_000E54_MCD5_BUSY 0xFFFFDFFF
+#define S_000E54_MCD6_BUSY(x) (((x) & 0x1) << 14)
+#define G_000E54_MCD6_BUSY(x) (((x) >> 14) & 0x1)
+#define C_000E54_MCD6_BUSY 0xFFFFBFFF
+#define S_000E54_MCD7_BUSY(x) (((x) & 0x1) << 15)
+#define G_000E54_MCD7_BUSY(x) (((x) >> 15) & 0x1)
+#define C_000E54_MCD7_BUSY 0xFFFF7FFF
+#define R_00D034_SDMA0_STATUS_REG 0x00D034
+#define S_00D034_IDLE(x) (((x) & 0x1) << 0)
+#define G_00D034_IDLE(x) (((x) >> 0) & 0x1)
+#define C_00D034_IDLE 0xFFFFFFFE
+#define S_00D034_REG_IDLE(x) (((x) & 0x1) << 1)
+#define G_00D034_REG_IDLE(x) (((x) >> 1) & 0x1)
+#define C_00D034_REG_IDLE 0xFFFFFFFD
+#define S_00D034_RB_EMPTY(x) (((x) & 0x1) << 2)
+#define G_00D034_RB_EMPTY(x) (((x) >> 2) & 0x1)
+#define C_00D034_RB_EMPTY 0xFFFFFFFB
+#define S_00D034_RB_FULL(x) (((x) & 0x1) << 3)
+#define G_00D034_RB_FULL(x) (((x) >> 3) & 0x1)
+#define C_00D034_RB_FULL 0xFFFFFFF7
+#define S_00D034_RB_CMD_IDLE(x) (((x) & 0x1) << 4)
+#define G_00D034_RB_CMD_IDLE(x) (((x) >> 4) & 0x1)
+#define C_00D034_RB_CMD_IDLE 0xFFFFFFEF
+#define S_00D034_RB_CMD_FULL(x) (((x) & 0x1) << 5)
+#define G_00D034_RB_CMD_FULL(x) (((x) >> 5) & 0x1)
+#define C_00D034_RB_CMD_FULL 0xFFFFFFDF
+#define S_00D034_IB_CMD_IDLE(x) (((x) & 0x1) << 6)
+#define G_00D034_IB_CMD_IDLE(x) (((x) >> 6) & 0x1)
+#define C_00D034_IB_CMD_IDLE 0xFFFFFFBF
+#define S_00D034_IB_CMD_FULL(x) (((x) & 0x1) << 7)
+#define G_00D034_IB_CMD_FULL(x) (((x) >> 7) & 0x1)
+#define C_00D034_IB_CMD_FULL 0xFFFFFF7F
+#define S_00D034_BLOCK_IDLE(x) (((x) & 0x1) << 8)
+#define G_00D034_BLOCK_IDLE(x) (((x) >> 8) & 0x1)
+#define C_00D034_BLOCK_IDLE 0xFFFFFEFF
+#define S_00D034_INSIDE_IB(x) (((x) & 0x1) << 9)
+#define G_00D034_INSIDE_IB(x) (((x) >> 9) & 0x1)
+#define C_00D034_INSIDE_IB 0xFFFFFDFF
+#define S_00D034_EX_IDLE(x) (((x) & 0x1) << 10)
+#define G_00D034_EX_IDLE(x) (((x) >> 10) & 0x1)
+#define C_00D034_EX_IDLE 0xFFFFFBFF
+#define S_00D034_EX_IDLE_POLL_TIMER_EXPIRE(x) (((x) & 0x1) << 11)
+#define G_00D034_EX_IDLE_POLL_TIMER_EXPIRE(x) (((x) >> 11) & 0x1)
+#define C_00D034_EX_IDLE_POLL_TIMER_EXPIRE 0xFFFFF7FF
+#define S_00D034_PACKET_READY(x) (((x) & 0x1) << 12)
+#define G_00D034_PACKET_READY(x) (((x) >> 12) & 0x1)
+#define C_00D034_PACKET_READY 0xFFFFEFFF
+#define S_00D034_MC_WR_IDLE(x) (((x) & 0x1) << 13)
+#define G_00D034_MC_WR_IDLE(x) (((x) >> 13) & 0x1)
+#define C_00D034_MC_WR_IDLE 0xFFFFDFFF
+#define S_00D034_SRBM_IDLE(x) (((x) & 0x1) << 14)
+#define G_00D034_SRBM_IDLE(x) (((x) >> 14) & 0x1)
+#define C_00D034_SRBM_IDLE 0xFFFFBFFF
+#define S_00D034_CONTEXT_EMPTY(x) (((x) & 0x1) << 15)
+#define G_00D034_CONTEXT_EMPTY(x) (((x) >> 15) & 0x1)
+#define C_00D034_CONTEXT_EMPTY 0xFFFF7FFF
+#define S_00D034_DELTA_RPTR_FULL(x) (((x) & 0x1) << 16)
+#define G_00D034_DELTA_RPTR_FULL(x) (((x) >> 16) & 0x1)
+#define C_00D034_DELTA_RPTR_FULL 0xFFFEFFFF
+#define S_00D034_RB_MC_RREQ_IDLE(x) (((x) & 0x1) << 17)
+#define G_00D034_RB_MC_RREQ_IDLE(x) (((x) >> 17) & 0x1)
+#define C_00D034_RB_MC_RREQ_IDLE 0xFFFDFFFF
+#define S_00D034_IB_MC_RREQ_IDLE(x) (((x) & 0x1) << 18)
+#define G_00D034_IB_MC_RREQ_IDLE(x) (((x) >> 18) & 0x1)
+#define C_00D034_IB_MC_RREQ_IDLE 0xFFFBFFFF
+#define S_00D034_MC_RD_IDLE(x) (((x) & 0x1) << 19)
+#define G_00D034_MC_RD_IDLE(x) (((x) >> 19) & 0x1)
+#define C_00D034_MC_RD_IDLE 0xFFF7FFFF
+#define S_00D034_DELTA_RPTR_EMPTY(x) (((x) & 0x1) << 20)
+#define G_00D034_DELTA_RPTR_EMPTY(x) (((x) >> 20) & 0x1)
+#define C_00D034_DELTA_RPTR_EMPTY 0xFFEFFFFF
+#define S_00D034_MC_RD_RET_STALL(x) (((x) & 0x1) << 21)
+#define G_00D034_MC_RD_RET_STALL(x) (((x) >> 21) & 0x1)
+#define C_00D034_MC_RD_RET_STALL 0xFFDFFFFF
+#define S_00D034_MC_RD_NO_POLL_IDLE(x) (((x) & 0x1) << 22)
+#define G_00D034_MC_RD_NO_POLL_IDLE(x) (((x) >> 22) & 0x1)
+#define C_00D034_MC_RD_NO_POLL_IDLE 0xFFBFFFFF
+#define S_00D034_PREV_CMD_IDLE(x) (((x) & 0x1) << 25)
+#define G_00D034_PREV_CMD_IDLE(x) (((x) >> 25) & 0x1)
+#define C_00D034_PREV_CMD_IDLE 0xFDFFFFFF
+#define S_00D034_SEM_IDLE(x) (((x) & 0x1) << 26)
+#define G_00D034_SEM_IDLE(x) (((x) >> 26) & 0x1)
+#define C_00D034_SEM_IDLE 0xFBFFFFFF
+#define S_00D034_SEM_REQ_STALL(x) (((x) & 0x1) << 27)
+#define G_00D034_SEM_REQ_STALL(x) (((x) >> 27) & 0x1)
+#define C_00D034_SEM_REQ_STALL 0xF7FFFFFF
+#define S_00D034_SEM_RESP_STATE(x) (((x) & 0x03) << 28)
+#define G_00D034_SEM_RESP_STATE(x) (((x) >> 28) & 0x03)
+#define C_00D034_SEM_RESP_STATE 0xCFFFFFFF
+#define S_00D034_INT_IDLE(x) (((x) & 0x1) << 30)
+#define G_00D034_INT_IDLE(x) (((x) >> 30) & 0x1)
+#define C_00D034_INT_IDLE 0xBFFFFFFF
+#define S_00D034_INT_REQ_STALL(x) (((x) & 0x1) << 31)
+#define G_00D034_INT_REQ_STALL(x) (((x) >> 31) & 0x1)
+#define C_00D034_INT_REQ_STALL 0x7FFFFFFF
+#define R_00D834_SDMA1_STATUS_REG 0x00D834
+#define R_008008_GRBM_STATUS2 0x008008
+#define S_008008_ME0PIPE1_CMDFIFO_AVAIL(x) (((x) & 0x0F) << 0)
+#define G_008008_ME0PIPE1_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x0F)
+#define C_008008_ME0PIPE1_CMDFIFO_AVAIL 0xFFFFFFF0
+#define S_008008_ME0PIPE1_CF_RQ_PENDING(x) (((x) & 0x1) << 4)
+#define G_008008_ME0PIPE1_CF_RQ_PENDING(x) (((x) >> 4) & 0x1)
+#define C_008008_ME0PIPE1_CF_RQ_PENDING 0xFFFFFFEF
+#define S_008008_ME0PIPE1_PF_RQ_PENDING(x) (((x) & 0x1) << 5)
+#define G_008008_ME0PIPE1_PF_RQ_PENDING(x) (((x) >> 5) & 0x1)
+#define C_008008_ME0PIPE1_PF_RQ_PENDING 0xFFFFFFDF
+#define S_008008_ME1PIPE0_RQ_PENDING(x) (((x) & 0x1) << 6)
+#define G_008008_ME1PIPE0_RQ_PENDING(x) (((x) >> 6) & 0x1)
+#define C_008008_ME1PIPE0_RQ_PENDING 0xFFFFFFBF
+#define S_008008_ME1PIPE1_RQ_PENDING(x) (((x) & 0x1) << 7)
+#define G_008008_ME1PIPE1_RQ_PENDING(x) (((x) >> 7) & 0x1)
+#define C_008008_ME1PIPE1_RQ_PENDING 0xFFFFFF7F
+#define S_008008_ME1PIPE2_RQ_PENDING(x) (((x) & 0x1) << 8)
+#define G_008008_ME1PIPE2_RQ_PENDING(x) (((x) >> 8) & 0x1)
+#define C_008008_ME1PIPE2_RQ_PENDING 0xFFFFFEFF
+#define S_008008_ME1PIPE3_RQ_PENDING(x) (((x) & 0x1) << 9)
+#define G_008008_ME1PIPE3_RQ_PENDING(x) (((x) >> 9) & 0x1)
+#define C_008008_ME1PIPE3_RQ_PENDING 0xFFFFFDFF
+#define S_008008_ME2PIPE0_RQ_PENDING(x) (((x) & 0x1) << 10)
+#define G_008008_ME2PIPE0_RQ_PENDING(x) (((x) >> 10) & 0x1)
+#define C_008008_ME2PIPE0_RQ_PENDING 0xFFFFFBFF
+#define S_008008_ME2PIPE1_RQ_PENDING(x) (((x) & 0x1) << 11)
+#define G_008008_ME2PIPE1_RQ_PENDING(x) (((x) >> 11) & 0x1)
+#define C_008008_ME2PIPE1_RQ_PENDING 0xFFFFF7FF
+#define S_008008_ME2PIPE2_RQ_PENDING(x) (((x) & 0x1) << 12)
+#define G_008008_ME2PIPE2_RQ_PENDING(x) (((x) >> 12) & 0x1)
+#define C_008008_ME2PIPE2_RQ_PENDING 0xFFFFEFFF
+#define S_008008_ME2PIPE3_RQ_PENDING(x) (((x) & 0x1) << 13)
+#define G_008008_ME2PIPE3_RQ_PENDING(x) (((x) >> 13) & 0x1)
+#define C_008008_ME2PIPE3_RQ_PENDING 0xFFFFDFFF
+#define S_008008_RLC_RQ_PENDING(x) (((x) & 0x1) << 14)
+#define G_008008_RLC_RQ_PENDING(x) (((x) >> 14) & 0x1)
+#define C_008008_RLC_RQ_PENDING 0xFFFFBFFF
+#define S_008008_RLC_BUSY(x) (((x) & 0x1) << 24)
+#define G_008008_RLC_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008008_RLC_BUSY 0xFEFFFFFF
+#define S_008008_TC_BUSY(x) (((x) & 0x1) << 25)
+#define G_008008_TC_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008008_TC_BUSY 0xFDFFFFFF
+#define S_008008_TCC_CC_RESIDENT(x) (((x) & 0x1) << 26)
+#define G_008008_TCC_CC_RESIDENT(x) (((x) >> 26) & 0x1)
+#define C_008008_TCC_CC_RESIDENT 0xFBFFFFFF
+#define S_008008_CPF_BUSY(x) (((x) & 0x1) << 28)
+#define G_008008_CPF_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008008_CPF_BUSY 0xEFFFFFFF
+#define S_008008_CPC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008008_CPC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008008_CPC_BUSY 0xDFFFFFFF
+#define S_008008_CPG_BUSY(x) (((x) & 0x1) << 30)
+#define G_008008_CPG_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008008_CPG_BUSY 0xBFFFFFFF
+#define R_008010_GRBM_STATUS 0x008010 /* GRBM_STATUS (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008010_ME0PIPE0_CMDFIFO_AVAIL(x) (((x) & 0x0F) << 0)
+#define G_008010_ME0PIPE0_CMDFIFO_AVAIL(x) (((x) >> 0) & 0x0F)
+#define C_008010_ME0PIPE0_CMDFIFO_AVAIL 0xFFFFFFF0
+#define S_008010_SRBM_RQ_PENDING(x) (((x) & 0x1) << 5)
+#define G_008010_SRBM_RQ_PENDING(x) (((x) >> 5) & 0x1)
+#define C_008010_SRBM_RQ_PENDING 0xFFFFFFDF
+#define S_008010_ME0PIPE0_CF_RQ_PENDING(x) (((x) & 0x1) << 7)
+#define G_008010_ME0PIPE0_CF_RQ_PENDING(x) (((x) >> 7) & 0x1)
+#define C_008010_ME0PIPE0_CF_RQ_PENDING 0xFFFFFF7F
+#define S_008010_ME0PIPE0_PF_RQ_PENDING(x) (((x) & 0x1) << 8)
+#define G_008010_ME0PIPE0_PF_RQ_PENDING(x) (((x) >> 8) & 0x1)
+#define C_008010_ME0PIPE0_PF_RQ_PENDING 0xFFFFFEFF
+#define S_008010_GDS_DMA_RQ_PENDING(x) (((x) & 0x1) << 9)
+#define G_008010_GDS_DMA_RQ_PENDING(x) (((x) >> 9) & 0x1)
+#define C_008010_GDS_DMA_RQ_PENDING 0xFFFFFDFF
+#define S_008010_DB_CLEAN(x) (((x) & 0x1) << 12)
+#define G_008010_DB_CLEAN(x) (((x) >> 12) & 0x1)
+#define C_008010_DB_CLEAN 0xFFFFEFFF
+#define S_008010_CB_CLEAN(x) (((x) & 0x1) << 13)
+#define G_008010_CB_CLEAN(x) (((x) >> 13) & 0x1)
+#define C_008010_CB_CLEAN 0xFFFFDFFF
+#define S_008010_TA_BUSY(x) (((x) & 0x1) << 14)
+#define G_008010_TA_BUSY(x) (((x) >> 14) & 0x1)
+#define C_008010_TA_BUSY 0xFFFFBFFF
+#define S_008010_GDS_BUSY(x) (((x) & 0x1) << 15)
+#define G_008010_GDS_BUSY(x) (((x) >> 15) & 0x1)
+#define C_008010_GDS_BUSY 0xFFFF7FFF
+#define S_008010_WD_BUSY_NO_DMA(x) (((x) & 0x1) << 16)
+#define G_008010_WD_BUSY_NO_DMA(x) (((x) >> 16) & 0x1)
+#define C_008010_WD_BUSY_NO_DMA 0xFFFEFFFF
+#define S_008010_VGT_BUSY(x) (((x) & 0x1) << 17)
+#define G_008010_VGT_BUSY(x) (((x) >> 17) & 0x1)
+#define C_008010_VGT_BUSY 0xFFFDFFFF
+#define S_008010_IA_BUSY_NO_DMA(x) (((x) & 0x1) << 18)
+#define G_008010_IA_BUSY_NO_DMA(x) (((x) >> 18) & 0x1)
+#define C_008010_IA_BUSY_NO_DMA 0xFFFBFFFF
+#define S_008010_IA_BUSY(x) (((x) & 0x1) << 19)
+#define G_008010_IA_BUSY(x) (((x) >> 19) & 0x1)
+#define C_008010_IA_BUSY 0xFFF7FFFF
+#define S_008010_SX_BUSY(x) (((x) & 0x1) << 20)
+#define G_008010_SX_BUSY(x) (((x) >> 20) & 0x1)
+#define C_008010_SX_BUSY 0xFFEFFFFF
+#define S_008010_WD_BUSY(x) (((x) & 0x1) << 21)
+#define G_008010_WD_BUSY(x) (((x) >> 21) & 0x1)
+#define C_008010_WD_BUSY 0xFFDFFFFF
+#define S_008010_SPI_BUSY(x) (((x) & 0x1) << 22)
+#define G_008010_SPI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008010_SPI_BUSY 0xFFBFFFFF
+#define S_008010_BCI_BUSY(x) (((x) & 0x1) << 23)
+#define G_008010_BCI_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008010_BCI_BUSY 0xFF7FFFFF
+#define S_008010_SC_BUSY(x) (((x) & 0x1) << 24)
+#define G_008010_SC_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008010_SC_BUSY 0xFEFFFFFF
+#define S_008010_PA_BUSY(x) (((x) & 0x1) << 25)
+#define G_008010_PA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008010_PA_BUSY 0xFDFFFFFF
+#define S_008010_DB_BUSY(x) (((x) & 0x1) << 26)
+#define G_008010_DB_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008010_DB_BUSY 0xFBFFFFFF
+#define S_008010_CP_COHERENCY_BUSY(x) (((x) & 0x1) << 28)
+#define G_008010_CP_COHERENCY_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008010_CP_COHERENCY_BUSY 0xEFFFFFFF
+#define S_008010_CP_BUSY(x) (((x) & 0x1) << 29)
+#define G_008010_CP_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008010_CP_BUSY 0xDFFFFFFF
+#define S_008010_CB_BUSY(x) (((x) & 0x1) << 30)
+#define G_008010_CB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008010_CB_BUSY 0xBFFFFFFF
+#define S_008010_GUI_ACTIVE(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_008010_GUI_ACTIVE(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_008010_GUI_ACTIVE 0x7FFFFFFF /* AND-mask clearing bit 31 */
#define GRBM_GFX_INDEX 0x802C
#define INSTANCE_INDEX(x) ((x) << 0)
#define SH_INDEX(x) ((x) << 8)
@@ -276,12 +668,155 @@
#define C_0085F0_SH_ICACHE_ACTION_ENA 0xDFFFFFFF
#define R_0085F4_CP_COHER_SIZE 0x0085F4
#define R_0085F8_CP_COHER_BASE 0x0085F8
-
+#define R_008014_GRBM_STATUS_SE0 0x008014 /* GRBM_STATUS_SE0 (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008014_DB_CLEAN(x) (((x) & 0x1) << 1)
+#define G_008014_DB_CLEAN(x) (((x) >> 1) & 0x1)
+#define C_008014_DB_CLEAN 0xFFFFFFFD
+#define S_008014_CB_CLEAN(x) (((x) & 0x1) << 2)
+#define G_008014_CB_CLEAN(x) (((x) >> 2) & 0x1)
+#define C_008014_CB_CLEAN 0xFFFFFFFB
+#define S_008014_BCI_BUSY(x) (((x) & 0x1) << 22)
+#define G_008014_BCI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008014_BCI_BUSY 0xFFBFFFFF
+#define S_008014_VGT_BUSY(x) (((x) & 0x1) << 23)
+#define G_008014_VGT_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008014_VGT_BUSY 0xFF7FFFFF
+#define S_008014_PA_BUSY(x) (((x) & 0x1) << 24)
+#define G_008014_PA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008014_PA_BUSY 0xFEFFFFFF
+#define S_008014_TA_BUSY(x) (((x) & 0x1) << 25)
+#define G_008014_TA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008014_TA_BUSY 0xFDFFFFFF
+#define S_008014_SX_BUSY(x) (((x) & 0x1) << 26)
+#define G_008014_SX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008014_SX_BUSY 0xFBFFFFFF
+#define S_008014_SPI_BUSY(x) (((x) & 0x1) << 27)
+#define G_008014_SPI_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008014_SPI_BUSY 0xF7FFFFFF
+#define S_008014_SC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008014_SC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008014_SC_BUSY 0xDFFFFFFF
+#define S_008014_DB_BUSY(x) (((x) & 0x1) << 30)
+#define G_008014_DB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008014_DB_BUSY 0xBFFFFFFF
+#define S_008014_CB_BUSY(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_008014_CB_BUSY(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_008014_CB_BUSY 0x7FFFFFFF /* AND-mask clearing bit 31 */
+#define R_008018_GRBM_STATUS_SE1 0x008018 /* GRBM_STATUS_SE1 (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008018_DB_CLEAN(x) (((x) & 0x1) << 1)
+#define G_008018_DB_CLEAN(x) (((x) >> 1) & 0x1)
+#define C_008018_DB_CLEAN 0xFFFFFFFD
+#define S_008018_CB_CLEAN(x) (((x) & 0x1) << 2)
+#define G_008018_CB_CLEAN(x) (((x) >> 2) & 0x1)
+#define C_008018_CB_CLEAN 0xFFFFFFFB
+#define S_008018_BCI_BUSY(x) (((x) & 0x1) << 22)
+#define G_008018_BCI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008018_BCI_BUSY 0xFFBFFFFF
+#define S_008018_VGT_BUSY(x) (((x) & 0x1) << 23)
+#define G_008018_VGT_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008018_VGT_BUSY 0xFF7FFFFF
+#define S_008018_PA_BUSY(x) (((x) & 0x1) << 24)
+#define G_008018_PA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008018_PA_BUSY 0xFEFFFFFF
+#define S_008018_TA_BUSY(x) (((x) & 0x1) << 25)
+#define G_008018_TA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008018_TA_BUSY 0xFDFFFFFF
+#define S_008018_SX_BUSY(x) (((x) & 0x1) << 26)
+#define G_008018_SX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008018_SX_BUSY 0xFBFFFFFF
+#define S_008018_SPI_BUSY(x) (((x) & 0x1) << 27)
+#define G_008018_SPI_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008018_SPI_BUSY 0xF7FFFFFF
+#define S_008018_SC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008018_SC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008018_SC_BUSY 0xDFFFFFFF
+#define S_008018_DB_BUSY(x) (((x) & 0x1) << 30)
+#define G_008018_DB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008018_DB_BUSY 0xBFFFFFFF
+#define S_008018_CB_BUSY(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_008018_CB_BUSY(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_008018_CB_BUSY 0x7FFFFFFF /* AND-mask clearing bit 31 */
+#define R_008038_GRBM_STATUS_SE2 0x008038 /* GRBM_STATUS_SE2 (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008038_DB_CLEAN(x) (((x) & 0x1) << 1)
+#define G_008038_DB_CLEAN(x) (((x) >> 1) & 0x1)
+#define C_008038_DB_CLEAN 0xFFFFFFFD
+#define S_008038_CB_CLEAN(x) (((x) & 0x1) << 2)
+#define G_008038_CB_CLEAN(x) (((x) >> 2) & 0x1)
+#define C_008038_CB_CLEAN 0xFFFFFFFB
+#define S_008038_BCI_BUSY(x) (((x) & 0x1) << 22)
+#define G_008038_BCI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008038_BCI_BUSY 0xFFBFFFFF
+#define S_008038_VGT_BUSY(x) (((x) & 0x1) << 23)
+#define G_008038_VGT_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008038_VGT_BUSY 0xFF7FFFFF
+#define S_008038_PA_BUSY(x) (((x) & 0x1) << 24)
+#define G_008038_PA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008038_PA_BUSY 0xFEFFFFFF
+#define S_008038_TA_BUSY(x) (((x) & 0x1) << 25)
+#define G_008038_TA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008038_TA_BUSY 0xFDFFFFFF
+#define S_008038_SX_BUSY(x) (((x) & 0x1) << 26)
+#define G_008038_SX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008038_SX_BUSY 0xFBFFFFFF
+#define S_008038_SPI_BUSY(x) (((x) & 0x1) << 27)
+#define G_008038_SPI_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008038_SPI_BUSY 0xF7FFFFFF
+#define S_008038_SC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008038_SC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008038_SC_BUSY 0xDFFFFFFF
+#define S_008038_DB_BUSY(x) (((x) & 0x1) << 30)
+#define G_008038_DB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008038_DB_BUSY 0xBFFFFFFF
+#define S_008038_CB_BUSY(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_008038_CB_BUSY(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_008038_CB_BUSY 0x7FFFFFFF /* AND-mask clearing bit 31 */
+#define R_00803C_GRBM_STATUS_SE3 0x00803C /* GRBM_STATUS_SE3 (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_00803C_DB_CLEAN(x) (((x) & 0x1) << 1)
+#define G_00803C_DB_CLEAN(x) (((x) >> 1) & 0x1)
+#define C_00803C_DB_CLEAN 0xFFFFFFFD
+#define S_00803C_CB_CLEAN(x) (((x) & 0x1) << 2)
+#define G_00803C_CB_CLEAN(x) (((x) >> 2) & 0x1)
+#define C_00803C_CB_CLEAN 0xFFFFFFFB
+#define S_00803C_BCI_BUSY(x) (((x) & 0x1) << 22)
+#define G_00803C_BCI_BUSY(x) (((x) >> 22) & 0x1)
+#define C_00803C_BCI_BUSY 0xFFBFFFFF
+#define S_00803C_VGT_BUSY(x) (((x) & 0x1) << 23)
+#define G_00803C_VGT_BUSY(x) (((x) >> 23) & 0x1)
+#define C_00803C_VGT_BUSY 0xFF7FFFFF
+#define S_00803C_PA_BUSY(x) (((x) & 0x1) << 24)
+#define G_00803C_PA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_00803C_PA_BUSY 0xFEFFFFFF
+#define S_00803C_TA_BUSY(x) (((x) & 0x1) << 25)
+#define G_00803C_TA_BUSY(x) (((x) >> 25) & 0x1)
+#define C_00803C_TA_BUSY 0xFDFFFFFF
+#define S_00803C_SX_BUSY(x) (((x) & 0x1) << 26)
+#define G_00803C_SX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_00803C_SX_BUSY 0xFBFFFFFF
+#define S_00803C_SPI_BUSY(x) (((x) & 0x1) << 27)
+#define G_00803C_SPI_BUSY(x) (((x) >> 27) & 0x1)
+#define C_00803C_SPI_BUSY 0xF7FFFFFF
+#define S_00803C_SC_BUSY(x) (((x) & 0x1) << 29)
+#define G_00803C_SC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_00803C_SC_BUSY 0xDFFFFFFF
+#define S_00803C_DB_BUSY(x) (((x) & 0x1) << 30)
+#define G_00803C_DB_BUSY(x) (((x) >> 30) & 0x1)
+#define C_00803C_DB_BUSY 0xBFFFFFFF
+#define S_00803C_CB_BUSY(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_00803C_CB_BUSY(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_00803C_CB_BUSY 0x7FFFFFFF /* AND-mask clearing bit 31 */
/* CIK */
+#define R_0300FC_CP_STRMOUT_CNTL 0x0300FC /* CP_STRMOUT_CNTL (presumably an MMIO offset - confirm); its single field follows: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing it */
+#define S_0300FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
+#define G_0300FC_OFFSET_UPDATE_DONE(x) (((x) >> 0) & 0x1)
+#define C_0300FC_OFFSET_UPDATE_DONE 0xFFFFFFFE
#define R_0301E4_CP_COHER_BASE_HI 0x0301E4
#define S_0301E4_COHER_BASE_HI_256B(x) (((x) & 0xFF) << 0)
#define G_0301E4_COHER_BASE_HI_256B(x) (((x) >> 0) & 0xFF)
#define C_0301E4_COHER_BASE_HI_256B 0xFFFFFF00
+#define R_0301EC_CP_COHER_START_DELAY 0x0301EC /* CP_COHER_START_DELAY (presumably an MMIO offset - confirm); its single 6-bit field follows: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing it */
+#define S_0301EC_START_DELAY_COUNT(x) (((x) & 0x3F) << 0)
+#define G_0301EC_START_DELAY_COUNT(x) (((x) >> 0) & 0x3F)
+#define C_0301EC_START_DELAY_COUNT 0xFFFFFFC0
#define R_0301F0_CP_COHER_CNTL 0x0301F0
#define S_0301F0_DEST_BASE_0_ENA(x) (((x) & 0x1) << 0)
#define G_0301F0_DEST_BASE_0_ENA(x) (((x) >> 0) & 0x1)
@@ -289,6 +824,14 @@
#define S_0301F0_DEST_BASE_1_ENA(x) (((x) & 0x1) << 1)
#define G_0301F0_DEST_BASE_1_ENA(x) (((x) >> 1) & 0x1)
#define C_0301F0_DEST_BASE_1_ENA 0xFFFFFFFD
+/* VI */
+#define S_0301F0_TC_SD_ACTION_ENA(x) (((x) & 0x1) << 2)
+#define G_0301F0_TC_SD_ACTION_ENA(x) (((x) >> 2) & 0x1)
+#define C_0301F0_TC_SD_ACTION_ENA 0xFFFFFFFB
+#define S_0301F0_TC_NC_ACTION_ENA(x) (((x) & 0x1) << 3)
+#define G_0301F0_TC_NC_ACTION_ENA(x) (((x) >> 3) & 0x1)
+#define C_0301F0_TC_NC_ACTION_ENA 0xFFFFFFF7
+/* */
#define S_0301F0_CB0_DEST_BASE_ENA(x) (((x) & 0x1) << 6)
#define G_0301F0_CB0_DEST_BASE_ENA(x) (((x) >> 6) & 0x1)
#define C_0301F0_CB0_DEST_BASE_ENA 0xFFFFFFBF
@@ -319,7 +862,7 @@
#define S_0301F0_TCL1_VOL_ACTION_ENA(x) (((x) & 0x1) << 15)
#define G_0301F0_TCL1_VOL_ACTION_ENA(x) (((x) >> 15) & 0x1)
#define C_0301F0_TCL1_VOL_ACTION_ENA 0xFFFF7FFF
-#define S_0301F0_TC_VOL_ACTION_ENA(x) (((x) & 0x1) << 16)
+#define S_0301F0_TC_VOL_ACTION_ENA(x) (((x) & 0x1) << 16) /* not on VI */
#define G_0301F0_TC_VOL_ACTION_ENA(x) (((x) >> 16) & 0x1)
#define C_0301F0_TC_VOL_ACTION_ENA 0xFFFEFFFF
#define S_0301F0_TC_WB_ACTION_ENA(x) (((x) & 0x1) << 18)
@@ -352,8 +895,389 @@
#define S_0301F0_SH_ICACHE_ACTION_ENA(x) (((x) & 0x1) << 29)
#define G_0301F0_SH_ICACHE_ACTION_ENA(x) (((x) >> 29) & 0x1)
#define C_0301F0_SH_ICACHE_ACTION_ENA 0xDFFFFFFF
+/* VI */
+#define S_0301F0_SH_KCACHE_WB_ACTION_ENA(x) (((x) & 0x1) << 30)
+#define G_0301F0_SH_KCACHE_WB_ACTION_ENA(x) (((x) >> 30) & 0x1)
+#define C_0301F0_SH_KCACHE_WB_ACTION_ENA 0xBFFFFFFF
+#define S_0301F0_SH_SD_ACTION_ENA(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_0301F0_SH_SD_ACTION_ENA(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_0301F0_SH_SD_ACTION_ENA 0x7FFFFFFF /* AND-mask clearing bit 31 */
+/* */
#define R_0301F4_CP_COHER_SIZE 0x0301F4
#define R_0301F8_CP_COHER_BASE 0x0301F8
+#define R_0301FC_CP_COHER_STATUS 0x0301FC /* CP_COHER_STATUS (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_0301FC_MATCHING_GFX_CNTX(x) (((x) & 0xFF) << 0)
+#define G_0301FC_MATCHING_GFX_CNTX(x) (((x) >> 0) & 0xFF)
+#define C_0301FC_MATCHING_GFX_CNTX 0xFFFFFF00
+#define S_0301FC_MEID(x) (((x) & 0x03) << 24)
+#define G_0301FC_MEID(x) (((x) >> 24) & 0x03)
+#define C_0301FC_MEID 0xFCFFFFFF
+#define S_0301FC_PHASE1_STATUS(x) (((x) & 0x1) << 30)
+#define G_0301FC_PHASE1_STATUS(x) (((x) >> 30) & 0x1)
+#define C_0301FC_PHASE1_STATUS 0xBFFFFFFF
+#define S_0301FC_STATUS(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_0301FC_STATUS(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_0301FC_STATUS 0x7FFFFFFF /* AND-mask clearing bit 31 */
+#define R_008210_CP_CPC_STATUS 0x008210 /* CP_CPC_STATUS (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008210_MEC1_BUSY(x) (((x) & 0x1) << 0)
+#define G_008210_MEC1_BUSY(x) (((x) >> 0) & 0x1)
+#define C_008210_MEC1_BUSY 0xFFFFFFFE
+#define S_008210_MEC2_BUSY(x) (((x) & 0x1) << 1)
+#define G_008210_MEC2_BUSY(x) (((x) >> 1) & 0x1)
+#define C_008210_MEC2_BUSY 0xFFFFFFFD
+#define S_008210_DC0_BUSY(x) (((x) & 0x1) << 2)
+#define G_008210_DC0_BUSY(x) (((x) >> 2) & 0x1)
+#define C_008210_DC0_BUSY 0xFFFFFFFB
+#define S_008210_DC1_BUSY(x) (((x) & 0x1) << 3)
+#define G_008210_DC1_BUSY(x) (((x) >> 3) & 0x1)
+#define C_008210_DC1_BUSY 0xFFFFFFF7
+#define S_008210_RCIU1_BUSY(x) (((x) & 0x1) << 4)
+#define G_008210_RCIU1_BUSY(x) (((x) >> 4) & 0x1)
+#define C_008210_RCIU1_BUSY 0xFFFFFFEF
+#define S_008210_RCIU2_BUSY(x) (((x) & 0x1) << 5)
+#define G_008210_RCIU2_BUSY(x) (((x) >> 5) & 0x1)
+#define C_008210_RCIU2_BUSY 0xFFFFFFDF
+#define S_008210_ROQ1_BUSY(x) (((x) & 0x1) << 6)
+#define G_008210_ROQ1_BUSY(x) (((x) >> 6) & 0x1)
+#define C_008210_ROQ1_BUSY 0xFFFFFFBF
+#define S_008210_ROQ2_BUSY(x) (((x) & 0x1) << 7)
+#define G_008210_ROQ2_BUSY(x) (((x) >> 7) & 0x1)
+#define C_008210_ROQ2_BUSY 0xFFFFFF7F
+#define S_008210_TCIU_BUSY(x) (((x) & 0x1) << 10)
+#define G_008210_TCIU_BUSY(x) (((x) >> 10) & 0x1)
+#define C_008210_TCIU_BUSY 0xFFFFFBFF
+#define S_008210_SCRATCH_RAM_BUSY(x) (((x) & 0x1) << 11)
+#define G_008210_SCRATCH_RAM_BUSY(x) (((x) >> 11) & 0x1)
+#define C_008210_SCRATCH_RAM_BUSY 0xFFFFF7FF
+#define S_008210_QU_BUSY(x) (((x) & 0x1) << 12)
+#define G_008210_QU_BUSY(x) (((x) >> 12) & 0x1)
+#define C_008210_QU_BUSY 0xFFFFEFFF
+#define S_008210_ATCL2IU_BUSY(x) (((x) & 0x1) << 13)
+#define G_008210_ATCL2IU_BUSY(x) (((x) >> 13) & 0x1)
+#define C_008210_ATCL2IU_BUSY 0xFFFFDFFF
+#define S_008210_CPG_CPC_BUSY(x) (((x) & 0x1) << 29)
+#define G_008210_CPG_CPC_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008210_CPG_CPC_BUSY 0xDFFFFFFF
+#define S_008210_CPF_CPC_BUSY(x) (((x) & 0x1) << 30)
+#define G_008210_CPF_CPC_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008210_CPF_CPC_BUSY 0xBFFFFFFF
+#define S_008210_CPC_BUSY(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_008210_CPC_BUSY(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_008210_CPC_BUSY 0x7FFFFFFF /* AND-mask clearing bit 31 */
+#define R_008214_CP_CPC_BUSY_STAT 0x008214 /* CP_CPC_BUSY_STAT (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008214_MEC1_LOAD_BUSY(x) (((x) & 0x1) << 0)
+#define G_008214_MEC1_LOAD_BUSY(x) (((x) >> 0) & 0x1)
+#define C_008214_MEC1_LOAD_BUSY 0xFFFFFFFE
+#define S_008214_MEC1_SEMAPOHRE_BUSY(x) (((x) & 0x1) << 1) /* sic: "SEMAPOHRE" is a misspelling of SEMAPHORE, but it is the identifier callers must use */
+#define G_008214_MEC1_SEMAPOHRE_BUSY(x) (((x) >> 1) & 0x1) /* extract bit 1 */
+#define C_008214_MEC1_SEMAPOHRE_BUSY 0xFFFFFFFD /* AND-mask clearing bit 1 */
+#define S_008214_MEC1_MUTEX_BUSY(x) (((x) & 0x1) << 2)
+#define G_008214_MEC1_MUTEX_BUSY(x) (((x) >> 2) & 0x1)
+#define C_008214_MEC1_MUTEX_BUSY 0xFFFFFFFB
+#define S_008214_MEC1_MESSAGE_BUSY(x) (((x) & 0x1) << 3)
+#define G_008214_MEC1_MESSAGE_BUSY(x) (((x) >> 3) & 0x1)
+#define C_008214_MEC1_MESSAGE_BUSY 0xFFFFFFF7
+#define S_008214_MEC1_EOP_QUEUE_BUSY(x) (((x) & 0x1) << 4)
+#define G_008214_MEC1_EOP_QUEUE_BUSY(x) (((x) >> 4) & 0x1)
+#define C_008214_MEC1_EOP_QUEUE_BUSY 0xFFFFFFEF
+#define S_008214_MEC1_IQ_QUEUE_BUSY(x) (((x) & 0x1) << 5)
+#define G_008214_MEC1_IQ_QUEUE_BUSY(x) (((x) >> 5) & 0x1)
+#define C_008214_MEC1_IQ_QUEUE_BUSY 0xFFFFFFDF
+#define S_008214_MEC1_IB_QUEUE_BUSY(x) (((x) & 0x1) << 6)
+#define G_008214_MEC1_IB_QUEUE_BUSY(x) (((x) >> 6) & 0x1)
+#define C_008214_MEC1_IB_QUEUE_BUSY 0xFFFFFFBF
+#define S_008214_MEC1_TC_BUSY(x) (((x) & 0x1) << 7)
+#define G_008214_MEC1_TC_BUSY(x) (((x) >> 7) & 0x1)
+#define C_008214_MEC1_TC_BUSY 0xFFFFFF7F
+#define S_008214_MEC1_DMA_BUSY(x) (((x) & 0x1) << 8)
+#define G_008214_MEC1_DMA_BUSY(x) (((x) >> 8) & 0x1)
+#define C_008214_MEC1_DMA_BUSY 0xFFFFFEFF
+#define S_008214_MEC1_PARTIAL_FLUSH_BUSY(x) (((x) & 0x1) << 9)
+#define G_008214_MEC1_PARTIAL_FLUSH_BUSY(x) (((x) >> 9) & 0x1)
+#define C_008214_MEC1_PARTIAL_FLUSH_BUSY 0xFFFFFDFF
+#define S_008214_MEC1_PIPE0_BUSY(x) (((x) & 0x1) << 10)
+#define G_008214_MEC1_PIPE0_BUSY(x) (((x) >> 10) & 0x1)
+#define C_008214_MEC1_PIPE0_BUSY 0xFFFFFBFF
+#define S_008214_MEC1_PIPE1_BUSY(x) (((x) & 0x1) << 11)
+#define G_008214_MEC1_PIPE1_BUSY(x) (((x) >> 11) & 0x1)
+#define C_008214_MEC1_PIPE1_BUSY 0xFFFFF7FF
+#define S_008214_MEC1_PIPE2_BUSY(x) (((x) & 0x1) << 12)
+#define G_008214_MEC1_PIPE2_BUSY(x) (((x) >> 12) & 0x1)
+#define C_008214_MEC1_PIPE2_BUSY 0xFFFFEFFF
+#define S_008214_MEC1_PIPE3_BUSY(x) (((x) & 0x1) << 13)
+#define G_008214_MEC1_PIPE3_BUSY(x) (((x) >> 13) & 0x1)
+#define C_008214_MEC1_PIPE3_BUSY 0xFFFFDFFF
+#define S_008214_MEC2_LOAD_BUSY(x) (((x) & 0x1) << 16)
+#define G_008214_MEC2_LOAD_BUSY(x) (((x) >> 16) & 0x1)
+#define C_008214_MEC2_LOAD_BUSY 0xFFFEFFFF
+#define S_008214_MEC2_SEMAPOHRE_BUSY(x) (((x) & 0x1) << 17) /* sic: "SEMAPOHRE" is a misspelling of SEMAPHORE, but it is the identifier callers must use */
+#define G_008214_MEC2_SEMAPOHRE_BUSY(x) (((x) >> 17) & 0x1) /* extract bit 17 */
+#define C_008214_MEC2_SEMAPOHRE_BUSY 0xFFFDFFFF /* AND-mask clearing bit 17 */
+#define S_008214_MEC2_MUTEX_BUSY(x) (((x) & 0x1) << 18)
+#define G_008214_MEC2_MUTEX_BUSY(x) (((x) >> 18) & 0x1)
+#define C_008214_MEC2_MUTEX_BUSY 0xFFFBFFFF
+#define S_008214_MEC2_MESSAGE_BUSY(x) (((x) & 0x1) << 19)
+#define G_008214_MEC2_MESSAGE_BUSY(x) (((x) >> 19) & 0x1)
+#define C_008214_MEC2_MESSAGE_BUSY 0xFFF7FFFF
+#define S_008214_MEC2_EOP_QUEUE_BUSY(x) (((x) & 0x1) << 20)
+#define G_008214_MEC2_EOP_QUEUE_BUSY(x) (((x) >> 20) & 0x1)
+#define C_008214_MEC2_EOP_QUEUE_BUSY 0xFFEFFFFF
+#define S_008214_MEC2_IQ_QUEUE_BUSY(x) (((x) & 0x1) << 21)
+#define G_008214_MEC2_IQ_QUEUE_BUSY(x) (((x) >> 21) & 0x1)
+#define C_008214_MEC2_IQ_QUEUE_BUSY 0xFFDFFFFF
+#define S_008214_MEC2_IB_QUEUE_BUSY(x) (((x) & 0x1) << 22)
+#define G_008214_MEC2_IB_QUEUE_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008214_MEC2_IB_QUEUE_BUSY 0xFFBFFFFF
+#define S_008214_MEC2_TC_BUSY(x) (((x) & 0x1) << 23)
+#define G_008214_MEC2_TC_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008214_MEC2_TC_BUSY 0xFF7FFFFF
+#define S_008214_MEC2_DMA_BUSY(x) (((x) & 0x1) << 24)
+#define G_008214_MEC2_DMA_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008214_MEC2_DMA_BUSY 0xFEFFFFFF
+#define S_008214_MEC2_PARTIAL_FLUSH_BUSY(x) (((x) & 0x1) << 25)
+#define G_008214_MEC2_PARTIAL_FLUSH_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008214_MEC2_PARTIAL_FLUSH_BUSY 0xFDFFFFFF
+#define S_008214_MEC2_PIPE0_BUSY(x) (((x) & 0x1) << 26)
+#define G_008214_MEC2_PIPE0_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008214_MEC2_PIPE0_BUSY 0xFBFFFFFF
+#define S_008214_MEC2_PIPE1_BUSY(x) (((x) & 0x1) << 27)
+#define G_008214_MEC2_PIPE1_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008214_MEC2_PIPE1_BUSY 0xF7FFFFFF
+#define S_008214_MEC2_PIPE2_BUSY(x) (((x) & 0x1) << 28)
+#define G_008214_MEC2_PIPE2_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008214_MEC2_PIPE2_BUSY 0xEFFFFFFF
+#define S_008214_MEC2_PIPE3_BUSY(x) (((x) & 0x1) << 29)
+#define G_008214_MEC2_PIPE3_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008214_MEC2_PIPE3_BUSY 0xDFFFFFFF
+#define R_008218_CP_CPC_STALLED_STAT1 0x008218 /* CP_CPC_STALLED_STAT1 (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008218_RCIU_TX_FREE_STALL(x) (((x) & 0x1) << 3)
+#define G_008218_RCIU_TX_FREE_STALL(x) (((x) >> 3) & 0x1)
+#define C_008218_RCIU_TX_FREE_STALL 0xFFFFFFF7
+#define S_008218_RCIU_PRIV_VIOLATION(x) (((x) & 0x1) << 4)
+#define G_008218_RCIU_PRIV_VIOLATION(x) (((x) >> 4) & 0x1)
+#define C_008218_RCIU_PRIV_VIOLATION 0xFFFFFFEF
+#define S_008218_TCIU_TX_FREE_STALL(x) (((x) & 0x1) << 6)
+#define G_008218_TCIU_TX_FREE_STALL(x) (((x) >> 6) & 0x1)
+#define C_008218_TCIU_TX_FREE_STALL 0xFFFFFFBF
+#define S_008218_MEC1_DECODING_PACKET(x) (((x) & 0x1) << 8)
+#define G_008218_MEC1_DECODING_PACKET(x) (((x) >> 8) & 0x1)
+#define C_008218_MEC1_DECODING_PACKET 0xFFFFFEFF
+#define S_008218_MEC1_WAIT_ON_RCIU(x) (((x) & 0x1) << 9)
+#define G_008218_MEC1_WAIT_ON_RCIU(x) (((x) >> 9) & 0x1)
+#define C_008218_MEC1_WAIT_ON_RCIU 0xFFFFFDFF
+#define S_008218_MEC1_WAIT_ON_RCIU_READ(x) (((x) & 0x1) << 10)
+#define G_008218_MEC1_WAIT_ON_RCIU_READ(x) (((x) >> 10) & 0x1)
+#define C_008218_MEC1_WAIT_ON_RCIU_READ 0xFFFFFBFF
+#define S_008218_MEC1_WAIT_ON_ROQ_DATA(x) (((x) & 0x1) << 13)
+#define G_008218_MEC1_WAIT_ON_ROQ_DATA(x) (((x) >> 13) & 0x1)
+#define C_008218_MEC1_WAIT_ON_ROQ_DATA 0xFFFFDFFF
+#define S_008218_MEC2_DECODING_PACKET(x) (((x) & 0x1) << 16)
+#define G_008218_MEC2_DECODING_PACKET(x) (((x) >> 16) & 0x1)
+#define C_008218_MEC2_DECODING_PACKET 0xFFFEFFFF
+#define S_008218_MEC2_WAIT_ON_RCIU(x) (((x) & 0x1) << 17)
+#define G_008218_MEC2_WAIT_ON_RCIU(x) (((x) >> 17) & 0x1)
+#define C_008218_MEC2_WAIT_ON_RCIU 0xFFFDFFFF
+#define S_008218_MEC2_WAIT_ON_RCIU_READ(x) (((x) & 0x1) << 18)
+#define G_008218_MEC2_WAIT_ON_RCIU_READ(x) (((x) >> 18) & 0x1)
+#define C_008218_MEC2_WAIT_ON_RCIU_READ 0xFFFBFFFF
+#define S_008218_MEC2_WAIT_ON_ROQ_DATA(x) (((x) & 0x1) << 21)
+#define G_008218_MEC2_WAIT_ON_ROQ_DATA(x) (((x) >> 21) & 0x1)
+#define C_008218_MEC2_WAIT_ON_ROQ_DATA 0xFFDFFFFF
+#define S_008218_ATCL2IU_WAITING_ON_FREE(x) (((x) & 0x1) << 22)
+#define G_008218_ATCL2IU_WAITING_ON_FREE(x) (((x) >> 22) & 0x1)
+#define C_008218_ATCL2IU_WAITING_ON_FREE 0xFFBFFFFF
+#define S_008218_ATCL2IU_WAITING_ON_TAGS(x) (((x) & 0x1) << 23)
+#define G_008218_ATCL2IU_WAITING_ON_TAGS(x) (((x) >> 23) & 0x1)
+#define C_008218_ATCL2IU_WAITING_ON_TAGS 0xFF7FFFFF
+#define S_008218_ATCL1_WAITING_ON_TRANS(x) (((x) & 0x1) << 24)
+#define G_008218_ATCL1_WAITING_ON_TRANS(x) (((x) >> 24) & 0x1)
+#define C_008218_ATCL1_WAITING_ON_TRANS 0xFEFFFFFF
+#define R_00821C_CP_CPF_STATUS 0x00821C /* CP_CPF_STATUS (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_00821C_POST_WPTR_GFX_BUSY(x) (((x) & 0x1) << 0)
+#define G_00821C_POST_WPTR_GFX_BUSY(x) (((x) >> 0) & 0x1)
+#define C_00821C_POST_WPTR_GFX_BUSY 0xFFFFFFFE
+#define S_00821C_CSF_BUSY(x) (((x) & 0x1) << 1)
+#define G_00821C_CSF_BUSY(x) (((x) >> 1) & 0x1)
+#define C_00821C_CSF_BUSY 0xFFFFFFFD
+#define S_00821C_ROQ_ALIGN_BUSY(x) (((x) & 0x1) << 4)
+#define G_00821C_ROQ_ALIGN_BUSY(x) (((x) >> 4) & 0x1)
+#define C_00821C_ROQ_ALIGN_BUSY 0xFFFFFFEF
+#define S_00821C_ROQ_RING_BUSY(x) (((x) & 0x1) << 5)
+#define G_00821C_ROQ_RING_BUSY(x) (((x) >> 5) & 0x1)
+#define C_00821C_ROQ_RING_BUSY 0xFFFFFFDF
+#define S_00821C_ROQ_INDIRECT1_BUSY(x) (((x) & 0x1) << 6)
+#define G_00821C_ROQ_INDIRECT1_BUSY(x) (((x) >> 6) & 0x1)
+#define C_00821C_ROQ_INDIRECT1_BUSY 0xFFFFFFBF
+#define S_00821C_ROQ_INDIRECT2_BUSY(x) (((x) & 0x1) << 7)
+#define G_00821C_ROQ_INDIRECT2_BUSY(x) (((x) >> 7) & 0x1)
+#define C_00821C_ROQ_INDIRECT2_BUSY 0xFFFFFF7F
+#define S_00821C_ROQ_STATE_BUSY(x) (((x) & 0x1) << 8)
+#define G_00821C_ROQ_STATE_BUSY(x) (((x) >> 8) & 0x1)
+#define C_00821C_ROQ_STATE_BUSY 0xFFFFFEFF
+#define S_00821C_ROQ_CE_RING_BUSY(x) (((x) & 0x1) << 9)
+#define G_00821C_ROQ_CE_RING_BUSY(x) (((x) >> 9) & 0x1)
+#define C_00821C_ROQ_CE_RING_BUSY 0xFFFFFDFF
+#define S_00821C_ROQ_CE_INDIRECT1_BUSY(x) (((x) & 0x1) << 10)
+#define G_00821C_ROQ_CE_INDIRECT1_BUSY(x) (((x) >> 10) & 0x1)
+#define C_00821C_ROQ_CE_INDIRECT1_BUSY 0xFFFFFBFF
+#define S_00821C_ROQ_CE_INDIRECT2_BUSY(x) (((x) & 0x1) << 11)
+#define G_00821C_ROQ_CE_INDIRECT2_BUSY(x) (((x) >> 11) & 0x1)
+#define C_00821C_ROQ_CE_INDIRECT2_BUSY 0xFFFFF7FF
+#define S_00821C_SEMAPHORE_BUSY(x) (((x) & 0x1) << 12)
+#define G_00821C_SEMAPHORE_BUSY(x) (((x) >> 12) & 0x1)
+#define C_00821C_SEMAPHORE_BUSY 0xFFFFEFFF
+#define S_00821C_INTERRUPT_BUSY(x) (((x) & 0x1) << 13)
+#define G_00821C_INTERRUPT_BUSY(x) (((x) >> 13) & 0x1)
+#define C_00821C_INTERRUPT_BUSY 0xFFFFDFFF
+#define S_00821C_TCIU_BUSY(x) (((x) & 0x1) << 14)
+#define G_00821C_TCIU_BUSY(x) (((x) >> 14) & 0x1)
+#define C_00821C_TCIU_BUSY 0xFFFFBFFF
+#define S_00821C_HQD_BUSY(x) (((x) & 0x1) << 15)
+#define G_00821C_HQD_BUSY(x) (((x) >> 15) & 0x1)
+#define C_00821C_HQD_BUSY 0xFFFF7FFF
+#define S_00821C_PRT_BUSY(x) (((x) & 0x1) << 16)
+#define G_00821C_PRT_BUSY(x) (((x) >> 16) & 0x1)
+#define C_00821C_PRT_BUSY 0xFFFEFFFF
+#define S_00821C_ATCL2IU_BUSY(x) (((x) & 0x1) << 17)
+#define G_00821C_ATCL2IU_BUSY(x) (((x) >> 17) & 0x1)
+#define C_00821C_ATCL2IU_BUSY 0xFFFDFFFF
+#define S_00821C_CPF_GFX_BUSY(x) (((x) & 0x1) << 26)
+#define G_00821C_CPF_GFX_BUSY(x) (((x) >> 26) & 0x1)
+#define C_00821C_CPF_GFX_BUSY 0xFBFFFFFF
+#define S_00821C_CPF_CMP_BUSY(x) (((x) & 0x1) << 27)
+#define G_00821C_CPF_CMP_BUSY(x) (((x) >> 27) & 0x1)
+#define C_00821C_CPF_CMP_BUSY 0xF7FFFFFF
+#define S_00821C_GRBM_CPF_STAT_BUSY(x) (((x) & 0x03) << 28)
+#define G_00821C_GRBM_CPF_STAT_BUSY(x) (((x) >> 28) & 0x03)
+#define C_00821C_GRBM_CPF_STAT_BUSY 0xCFFFFFFF
+#define S_00821C_CPC_CPF_BUSY(x) (((x) & 0x1) << 30)
+#define G_00821C_CPC_CPF_BUSY(x) (((x) >> 30) & 0x1)
+#define C_00821C_CPC_CPF_BUSY 0xBFFFFFFF
+#define S_00821C_CPF_BUSY(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_00821C_CPF_BUSY(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_00821C_CPF_BUSY 0x7FFFFFFF /* AND-mask clearing bit 31 */
+#define R_008220_CP_CPF_BUSY_STAT 0x008220 /* CP_CPF_BUSY_STAT (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008220_REG_BUS_FIFO_BUSY(x) (((x) & 0x1) << 0)
+#define G_008220_REG_BUS_FIFO_BUSY(x) (((x) >> 0) & 0x1)
+#define C_008220_REG_BUS_FIFO_BUSY 0xFFFFFFFE
+#define S_008220_CSF_RING_BUSY(x) (((x) & 0x1) << 1)
+#define G_008220_CSF_RING_BUSY(x) (((x) >> 1) & 0x1)
+#define C_008220_CSF_RING_BUSY 0xFFFFFFFD
+#define S_008220_CSF_INDIRECT1_BUSY(x) (((x) & 0x1) << 2)
+#define G_008220_CSF_INDIRECT1_BUSY(x) (((x) >> 2) & 0x1)
+#define C_008220_CSF_INDIRECT1_BUSY 0xFFFFFFFB
+#define S_008220_CSF_INDIRECT2_BUSY(x) (((x) & 0x1) << 3)
+#define G_008220_CSF_INDIRECT2_BUSY(x) (((x) >> 3) & 0x1)
+#define C_008220_CSF_INDIRECT2_BUSY 0xFFFFFFF7
+#define S_008220_CSF_STATE_BUSY(x) (((x) & 0x1) << 4)
+#define G_008220_CSF_STATE_BUSY(x) (((x) >> 4) & 0x1)
+#define C_008220_CSF_STATE_BUSY 0xFFFFFFEF
+#define S_008220_CSF_CE_INDR1_BUSY(x) (((x) & 0x1) << 5)
+#define G_008220_CSF_CE_INDR1_BUSY(x) (((x) >> 5) & 0x1)
+#define C_008220_CSF_CE_INDR1_BUSY 0xFFFFFFDF
+#define S_008220_CSF_CE_INDR2_BUSY(x) (((x) & 0x1) << 6)
+#define G_008220_CSF_CE_INDR2_BUSY(x) (((x) >> 6) & 0x1)
+#define C_008220_CSF_CE_INDR2_BUSY 0xFFFFFFBF
+#define S_008220_CSF_ARBITER_BUSY(x) (((x) & 0x1) << 7)
+#define G_008220_CSF_ARBITER_BUSY(x) (((x) >> 7) & 0x1)
+#define C_008220_CSF_ARBITER_BUSY 0xFFFFFF7F
+#define S_008220_CSF_INPUT_BUSY(x) (((x) & 0x1) << 8)
+#define G_008220_CSF_INPUT_BUSY(x) (((x) >> 8) & 0x1)
+#define C_008220_CSF_INPUT_BUSY 0xFFFFFEFF
+#define S_008220_OUTSTANDING_READ_TAGS(x) (((x) & 0x1) << 9)
+#define G_008220_OUTSTANDING_READ_TAGS(x) (((x) >> 9) & 0x1)
+#define C_008220_OUTSTANDING_READ_TAGS 0xFFFFFDFF
+#define S_008220_HPD_PROCESSING_EOP_BUSY(x) (((x) & 0x1) << 11)
+#define G_008220_HPD_PROCESSING_EOP_BUSY(x) (((x) >> 11) & 0x1)
+#define C_008220_HPD_PROCESSING_EOP_BUSY 0xFFFFF7FF
+#define S_008220_HQD_DISPATCH_BUSY(x) (((x) & 0x1) << 12)
+#define G_008220_HQD_DISPATCH_BUSY(x) (((x) >> 12) & 0x1)
+#define C_008220_HQD_DISPATCH_BUSY 0xFFFFEFFF
+#define S_008220_HQD_IQ_TIMER_BUSY(x) (((x) & 0x1) << 13)
+#define G_008220_HQD_IQ_TIMER_BUSY(x) (((x) >> 13) & 0x1)
+#define C_008220_HQD_IQ_TIMER_BUSY 0xFFFFDFFF
+#define S_008220_HQD_DMA_OFFLOAD_BUSY(x) (((x) & 0x1) << 14)
+#define G_008220_HQD_DMA_OFFLOAD_BUSY(x) (((x) >> 14) & 0x1)
+#define C_008220_HQD_DMA_OFFLOAD_BUSY 0xFFFFBFFF
+#define S_008220_HQD_WAIT_SEMAPHORE_BUSY(x) (((x) & 0x1) << 15)
+#define G_008220_HQD_WAIT_SEMAPHORE_BUSY(x) (((x) >> 15) & 0x1)
+#define C_008220_HQD_WAIT_SEMAPHORE_BUSY 0xFFFF7FFF
+#define S_008220_HQD_SIGNAL_SEMAPHORE_BUSY(x) (((x) & 0x1) << 16)
+#define G_008220_HQD_SIGNAL_SEMAPHORE_BUSY(x) (((x) >> 16) & 0x1)
+#define C_008220_HQD_SIGNAL_SEMAPHORE_BUSY 0xFFFEFFFF
+#define S_008220_HQD_MESSAGE_BUSY(x) (((x) & 0x1) << 17)
+#define G_008220_HQD_MESSAGE_BUSY(x) (((x) >> 17) & 0x1)
+#define C_008220_HQD_MESSAGE_BUSY 0xFFFDFFFF
+#define S_008220_HQD_PQ_FETCHER_BUSY(x) (((x) & 0x1) << 18)
+#define G_008220_HQD_PQ_FETCHER_BUSY(x) (((x) >> 18) & 0x1)
+#define C_008220_HQD_PQ_FETCHER_BUSY 0xFFFBFFFF
+#define S_008220_HQD_IB_FETCHER_BUSY(x) (((x) & 0x1) << 19)
+#define G_008220_HQD_IB_FETCHER_BUSY(x) (((x) >> 19) & 0x1)
+#define C_008220_HQD_IB_FETCHER_BUSY 0xFFF7FFFF
+#define S_008220_HQD_IQ_FETCHER_BUSY(x) (((x) & 0x1) << 20)
+#define G_008220_HQD_IQ_FETCHER_BUSY(x) (((x) >> 20) & 0x1)
+#define C_008220_HQD_IQ_FETCHER_BUSY 0xFFEFFFFF
+#define S_008220_HQD_EOP_FETCHER_BUSY(x) (((x) & 0x1) << 21)
+#define G_008220_HQD_EOP_FETCHER_BUSY(x) (((x) >> 21) & 0x1)
+#define C_008220_HQD_EOP_FETCHER_BUSY 0xFFDFFFFF
+#define S_008220_HQD_CONSUMED_RPTR_BUSY(x) (((x) & 0x1) << 22)
+#define G_008220_HQD_CONSUMED_RPTR_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008220_HQD_CONSUMED_RPTR_BUSY 0xFFBFFFFF
+#define S_008220_HQD_FETCHER_ARB_BUSY(x) (((x) & 0x1) << 23)
+#define G_008220_HQD_FETCHER_ARB_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008220_HQD_FETCHER_ARB_BUSY 0xFF7FFFFF
+#define S_008220_HQD_ROQ_ALIGN_BUSY(x) (((x) & 0x1) << 24)
+#define G_008220_HQD_ROQ_ALIGN_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008220_HQD_ROQ_ALIGN_BUSY 0xFEFFFFFF
+#define S_008220_HQD_ROQ_EOP_BUSY(x) (((x) & 0x1) << 25)
+#define G_008220_HQD_ROQ_EOP_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008220_HQD_ROQ_EOP_BUSY 0xFDFFFFFF
+#define S_008220_HQD_ROQ_IQ_BUSY(x) (((x) & 0x1) << 26)
+#define G_008220_HQD_ROQ_IQ_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008220_HQD_ROQ_IQ_BUSY 0xFBFFFFFF
+#define S_008220_HQD_ROQ_PQ_BUSY(x) (((x) & 0x1) << 27)
+#define G_008220_HQD_ROQ_PQ_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008220_HQD_ROQ_PQ_BUSY 0xF7FFFFFF
+#define S_008220_HQD_ROQ_IB_BUSY(x) (((x) & 0x1) << 28)
+#define G_008220_HQD_ROQ_IB_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008220_HQD_ROQ_IB_BUSY 0xEFFFFFFF
+#define S_008220_HQD_WPTR_POLL_BUSY(x) (((x) & 0x1) << 29)
+#define G_008220_HQD_WPTR_POLL_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008220_HQD_WPTR_POLL_BUSY 0xDFFFFFFF
+#define S_008220_HQD_PQ_BUSY(x) (((x) & 0x1) << 30)
+#define G_008220_HQD_PQ_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008220_HQD_PQ_BUSY 0xBFFFFFFF
+#define S_008220_HQD_IB_BUSY(x) (((unsigned)(x) & 0x1) << 31) /* pack bit 31; unsigned cast because shifting a signed int 1 into bit 31 is undefined */
+#define G_008220_HQD_IB_BUSY(x) (((x) >> 31) & 0x1) /* extract bit 31 */
+#define C_008220_HQD_IB_BUSY 0x7FFFFFFF /* AND-mask clearing bit 31 */
+#define R_008224_CP_CPF_STALLED_STAT1 0x008224 /* CP_CPF_STALLED_STAT1 (presumably an MMIO offset - confirm); fields below: S_ packs a value, G_ extracts it, C_ is the AND-mask clearing the field */
+#define S_008224_RING_FETCHING_DATA(x) (((x) & 0x1) << 0)
+#define G_008224_RING_FETCHING_DATA(x) (((x) >> 0) & 0x1)
+#define C_008224_RING_FETCHING_DATA 0xFFFFFFFE
+#define S_008224_INDR1_FETCHING_DATA(x) (((x) & 0x1) << 1)
+#define G_008224_INDR1_FETCHING_DATA(x) (((x) >> 1) & 0x1)
+#define C_008224_INDR1_FETCHING_DATA 0xFFFFFFFD
+#define S_008224_INDR2_FETCHING_DATA(x) (((x) & 0x1) << 2)
+#define G_008224_INDR2_FETCHING_DATA(x) (((x) >> 2) & 0x1)
+#define C_008224_INDR2_FETCHING_DATA 0xFFFFFFFB
+#define S_008224_STATE_FETCHING_DATA(x) (((x) & 0x1) << 3)
+#define G_008224_STATE_FETCHING_DATA(x) (((x) >> 3) & 0x1)
+#define C_008224_STATE_FETCHING_DATA 0xFFFFFFF7
+#define S_008224_TCIU_WAITING_ON_FREE(x) (((x) & 0x1) << 5)
+#define G_008224_TCIU_WAITING_ON_FREE(x) (((x) >> 5) & 0x1)
+#define C_008224_TCIU_WAITING_ON_FREE 0xFFFFFFDF
+#define S_008224_TCIU_WAITING_ON_TAGS(x) (((x) & 0x1) << 6)
+#define G_008224_TCIU_WAITING_ON_TAGS(x) (((x) >> 6) & 0x1)
+#define C_008224_TCIU_WAITING_ON_TAGS 0xFFFFFFBF
+#define S_008224_ATCL2IU_WAITING_ON_FREE(x) (((x) & 0x1) << 7)
+#define G_008224_ATCL2IU_WAITING_ON_FREE(x) (((x) >> 7) & 0x1)
+#define C_008224_ATCL2IU_WAITING_ON_FREE 0xFFFFFF7F
+#define S_008224_ATCL2IU_WAITING_ON_TAGS(x) (((x) & 0x1) << 8)
+#define G_008224_ATCL2IU_WAITING_ON_TAGS(x) (((x) >> 8) & 0x1)
+#define C_008224_ATCL2IU_WAITING_ON_TAGS 0xFFFFFEFF
+#define S_008224_ATCL1_WAITING_ON_TRANS(x) (((x) & 0x1) << 9)
+#define G_008224_ATCL1_WAITING_ON_TRANS(x) (((x) >> 9) & 0x1)
+#define C_008224_ATCL1_WAITING_ON_TRANS 0xFFFFFDFF
#define R_030230_CP_COHER_SIZE_HI 0x030230
#define S_030230_COHER_SIZE_HI_256B(x) (((x) & 0xFF) << 0)
#define G_030230_COHER_SIZE_HI_256B(x) (((x) >> 0) & 0xFF)
@@ -375,10 +1299,6 @@
#define C_0088C4_ES_LIMIT 0xFFE0FFFF
#define R_0088C8_VGT_ESGS_RING_SIZE 0x0088C8
#define R_0088CC_VGT_GSVS_RING_SIZE 0x0088CC
-/* CIK */
-#define R_030900_VGT_ESGS_RING_SIZE 0x030900
-#define R_030904_VGT_GSVS_RING_SIZE 0x030904
-/* */
#define R_0088D4_VGT_GS_VERTEX_REUSE 0x0088D4
#define S_0088D4_VERT_REUSE(x) (((x) & 0x1F) << 0)
#define G_0088D4_VERT_REUSE(x) (((x) >> 0) & 0x1F)
@@ -461,7 +1381,293 @@
#define S_008B10_CURRENT_COUNT(x) (((x) & 0xFF) << 8)
#define G_008B10_CURRENT_COUNT(x) (((x) >> 8) & 0xFF)
#define C_008B10_CURRENT_COUNT 0xFFFF00FF
+#define R_008670_CP_STALLED_STAT3 0x008670
+#define S_008670_CE_TO_CSF_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 0)
+#define G_008670_CE_TO_CSF_NOT_RDY_TO_RCV(x) (((x) >> 0) & 0x1)
+#define C_008670_CE_TO_CSF_NOT_RDY_TO_RCV 0xFFFFFFFE
+#define S_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 1)
+#define G_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV(x) (((x) >> 1) & 0x1)
+#define C_008670_CE_TO_RAM_INIT_FETCHER_NOT_RDY_TO_RCV 0xFFFFFFFD
+#define S_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(x) (((x) & 0x1) << 2)
+#define G_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER(x) (((x) >> 2) & 0x1)
+#define C_008670_CE_WAITING_ON_DATA_FROM_RAM_INIT_FETCHER 0xFFFFFFFB
+#define S_008670_CE_TO_RAM_INIT_NOT_RDY(x) (((x) & 0x1) << 3)
+#define G_008670_CE_TO_RAM_INIT_NOT_RDY(x) (((x) >> 3) & 0x1)
+#define C_008670_CE_TO_RAM_INIT_NOT_RDY 0xFFFFFFF7
+#define S_008670_CE_TO_RAM_DUMP_NOT_RDY(x) (((x) & 0x1) << 4)
+#define G_008670_CE_TO_RAM_DUMP_NOT_RDY(x) (((x) >> 4) & 0x1)
+#define C_008670_CE_TO_RAM_DUMP_NOT_RDY 0xFFFFFFEF
+#define S_008670_CE_TO_RAM_WRITE_NOT_RDY(x) (((x) & 0x1) << 5)
+#define G_008670_CE_TO_RAM_WRITE_NOT_RDY(x) (((x) >> 5) & 0x1)
+#define C_008670_CE_TO_RAM_WRITE_NOT_RDY 0xFFFFFFDF
+#define S_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 6)
+#define G_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV(x) (((x) >> 6) & 0x1)
+#define C_008670_CE_TO_INC_FIFO_NOT_RDY_TO_RCV 0xFFFFFFBF
+#define S_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 7)
+#define G_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV(x) (((x) >> 7) & 0x1)
+#define C_008670_CE_TO_WR_FIFO_NOT_RDY_TO_RCV 0xFFFFFF7F
+#define S_008670_CE_WAITING_ON_BUFFER_DATA(x) (((x) & 0x1) << 10)
+#define G_008670_CE_WAITING_ON_BUFFER_DATA(x) (((x) >> 10) & 0x1)
+#define C_008670_CE_WAITING_ON_BUFFER_DATA 0xFFFFFBFF
+#define S_008670_CE_WAITING_ON_CE_BUFFER_FLAG(x) (((x) & 0x1) << 11)
+#define G_008670_CE_WAITING_ON_CE_BUFFER_FLAG(x) (((x) >> 11) & 0x1)
+#define C_008670_CE_WAITING_ON_CE_BUFFER_FLAG 0xFFFFF7FF
+#define S_008670_CE_WAITING_ON_DE_COUNTER(x) (((x) & 0x1) << 12)
+#define G_008670_CE_WAITING_ON_DE_COUNTER(x) (((x) >> 12) & 0x1)
+#define C_008670_CE_WAITING_ON_DE_COUNTER 0xFFFFEFFF
+#define S_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(x) (((x) & 0x1) << 13)
+#define G_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW(x) (((x) >> 13) & 0x1)
+#define C_008670_CE_WAITING_ON_DE_COUNTER_UNDERFLOW 0xFFFFDFFF
+#define S_008670_TCIU_WAITING_ON_FREE(x) (((x) & 0x1) << 14)
+#define G_008670_TCIU_WAITING_ON_FREE(x) (((x) >> 14) & 0x1)
+#define C_008670_TCIU_WAITING_ON_FREE 0xFFFFBFFF
+#define S_008670_TCIU_WAITING_ON_TAGS(x) (((x) & 0x1) << 15)
+#define G_008670_TCIU_WAITING_ON_TAGS(x) (((x) >> 15) & 0x1)
+#define C_008670_TCIU_WAITING_ON_TAGS 0xFFFF7FFF
+#define S_008670_CE_STALLED_ON_TC_WR_CONFIRM(x) (((x) & 0x1) << 16)
+#define G_008670_CE_STALLED_ON_TC_WR_CONFIRM(x) (((x) >> 16) & 0x1)
+#define C_008670_CE_STALLED_ON_TC_WR_CONFIRM 0xFFFEFFFF
+#define S_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) & 0x1) << 17)
+#define G_008670_CE_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) >> 17) & 0x1)
+#define C_008670_CE_STALLED_ON_ATOMIC_RTN_DATA 0xFFFDFFFF
+#define S_008670_ATCL2IU_WAITING_ON_FREE(x) (((x) & 0x1) << 18)
+#define G_008670_ATCL2IU_WAITING_ON_FREE(x) (((x) >> 18) & 0x1)
+#define C_008670_ATCL2IU_WAITING_ON_FREE 0xFFFBFFFF
+#define S_008670_ATCL2IU_WAITING_ON_TAGS(x) (((x) & 0x1) << 19)
+#define G_008670_ATCL2IU_WAITING_ON_TAGS(x) (((x) >> 19) & 0x1)
+#define C_008670_ATCL2IU_WAITING_ON_TAGS 0xFFF7FFFF
+#define S_008670_ATCL1_WAITING_ON_TRANS(x) (((x) & 0x1) << 20)
+#define G_008670_ATCL1_WAITING_ON_TRANS(x) (((x) >> 20) & 0x1)
+#define C_008670_ATCL1_WAITING_ON_TRANS 0xFFEFFFFF
+#define R_008674_CP_STALLED_STAT1 0x008674
+#define S_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 0)
+#define G_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV(x) (((x) >> 0) & 0x1)
+#define C_008674_RBIU_TO_DMA_NOT_RDY_TO_RCV 0xFFFFFFFE
+#define S_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 2)
+#define G_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV(x) (((x) >> 2) & 0x1)
+#define C_008674_RBIU_TO_SEM_NOT_RDY_TO_RCV 0xFFFFFFFB
+#define S_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 4)
+#define G_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV(x) (((x) >> 4) & 0x1)
+#define C_008674_RBIU_TO_MEMWR_NOT_RDY_TO_RCV 0xFFFFFFEF
+#define S_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG(x) (((x) & 0x1) << 10)
+#define G_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG(x) (((x) >> 10) & 0x1)
+#define C_008674_ME_HAS_ACTIVE_CE_BUFFER_FLAG 0xFFFFFBFF
+#define S_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG(x) (((x) & 0x1) << 11)
+#define G_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG(x) (((x) >> 11) & 0x1)
+#define C_008674_ME_HAS_ACTIVE_DE_BUFFER_FLAG 0xFFFFF7FF
+#define S_008674_ME_STALLED_ON_TC_WR_CONFIRM(x) (((x) & 0x1) << 12)
+#define G_008674_ME_STALLED_ON_TC_WR_CONFIRM(x) (((x) >> 12) & 0x1)
+#define C_008674_ME_STALLED_ON_TC_WR_CONFIRM 0xFFFFEFFF
+#define S_008674_ME_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) & 0x1) << 13)
+#define G_008674_ME_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) >> 13) & 0x1)
+#define C_008674_ME_STALLED_ON_ATOMIC_RTN_DATA 0xFFFFDFFF
+#define S_008674_ME_WAITING_ON_TC_READ_DATA(x) (((x) & 0x1) << 14)
+#define G_008674_ME_WAITING_ON_TC_READ_DATA(x) (((x) >> 14) & 0x1)
+#define C_008674_ME_WAITING_ON_TC_READ_DATA 0xFFFFBFFF
+#define S_008674_ME_WAITING_ON_REG_READ_DATA(x) (((x) & 0x1) << 15)
+#define G_008674_ME_WAITING_ON_REG_READ_DATA(x) (((x) >> 15) & 0x1)
+#define C_008674_ME_WAITING_ON_REG_READ_DATA 0xFFFF7FFF
+#define S_008674_RCIU_WAITING_ON_GDS_FREE(x) (((x) & 0x1) << 23)
+#define G_008674_RCIU_WAITING_ON_GDS_FREE(x) (((x) >> 23) & 0x1)
+#define C_008674_RCIU_WAITING_ON_GDS_FREE 0xFF7FFFFF
+#define S_008674_RCIU_WAITING_ON_GRBM_FREE(x) (((x) & 0x1) << 24)
+#define G_008674_RCIU_WAITING_ON_GRBM_FREE(x) (((x) >> 24) & 0x1)
+#define C_008674_RCIU_WAITING_ON_GRBM_FREE 0xFEFFFFFF
+#define S_008674_RCIU_WAITING_ON_VGT_FREE(x) (((x) & 0x1) << 25)
+#define G_008674_RCIU_WAITING_ON_VGT_FREE(x) (((x) >> 25) & 0x1)
+#define C_008674_RCIU_WAITING_ON_VGT_FREE 0xFDFFFFFF
+#define S_008674_RCIU_STALLED_ON_ME_READ(x) (((x) & 0x1) << 26)
+#define G_008674_RCIU_STALLED_ON_ME_READ(x) (((x) >> 26) & 0x1)
+#define C_008674_RCIU_STALLED_ON_ME_READ 0xFBFFFFFF
+#define S_008674_RCIU_STALLED_ON_DMA_READ(x) (((x) & 0x1) << 27)
+#define G_008674_RCIU_STALLED_ON_DMA_READ(x) (((x) >> 27) & 0x1)
+#define C_008674_RCIU_STALLED_ON_DMA_READ 0xF7FFFFFF
+#define S_008674_RCIU_STALLED_ON_APPEND_READ(x) (((x) & 0x1) << 28)
+#define G_008674_RCIU_STALLED_ON_APPEND_READ(x) (((x) >> 28) & 0x1)
+#define C_008674_RCIU_STALLED_ON_APPEND_READ 0xEFFFFFFF
+#define S_008674_RCIU_HALTED_BY_REG_VIOLATION(x) (((x) & 0x1) << 29)
+#define G_008674_RCIU_HALTED_BY_REG_VIOLATION(x) (((x) >> 29) & 0x1)
+#define C_008674_RCIU_HALTED_BY_REG_VIOLATION 0xDFFFFFFF
+#define R_008678_CP_STALLED_STAT2 0x008678
+#define S_008678_PFP_TO_CSF_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 0)
+#define G_008678_PFP_TO_CSF_NOT_RDY_TO_RCV(x) (((x) >> 0) & 0x1)
+#define C_008678_PFP_TO_CSF_NOT_RDY_TO_RCV 0xFFFFFFFE
+#define S_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 1)
+#define G_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV(x) (((x) >> 1) & 0x1)
+#define C_008678_PFP_TO_MEQ_NOT_RDY_TO_RCV 0xFFFFFFFD
+#define S_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 2)
+#define G_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV(x) (((x) >> 2) & 0x1)
+#define C_008678_PFP_TO_RCIU_NOT_RDY_TO_RCV 0xFFFFFFFB
+#define S_008678_PFP_TO_VGT_WRITES_PENDING(x) (((x) & 0x1) << 4)
+#define G_008678_PFP_TO_VGT_WRITES_PENDING(x) (((x) >> 4) & 0x1)
+#define C_008678_PFP_TO_VGT_WRITES_PENDING 0xFFFFFFEF
+#define S_008678_PFP_RCIU_READ_PENDING(x) (((x) & 0x1) << 5)
+#define G_008678_PFP_RCIU_READ_PENDING(x) (((x) >> 5) & 0x1)
+#define C_008678_PFP_RCIU_READ_PENDING 0xFFFFFFDF
+#define S_008678_PFP_WAITING_ON_BUFFER_DATA(x) (((x) & 0x1) << 8)
+#define G_008678_PFP_WAITING_ON_BUFFER_DATA(x) (((x) >> 8) & 0x1)
+#define C_008678_PFP_WAITING_ON_BUFFER_DATA 0xFFFFFEFF
+#define S_008678_ME_WAIT_ON_CE_COUNTER(x) (((x) & 0x1) << 9)
+#define G_008678_ME_WAIT_ON_CE_COUNTER(x) (((x) >> 9) & 0x1)
+#define C_008678_ME_WAIT_ON_CE_COUNTER 0xFFFFFDFF
+#define S_008678_ME_WAIT_ON_AVAIL_BUFFER(x) (((x) & 0x1) << 10)
+#define G_008678_ME_WAIT_ON_AVAIL_BUFFER(x) (((x) >> 10) & 0x1)
+#define C_008678_ME_WAIT_ON_AVAIL_BUFFER 0xFFFFFBFF
+#define S_008678_GFX_CNTX_NOT_AVAIL_TO_ME(x) (((x) & 0x1) << 11)
+#define G_008678_GFX_CNTX_NOT_AVAIL_TO_ME(x) (((x) >> 11) & 0x1)
+#define C_008678_GFX_CNTX_NOT_AVAIL_TO_ME 0xFFFFF7FF
+#define S_008678_ME_RCIU_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 12)
+#define G_008678_ME_RCIU_NOT_RDY_TO_RCV(x) (((x) >> 12) & 0x1)
+#define C_008678_ME_RCIU_NOT_RDY_TO_RCV 0xFFFFEFFF
+#define S_008678_ME_TO_CONST_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 13)
+#define G_008678_ME_TO_CONST_NOT_RDY_TO_RCV(x) (((x) >> 13) & 0x1)
+#define C_008678_ME_TO_CONST_NOT_RDY_TO_RCV 0xFFFFDFFF
+#define S_008678_ME_WAITING_DATA_FROM_PFP(x) (((x) & 0x1) << 14)
+#define G_008678_ME_WAITING_DATA_FROM_PFP(x) (((x) >> 14) & 0x1)
+#define C_008678_ME_WAITING_DATA_FROM_PFP 0xFFFFBFFF
+#define S_008678_ME_WAITING_ON_PARTIAL_FLUSH(x) (((x) & 0x1) << 15)
+#define G_008678_ME_WAITING_ON_PARTIAL_FLUSH(x) (((x) >> 15) & 0x1)
+#define C_008678_ME_WAITING_ON_PARTIAL_FLUSH 0xFFFF7FFF
+#define S_008678_MEQ_TO_ME_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 16)
+#define G_008678_MEQ_TO_ME_NOT_RDY_TO_RCV(x) (((x) >> 16) & 0x1)
+#define C_008678_MEQ_TO_ME_NOT_RDY_TO_RCV 0xFFFEFFFF
+#define S_008678_STQ_TO_ME_NOT_RDY_TO_RCV(x) (((x) & 0x1) << 17)
+#define G_008678_STQ_TO_ME_NOT_RDY_TO_RCV(x) (((x) >> 17) & 0x1)
+#define C_008678_STQ_TO_ME_NOT_RDY_TO_RCV 0xFFFDFFFF
+#define S_008678_ME_WAITING_DATA_FROM_STQ(x) (((x) & 0x1) << 18)
+#define G_008678_ME_WAITING_DATA_FROM_STQ(x) (((x) >> 18) & 0x1)
+#define C_008678_ME_WAITING_DATA_FROM_STQ 0xFFFBFFFF
+#define S_008678_PFP_STALLED_ON_TC_WR_CONFIRM(x) (((x) & 0x1) << 19)
+#define G_008678_PFP_STALLED_ON_TC_WR_CONFIRM(x) (((x) >> 19) & 0x1)
+#define C_008678_PFP_STALLED_ON_TC_WR_CONFIRM 0xFFF7FFFF
+#define S_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) & 0x1) << 20)
+#define G_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA(x) (((x) >> 20) & 0x1)
+#define C_008678_PFP_STALLED_ON_ATOMIC_RTN_DATA 0xFFEFFFFF
+#define S_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE(x) (((x) & 0x1) << 21)
+#define G_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE(x) (((x) >> 21) & 0x1)
+#define C_008678_EOPD_FIFO_NEEDS_SC_EOP_DONE 0xFFDFFFFF
+#define S_008678_EOPD_FIFO_NEEDS_WR_CONFIRM(x) (((x) & 0x1) << 22)
+#define G_008678_EOPD_FIFO_NEEDS_WR_CONFIRM(x) (((x) >> 22) & 0x1)
+#define C_008678_EOPD_FIFO_NEEDS_WR_CONFIRM 0xFFBFFFFF
+#define S_008678_STRMO_WR_OF_PRIM_DATA_PENDING(x) (((x) & 0x1) << 23)
+#define G_008678_STRMO_WR_OF_PRIM_DATA_PENDING(x) (((x) >> 23) & 0x1)
+#define C_008678_STRMO_WR_OF_PRIM_DATA_PENDING 0xFF7FFFFF
+#define S_008678_PIPE_STATS_WR_DATA_PENDING(x) (((x) & 0x1) << 24)
+#define G_008678_PIPE_STATS_WR_DATA_PENDING(x) (((x) >> 24) & 0x1)
+#define C_008678_PIPE_STATS_WR_DATA_PENDING 0xFEFFFFFF
+#define S_008678_APPEND_RDY_WAIT_ON_CS_DONE(x) (((x) & 0x1) << 25)
+#define G_008678_APPEND_RDY_WAIT_ON_CS_DONE(x) (((x) >> 25) & 0x1)
+#define C_008678_APPEND_RDY_WAIT_ON_CS_DONE 0xFDFFFFFF
+#define S_008678_APPEND_RDY_WAIT_ON_PS_DONE(x) (((x) & 0x1) << 26)
+#define G_008678_APPEND_RDY_WAIT_ON_PS_DONE(x) (((x) >> 26) & 0x1)
+#define C_008678_APPEND_RDY_WAIT_ON_PS_DONE 0xFBFFFFFF
+#define S_008678_APPEND_WAIT_ON_WR_CONFIRM(x) (((x) & 0x1) << 27)
+#define G_008678_APPEND_WAIT_ON_WR_CONFIRM(x) (((x) >> 27) & 0x1)
+#define C_008678_APPEND_WAIT_ON_WR_CONFIRM 0xF7FFFFFF
+#define S_008678_APPEND_ACTIVE_PARTITION(x) (((x) & 0x1) << 28)
+#define G_008678_APPEND_ACTIVE_PARTITION(x) (((x) >> 28) & 0x1)
+#define C_008678_APPEND_ACTIVE_PARTITION 0xEFFFFFFF
+#define S_008678_APPEND_WAITING_TO_SEND_MEMWRITE(x) (((x) & 0x1) << 29)
+#define G_008678_APPEND_WAITING_TO_SEND_MEMWRITE(x) (((x) >> 29) & 0x1)
+#define C_008678_APPEND_WAITING_TO_SEND_MEMWRITE 0xDFFFFFFF
+#define S_008678_SURF_SYNC_NEEDS_IDLE_CNTXS(x) (((x) & 0x1) << 30)
+#define G_008678_SURF_SYNC_NEEDS_IDLE_CNTXS(x) (((x) >> 30) & 0x1)
+#define C_008678_SURF_SYNC_NEEDS_IDLE_CNTXS 0xBFFFFFFF
+#define S_008678_SURF_SYNC_NEEDS_ALL_CLEAN(x) (((x) & 0x1) << 31)
+#define G_008678_SURF_SYNC_NEEDS_ALL_CLEAN(x) (((x) >> 31) & 0x1)
+#define C_008678_SURF_SYNC_NEEDS_ALL_CLEAN 0x7FFFFFFF
+#define R_008680_CP_STAT 0x008680
+#define S_008680_ROQ_RING_BUSY(x) (((x) & 0x1) << 9)
+#define G_008680_ROQ_RING_BUSY(x) (((x) >> 9) & 0x1)
+#define C_008680_ROQ_RING_BUSY 0xFFFFFDFF
+#define S_008680_ROQ_INDIRECT1_BUSY(x) (((x) & 0x1) << 10)
+#define G_008680_ROQ_INDIRECT1_BUSY(x) (((x) >> 10) & 0x1)
+#define C_008680_ROQ_INDIRECT1_BUSY 0xFFFFFBFF
+#define S_008680_ROQ_INDIRECT2_BUSY(x) (((x) & 0x1) << 11)
+#define G_008680_ROQ_INDIRECT2_BUSY(x) (((x) >> 11) & 0x1)
+#define C_008680_ROQ_INDIRECT2_BUSY 0xFFFFF7FF
+#define S_008680_ROQ_STATE_BUSY(x) (((x) & 0x1) << 12)
+#define G_008680_ROQ_STATE_BUSY(x) (((x) >> 12) & 0x1)
+#define C_008680_ROQ_STATE_BUSY 0xFFFFEFFF
+#define S_008680_DC_BUSY(x) (((x) & 0x1) << 13)
+#define G_008680_DC_BUSY(x) (((x) >> 13) & 0x1)
+#define C_008680_DC_BUSY 0xFFFFDFFF
+#define S_008680_ATCL2IU_BUSY(x) (((x) & 0x1) << 14)
+#define G_008680_ATCL2IU_BUSY(x) (((x) >> 14) & 0x1)
+#define C_008680_ATCL2IU_BUSY 0xFFFFBFFF
+#define S_008680_PFP_BUSY(x) (((x) & 0x1) << 15)
+#define G_008680_PFP_BUSY(x) (((x) >> 15) & 0x1)
+#define C_008680_PFP_BUSY 0xFFFF7FFF
+#define S_008680_MEQ_BUSY(x) (((x) & 0x1) << 16)
+#define G_008680_MEQ_BUSY(x) (((x) >> 16) & 0x1)
+#define C_008680_MEQ_BUSY 0xFFFEFFFF
+#define S_008680_ME_BUSY(x) (((x) & 0x1) << 17)
+#define G_008680_ME_BUSY(x) (((x) >> 17) & 0x1)
+#define C_008680_ME_BUSY 0xFFFDFFFF
+#define S_008680_QUERY_BUSY(x) (((x) & 0x1) << 18)
+#define G_008680_QUERY_BUSY(x) (((x) >> 18) & 0x1)
+#define C_008680_QUERY_BUSY 0xFFFBFFFF
+#define S_008680_SEMAPHORE_BUSY(x) (((x) & 0x1) << 19)
+#define G_008680_SEMAPHORE_BUSY(x) (((x) >> 19) & 0x1)
+#define C_008680_SEMAPHORE_BUSY 0xFFF7FFFF
+#define S_008680_INTERRUPT_BUSY(x) (((x) & 0x1) << 20)
+#define G_008680_INTERRUPT_BUSY(x) (((x) >> 20) & 0x1)
+#define C_008680_INTERRUPT_BUSY 0xFFEFFFFF
+#define S_008680_SURFACE_SYNC_BUSY(x) (((x) & 0x1) << 21)
+#define G_008680_SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)
+#define C_008680_SURFACE_SYNC_BUSY 0xFFDFFFFF
+#define S_008680_DMA_BUSY(x) (((x) & 0x1) << 22)
+#define G_008680_DMA_BUSY(x) (((x) >> 22) & 0x1)
+#define C_008680_DMA_BUSY 0xFFBFFFFF
+#define S_008680_RCIU_BUSY(x) (((x) & 0x1) << 23)
+#define G_008680_RCIU_BUSY(x) (((x) >> 23) & 0x1)
+#define C_008680_RCIU_BUSY 0xFF7FFFFF
+#define S_008680_SCRATCH_RAM_BUSY(x) (((x) & 0x1) << 24)
+#define G_008680_SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)
+#define C_008680_SCRATCH_RAM_BUSY 0xFEFFFFFF
+#define S_008680_CPC_CPG_BUSY(x) (((x) & 0x1) << 25)
+#define G_008680_CPC_CPG_BUSY(x) (((x) >> 25) & 0x1)
+#define C_008680_CPC_CPG_BUSY 0xFDFFFFFF
+#define S_008680_CE_BUSY(x) (((x) & 0x1) << 26)
+#define G_008680_CE_BUSY(x) (((x) >> 26) & 0x1)
+#define C_008680_CE_BUSY 0xFBFFFFFF
+#define S_008680_TCIU_BUSY(x) (((x) & 0x1) << 27)
+#define G_008680_TCIU_BUSY(x) (((x) >> 27) & 0x1)
+#define C_008680_TCIU_BUSY 0xF7FFFFFF
+#define S_008680_ROQ_CE_RING_BUSY(x) (((x) & 0x1) << 28)
+#define G_008680_ROQ_CE_RING_BUSY(x) (((x) >> 28) & 0x1)
+#define C_008680_ROQ_CE_RING_BUSY 0xEFFFFFFF
+#define S_008680_ROQ_CE_INDIRECT1_BUSY(x) (((x) & 0x1) << 29)
+#define G_008680_ROQ_CE_INDIRECT1_BUSY(x) (((x) >> 29) & 0x1)
+#define C_008680_ROQ_CE_INDIRECT1_BUSY 0xDFFFFFFF
+#define S_008680_ROQ_CE_INDIRECT2_BUSY(x) (((x) & 0x1) << 30)
+#define G_008680_ROQ_CE_INDIRECT2_BUSY(x) (((x) >> 30) & 0x1)
+#define C_008680_ROQ_CE_INDIRECT2_BUSY 0xBFFFFFFF
+#define S_008680_CP_BUSY(x) (((x) & 0x1) << 31)
+#define G_008680_CP_BUSY(x) (((x) >> 31) & 0x1)
+#define C_008680_CP_BUSY 0x7FFFFFFF
/* CIK */
+#define R_030800_GRBM_GFX_INDEX 0x030800
+#define S_030800_INSTANCE_INDEX(x) (((x) & 0xFF) << 0)
+#define G_030800_INSTANCE_INDEX(x) (((x) >> 0) & 0xFF)
+#define C_030800_INSTANCE_INDEX 0xFFFFFF00
+#define S_030800_SH_INDEX(x) (((x) & 0xFF) << 8)
+#define G_030800_SH_INDEX(x) (((x) >> 8) & 0xFF)
+#define C_030800_SH_INDEX 0xFFFF00FF
+#define S_030800_SE_INDEX(x) (((x) & 0xFF) << 16)
+#define G_030800_SE_INDEX(x) (((x) >> 16) & 0xFF)
+#define C_030800_SE_INDEX 0xFF00FFFF
+#define S_030800_SH_BROADCAST_WRITES(x) (((x) & 0x1) << 29)
+#define G_030800_SH_BROADCAST_WRITES(x) (((x) >> 29) & 0x1)
+#define C_030800_SH_BROADCAST_WRITES 0xDFFFFFFF
+#define S_030800_INSTANCE_BROADCAST_WRITES(x) (((x) & 0x1) << 30)
+#define G_030800_INSTANCE_BROADCAST_WRITES(x) (((x) >> 30) & 0x1)
+#define C_030800_INSTANCE_BROADCAST_WRITES 0xBFFFFFFF
+#define S_030800_SE_BROADCAST_WRITES(x) (((x) & 0x1) << 31)
+#define G_030800_SE_BROADCAST_WRITES(x) (((x) >> 31) & 0x1)
+#define C_030800_SE_BROADCAST_WRITES 0x7FFFFFFF
+#define R_030900_VGT_ESGS_RING_SIZE 0x030900
+#define R_030904_VGT_GSVS_RING_SIZE 0x030904
#define R_030908_VGT_PRIMITIVE_TYPE 0x030908
#define S_030908_PRIM_TYPE(x) (((x) & 0x3F) << 0)
#define G_030908_PRIM_TYPE(x) (((x) >> 0) & 0x3F)
@@ -530,6 +1736,34 @@
#define S_030A04_CURRENT_COUNT(x) (((x) & 0xFF) << 8)
#define G_030A04_CURRENT_COUNT(x) (((x) >> 8) & 0xFF)
#define C_030A04_CURRENT_COUNT 0xFFFF00FF
+#define R_030A10_PA_SC_SCREEN_EXTENT_MIN_0 0x030A10
+#define S_030A10_X(x) (((x) & 0xFFFF) << 0)
+#define G_030A10_X(x) (((x) >> 0) & 0xFFFF)
+#define C_030A10_X 0xFFFF0000
+#define S_030A10_Y(x) (((x) & 0xFFFF) << 16)
+#define G_030A10_Y(x) (((x) >> 16) & 0xFFFF)
+#define C_030A10_Y 0x0000FFFF
+#define R_030A14_PA_SC_SCREEN_EXTENT_MAX_0 0x030A14
+#define S_030A14_X(x) (((x) & 0xFFFF) << 0)
+#define G_030A14_X(x) (((x) >> 0) & 0xFFFF)
+#define C_030A14_X 0xFFFF0000
+#define S_030A14_Y(x) (((x) & 0xFFFF) << 16)
+#define G_030A14_Y(x) (((x) >> 16) & 0xFFFF)
+#define C_030A14_Y 0x0000FFFF
+#define R_030A18_PA_SC_SCREEN_EXTENT_MIN_1 0x030A18
+#define S_030A18_X(x) (((x) & 0xFFFF) << 0)
+#define G_030A18_X(x) (((x) >> 0) & 0xFFFF)
+#define C_030A18_X 0xFFFF0000
+#define S_030A18_Y(x) (((x) & 0xFFFF) << 16)
+#define G_030A18_Y(x) (((x) >> 16) & 0xFFFF)
+#define C_030A18_Y 0x0000FFFF
+#define R_030A2C_PA_SC_SCREEN_EXTENT_MAX_1 0x030A2C
+#define S_030A2C_X(x) (((x) & 0xFFFF) << 0)
+#define G_030A2C_X(x) (((x) >> 0) & 0xFFFF)
+#define C_030A2C_X 0xFFFF0000
+#define S_030A2C_Y(x) (((x) & 0xFFFF) << 16)
+#define G_030A2C_Y(x) (((x) >> 16) & 0xFFFF)
+#define C_030A2C_Y 0x0000FFFF
/* */
#define R_008BF0_PA_SC_ENHANCE 0x008BF0
#define S_008BF0_ENABLE_PA_SC_OUT_OF_ORDER(x) (((x) & 0x1) << 0)
@@ -608,6 +1842,32 @@
#define V_008DFC_SQ_VGPR 0x00
/* */
#define R_008DFC_SQ_INST 0x008DFC
+#define R_030D20_SQC_CACHES 0x030D20
+#define S_030D20_TARGET_INST(x) (((x) & 0x1) << 0)
+#define G_030D20_TARGET_INST(x) (((x) >> 0) & 0x1)
+#define C_030D20_TARGET_INST 0xFFFFFFFE
+#define S_030D20_TARGET_DATA(x) (((x) & 0x1) << 1)
+#define G_030D20_TARGET_DATA(x) (((x) >> 1) & 0x1)
+#define C_030D20_TARGET_DATA 0xFFFFFFFD
+#define S_030D20_INVALIDATE(x) (((x) & 0x1) << 2)
+#define G_030D20_INVALIDATE(x) (((x) >> 2) & 0x1)
+#define C_030D20_INVALIDATE 0xFFFFFFFB
+#define S_030D20_WRITEBACK(x) (((x) & 0x1) << 3)
+#define G_030D20_WRITEBACK(x) (((x) >> 3) & 0x1)
+#define C_030D20_WRITEBACK 0xFFFFFFF7
+#define S_030D20_VOL(x) (((x) & 0x1) << 4)
+#define G_030D20_VOL(x) (((x) >> 4) & 0x1)
+#define C_030D20_VOL 0xFFFFFFEF
+#define S_030D20_COMPLETE(x) (((x) & 0x1) << 16)
+#define G_030D20_COMPLETE(x) (((x) >> 16) & 0x1)
+#define C_030D20_COMPLETE 0xFFFEFFFF
+#define R_030D24_SQC_WRITEBACK 0x030D24
+#define S_030D24_DWB(x) (((x) & 0x1) << 0)
+#define G_030D24_DWB(x) (((x) >> 0) & 0x1)
+#define C_030D24_DWB 0xFFFFFFFE
+#define S_030D24_DIRTY(x) (((x) & 0x1) << 1)
+#define G_030D24_DIRTY(x) (((x) >> 1) & 0x1)
+#define C_030D24_DIRTY 0xFFFFFFFD
#define R_008DFC_SQ_VOP1 0x008DFC
#define S_008DFC_SRC0(x) (((x) & 0x1FF) << 0)
#define G_008DFC_SRC0(x) (((x) >> 0) & 0x1FF)
@@ -3740,7 +5000,17 @@
#define C_008DFC_ENCODING 0x03FFFFFF
#define V_008DFC_SQ_ENC_MUBUF_FIELD 0x38
#endif
+#define R_030E00_TA_CS_BC_BASE_ADDR 0x030E00
+#define R_030E04_TA_CS_BC_BASE_ADDR_HI 0x030E04
+#define S_030E04_ADDRESS(x) (((x) & 0xFF) << 0)
+#define G_030E04_ADDRESS(x) (((x) >> 0) & 0xFF)
+#define C_030E04_ADDRESS 0xFFFFFF00
+#define R_030F00_DB_OCCLUSION_COUNT0_LOW 0x030F00
#define R_008F00_SQ_BUF_RSRC_WORD0 0x008F00
+#define R_030F04_DB_OCCLUSION_COUNT0_HI 0x030F04
+#define S_030F04_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030F04_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030F04_COUNT_HI 0x80000000
#define R_008F04_SQ_BUF_RSRC_WORD1 0x008F04
#define S_008F04_BASE_ADDRESS_HI(x) (((x) & 0xFFFF) << 0)
#define G_008F04_BASE_ADDRESS_HI(x) (((x) >> 0) & 0xFFFF)
@@ -3754,7 +5024,12 @@
#define S_008F04_SWIZZLE_ENABLE(x) (((x) & 0x1) << 31)
#define G_008F04_SWIZZLE_ENABLE(x) (((x) >> 31) & 0x1)
#define C_008F04_SWIZZLE_ENABLE 0x7FFFFFFF
+#define R_030F08_DB_OCCLUSION_COUNT1_LOW 0x030F08
#define R_008F08_SQ_BUF_RSRC_WORD2 0x008F08
+#define R_030F0C_DB_OCCLUSION_COUNT1_HI 0x030F0C
+#define S_030F0C_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030F0C_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030F0C_COUNT_HI 0x80000000
#define R_008F0C_SQ_BUF_RSRC_WORD3 0x008F0C
#define S_008F0C_DST_SEL_X(x) (((x) & 0x07) << 0)
#define G_008F0C_DST_SEL_X(x) (((x) >> 0) & 0x07)
@@ -3862,7 +5137,12 @@
#define V_008F0C_SQ_RSRC_BUF_RSVD_1 0x01
#define V_008F0C_SQ_RSRC_BUF_RSVD_2 0x02
#define V_008F0C_SQ_RSRC_BUF_RSVD_3 0x03
+#define R_030F10_DB_OCCLUSION_COUNT2_LOW 0x030F10
#define R_008F10_SQ_IMG_RSRC_WORD0 0x008F10
+#define R_030F14_DB_OCCLUSION_COUNT2_HI 0x030F14
+#define S_030F14_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030F14_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030F14_COUNT_HI 0x80000000
#define R_008F14_SQ_IMG_RSRC_WORD1 0x008F14
#define S_008F14_BASE_ADDRESS_HI(x) (((x) & 0xFF) << 0)
#define G_008F14_BASE_ADDRESS_HI(x) (((x) >> 0) & 0xFF)
@@ -3961,6 +5241,7 @@
#define G_008F14_MTYPE(x) (((x) >> 30) & 0x03)
#define C_008F14_MTYPE 0x3FFFFFFF
/* */
+#define R_030F18_DB_OCCLUSION_COUNT3_LOW 0x030F18
#define R_008F18_SQ_IMG_RSRC_WORD2 0x008F18
#define S_008F18_WIDTH(x) (((x) & 0x3FFF) << 0)
#define G_008F18_WIDTH(x) (((x) >> 0) & 0x3FFF)
@@ -3974,6 +5255,10 @@
#define S_008F18_INTERLACED(x) (((x) & 0x1) << 31)
#define G_008F18_INTERLACED(x) (((x) >> 31) & 0x1)
#define C_008F18_INTERLACED 0x7FFFFFFF
+#define R_030F1C_DB_OCCLUSION_COUNT3_HI 0x030F1C
+#define S_030F1C_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030F1C_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030F1C_COUNT_HI 0x80000000
#define R_008F1C_SQ_IMG_RSRC_WORD3 0x008F1C
#define S_008F1C_DST_SEL_X(x) (((x) & 0x07) << 0)
#define G_008F1C_DST_SEL_X(x) (((x) >> 0) & 0x07)
@@ -4084,6 +5369,23 @@
#define G_008F28_LOD_HDW_CNT_EN(x) (((x) >> 20) & 0x1)
#define C_008F28_LOD_HDW_CNT_EN 0xFFEFFFFF
/* */
+/* VI */
+#define S_008F28_COMPRESSION_EN(x) (((x) & 0x1) << 21)
+#define G_008F28_COMPRESSION_EN(x) (((x) >> 21) & 0x1)
+#define C_008F28_COMPRESSION_EN 0xFFDFFFFF
+#define S_008F28_ALPHA_IS_ON_MSB(x) (((x) & 0x1) << 22)
+#define G_008F28_ALPHA_IS_ON_MSB(x) (((x) >> 22) & 0x1)
+#define C_008F28_ALPHA_IS_ON_MSB 0xFFBFFFFF
+#define S_008F28_COLOR_TRANSFORM(x) (((x) & 0x1) << 23)
+#define G_008F28_COLOR_TRANSFORM(x) (((x) >> 23) & 0x1)
+#define C_008F28_COLOR_TRANSFORM 0xFF7FFFFF
+#define S_008F28_LOST_ALPHA_BITS(x) (((x) & 0x0F) << 24)
+#define G_008F28_LOST_ALPHA_BITS(x) (((x) >> 24) & 0x0F)
+#define C_008F28_LOST_ALPHA_BITS 0xF0FFFFFF
+#define S_008F28_LOST_COLOR_BITS(x) (((x) & 0x0F) << 28)
+#define G_008F28_LOST_COLOR_BITS(x) (((x) >> 28) & 0x0F)
+#define C_008F28_LOST_COLOR_BITS 0x0FFFFFFF
+/* */
#define R_008F2C_SQ_IMG_RSRC_WORD7 0x008F2C
#define R_008F30_SQ_IMG_SAMP_WORD0 0x008F30
#define S_008F30_CLAMP_X(x) (((x) & 0x07) << 0)
@@ -4148,6 +5450,11 @@
#define S_008F30_FILTER_MODE(x) (((x) & 0x03) << 29)
#define G_008F30_FILTER_MODE(x) (((x) >> 29) & 0x03)
#define C_008F30_FILTER_MODE 0x9FFFFFFF
+/* VI */
+#define S_008F30_COMPAT_MODE(x) (((x) & 0x1) << 31)
+#define G_008F30_COMPAT_MODE(x) (((x) >> 31) & 0x1)
+#define C_008F30_COMPAT_MODE 0x7FFFFFFF
+/* */
#define R_008F34_SQ_IMG_SAMP_WORD1 0x008F34
#define S_008F34_MIN_LOD(x) (((x) & 0xFFF) << 0)
#define G_008F34_MIN_LOD(x) (((x) >> 0) & 0xFFF)
@@ -4313,6 +5620,11 @@
#define G_008F44_OFFSET(x) (((x) >> 0) & 0xFFFFFF)
#define C_008F44_OFFSET 0xFF000000
/* */
+#define R_030FF8_DB_ZPASS_COUNT_LOW 0x030FF8
+#define R_030FFC_DB_ZPASS_COUNT_HI 0x030FFC
+#define S_030FFC_COUNT_HI(x) (((x) & 0x7FFFFFFF) << 0)
+#define G_030FFC_COUNT_HI(x) (((x) >> 0) & 0x7FFFFFFF)
+#define C_030FFC_COUNT_HI 0x80000000
#define R_009100_SPI_CONFIG_CNTL 0x009100
#define S_009100_GPR_WRITE_PRIORITY(x) (((x) & 0x1FFFFF) << 0)
#define G_009100_GPR_WRITE_PRIORITY(x) (((x) >> 0) & 0x1FFFFF)
@@ -4437,6 +5749,34 @@
#define S_009858_MSAA16_Y(x) (((x) & 0x03) << 18)
#define G_009858_MSAA16_Y(x) (((x) >> 18) & 0x03)
#define C_009858_MSAA16_Y 0xFFF3FFFF
+#define R_0098F8_GB_ADDR_CONFIG 0x0098F8
+#define S_0098F8_NUM_PIPES(x) (((x) & 0x07) << 0)
+#define G_0098F8_NUM_PIPES(x) (((x) >> 0) & 0x07)
+#define C_0098F8_NUM_PIPES 0xFFFFFFF8
+#define S_0098F8_PIPE_INTERLEAVE_SIZE(x) (((x) & 0x07) << 4)
+#define G_0098F8_PIPE_INTERLEAVE_SIZE(x) (((x) >> 4) & 0x07)
+#define C_0098F8_PIPE_INTERLEAVE_SIZE 0xFFFFFF8F
+#define S_0098F8_BANK_INTERLEAVE_SIZE(x) (((x) & 0x07) << 8)
+#define G_0098F8_BANK_INTERLEAVE_SIZE(x) (((x) >> 8) & 0x07)
+#define C_0098F8_BANK_INTERLEAVE_SIZE 0xFFFFF8FF
+#define S_0098F8_NUM_SHADER_ENGINES(x) (((x) & 0x03) << 12)
+#define G_0098F8_NUM_SHADER_ENGINES(x) (((x) >> 12) & 0x03)
+#define C_0098F8_NUM_SHADER_ENGINES 0xFFFFCFFF
+#define S_0098F8_SHADER_ENGINE_TILE_SIZE(x) (((x) & 0x07) << 16)
+#define G_0098F8_SHADER_ENGINE_TILE_SIZE(x) (((x) >> 16) & 0x07)
+#define C_0098F8_SHADER_ENGINE_TILE_SIZE 0xFFF8FFFF
+#define S_0098F8_NUM_GPUS(x) (((x) & 0x07) << 20)
+#define G_0098F8_NUM_GPUS(x) (((x) >> 20) & 0x07)
+#define C_0098F8_NUM_GPUS 0xFF8FFFFF
+#define S_0098F8_MULTI_GPU_TILE_SIZE(x) (((x) & 0x03) << 24)
+#define G_0098F8_MULTI_GPU_TILE_SIZE(x) (((x) >> 24) & 0x03)
+#define C_0098F8_MULTI_GPU_TILE_SIZE 0xFCFFFFFF
+#define S_0098F8_ROW_SIZE(x) (((x) & 0x03) << 28)
+#define G_0098F8_ROW_SIZE(x) (((x) >> 28) & 0x03)
+#define C_0098F8_ROW_SIZE 0xCFFFFFFF
+#define S_0098F8_NUM_LOWER_PIPES(x) (((x) & 0x1) << 30)
+#define G_0098F8_NUM_LOWER_PIPES(x) (((x) >> 30) & 0x1)
+#define C_0098F8_NUM_LOWER_PIPES 0xBFFFFFFF
#define R_009910_GB_TILE_MODE0 0x009910
#define S_009910_MICRO_TILE_MODE(x) (((x) & 0x03) << 0)
#define G_009910_MICRO_TILE_MODE(x) (((x) >> 0) & 0x03)
@@ -4515,14 +5855,88 @@
#define V_009910_ADDR_SURF_4_BANK 0x01
#define V_009910_ADDR_SURF_8_BANK 0x02
#define V_009910_ADDR_SURF_16_BANK 0x03
-/* CIK */
#define S_009910_MICRO_TILE_MODE_NEW(x) (((x) & 0x07) << 22)
#define G_009910_MICRO_TILE_MODE_NEW(x) (((x) >> 22) & 0x07)
-#define C_009910_MICRO_TILE_MODE_NEW(x) 0xFE3FFFFF
+#define C_009910_MICRO_TILE_MODE_NEW 0xFE3FFFFF
#define V_009910_ADDR_SURF_DISPLAY_MICRO_TILING 0x00
#define V_009910_ADDR_SURF_THIN_MICRO_TILING 0x01
#define V_009910_ADDR_SURF_DEPTH_MICRO_TILING 0x02
#define V_009910_ADDR_SURF_ROTATED_MICRO_TILING 0x03
+#define S_009910_SAMPLE_SPLIT(x) (((x) & 0x03) << 25)
+#define G_009910_SAMPLE_SPLIT(x) (((x) >> 25) & 0x03)
+#define C_009910_SAMPLE_SPLIT 0xF9FFFFFF
+#define R_009914_GB_TILE_MODE1 0x009914
+#define R_009918_GB_TILE_MODE2 0x009918
+#define R_00991C_GB_TILE_MODE3 0x00991C
+#define R_009920_GB_TILE_MODE4 0x009920
+#define R_009924_GB_TILE_MODE5 0x009924
+#define R_009928_GB_TILE_MODE6 0x009928
+#define R_00992C_GB_TILE_MODE7 0x00992C
+#define R_009930_GB_TILE_MODE8 0x009930
+#define R_009934_GB_TILE_MODE9 0x009934
+#define R_009938_GB_TILE_MODE10 0x009938
+#define R_00993C_GB_TILE_MODE11 0x00993C
+#define R_009940_GB_TILE_MODE12 0x009940
+#define R_009944_GB_TILE_MODE13 0x009944
+#define R_009948_GB_TILE_MODE14 0x009948
+#define R_00994C_GB_TILE_MODE15 0x00994C
+#define R_009950_GB_TILE_MODE16 0x009950
+#define R_009954_GB_TILE_MODE17 0x009954
+#define R_009958_GB_TILE_MODE18 0x009958
+#define R_00995C_GB_TILE_MODE19 0x00995C
+#define R_009960_GB_TILE_MODE20 0x009960
+#define R_009964_GB_TILE_MODE21 0x009964
+#define R_009968_GB_TILE_MODE22 0x009968
+#define R_00996C_GB_TILE_MODE23 0x00996C
+#define R_009970_GB_TILE_MODE24 0x009970
+#define R_009974_GB_TILE_MODE25 0x009974
+#define R_009978_GB_TILE_MODE26 0x009978
+#define R_00997C_GB_TILE_MODE27 0x00997C
+#define R_009980_GB_TILE_MODE28 0x009980
+#define R_009984_GB_TILE_MODE29 0x009984
+#define R_009988_GB_TILE_MODE30 0x009988
+#define R_00998C_GB_TILE_MODE31 0x00998C
+/* CIK */
+#define R_009990_GB_MACROTILE_MODE0 0x009990
+#define S_009990_BANK_WIDTH(x) (((x) & 0x03) << 0)
+#define G_009990_BANK_WIDTH(x) (((x) >> 0) & 0x03)
+#define C_009990_BANK_WIDTH 0xFFFFFFFC
+#define S_009990_BANK_HEIGHT(x) (((x) & 0x03) << 2)
+#define G_009990_BANK_HEIGHT(x) (((x) >> 2) & 0x03)
+#define C_009990_BANK_HEIGHT 0xFFFFFFF3
+#define S_009990_MACRO_TILE_ASPECT(x) (((x) & 0x03) << 4)
+#define G_009990_MACRO_TILE_ASPECT(x) (((x) >> 4) & 0x03)
+#define C_009990_MACRO_TILE_ASPECT 0xFFFFFFCF
+#define S_009990_NUM_BANKS(x) (((x) & 0x03) << 6)
+#define G_009990_NUM_BANKS(x) (((x) >> 6) & 0x03)
+#define C_009990_NUM_BANKS 0xFFFFFF3F
+#define R_009994_GB_MACROTILE_MODE1 0x009994
+#define R_009998_GB_MACROTILE_MODE2 0x009998
+#define R_00999C_GB_MACROTILE_MODE3 0x00999C
+#define R_0099A0_GB_MACROTILE_MODE4 0x0099A0
+#define R_0099A4_GB_MACROTILE_MODE5 0x0099A4
+#define R_0099A8_GB_MACROTILE_MODE6 0x0099A8
+#define R_0099AC_GB_MACROTILE_MODE7 0x0099AC
+#define R_0099B0_GB_MACROTILE_MODE8 0x0099B0
+#define R_0099B4_GB_MACROTILE_MODE9 0x0099B4
+#define R_0099B8_GB_MACROTILE_MODE10 0x0099B8
+#define R_0099BC_GB_MACROTILE_MODE11 0x0099BC
+#define R_0099C0_GB_MACROTILE_MODE12 0x0099C0
+#define R_0099C4_GB_MACROTILE_MODE13 0x0099C4
+#define R_0099C8_GB_MACROTILE_MODE14 0x0099C8
+#define R_0099CC_GB_MACROTILE_MODE15 0x0099CC
+/* */
+#define R_00B000_SPI_SHADER_TBA_LO_PS 0x00B000
+#define R_00B004_SPI_SHADER_TBA_HI_PS 0x00B004
+#define S_00B004_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B004_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B004_MEM_BASE 0xFFFFFF00
+#define R_00B008_SPI_SHADER_TMA_LO_PS 0x00B008
+#define R_00B00C_SPI_SHADER_TMA_HI_PS 0x00B00C
+#define S_00B00C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B00C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B00C_MEM_BASE 0xFFFFFF00
+/* CIK */
#define R_00B01C_SPI_SHADER_PGM_RSRC3_PS 0x00B01C
#define S_00B01C_CU_EN(x) (((x) & 0xFFFF) << 0)
#define G_00B01C_CU_EN(x) (((x) >> 0) & 0xFFFF)
@@ -4582,6 +5996,9 @@
#define S_00B02C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B02C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B02C_USER_SGPR 0xFFFFFFC1
+#define S_00B02C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B02C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B02C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B02C_WAVE_CNT_EN(x) (((x) & 0x1) << 7)
#define G_00B02C_WAVE_CNT_EN(x) (((x) >> 7) & 0x1)
#define C_00B02C_WAVE_CNT_EN 0xFFFFFF7F
@@ -4591,6 +6008,9 @@
#define S_00B02C_EXCP_EN(x) (((x) & 0x7F) << 16) /* mask is 0x1FF on CIK */
#define G_00B02C_EXCP_EN(x) (((x) >> 16) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B02C_EXCP_EN 0xFF80FFFF /* mask is 0x1FF on CIK */
+#define S_00B02C_EXCP_EN_CIK(x) (((x) & 0x1FF) << 16)
+#define G_00B02C_EXCP_EN_CIK(x) (((x) >> 16) & 0x1FF)
+#define C_00B02C_EXCP_EN_CIK 0xFE00FFFF
#define R_00B030_SPI_SHADER_USER_DATA_PS_0 0x00B030
#define R_00B034_SPI_SHADER_USER_DATA_PS_1 0x00B034
#define R_00B038_SPI_SHADER_USER_DATA_PS_2 0x00B038
@@ -4607,6 +6027,16 @@
#define R_00B064_SPI_SHADER_USER_DATA_PS_13 0x00B064
#define R_00B068_SPI_SHADER_USER_DATA_PS_14 0x00B068
#define R_00B06C_SPI_SHADER_USER_DATA_PS_15 0x00B06C
+#define R_00B100_SPI_SHADER_TBA_LO_VS 0x00B100
+#define R_00B104_SPI_SHADER_TBA_HI_VS 0x00B104
+#define S_00B104_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B104_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B104_MEM_BASE 0xFFFFFF00
+#define R_00B108_SPI_SHADER_TMA_LO_VS 0x00B108
+#define R_00B10C_SPI_SHADER_TMA_HI_VS 0x00B10C
+#define S_00B10C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B10C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B10C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B118_SPI_SHADER_PGM_RSRC3_VS 0x00B118
#define S_00B118_CU_EN(x) (((x) & 0xFFFF) << 0)
@@ -4674,6 +6104,9 @@
#define S_00B12C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B12C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B12C_USER_SGPR 0xFFFFFFC1
+#define S_00B12C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B12C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B12C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B12C_OC_LDS_EN(x) (((x) & 0x1) << 7)
#define G_00B12C_OC_LDS_EN(x) (((x) >> 7) & 0x1)
#define C_00B12C_OC_LDS_EN 0xFFFFFF7F
@@ -4695,6 +6128,14 @@
#define S_00B12C_EXCP_EN(x) (((x) & 0x7F) << 13) /* mask is 0x1FF on CIK */
#define G_00B12C_EXCP_EN(x) (((x) >> 13) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B12C_EXCP_EN 0xFFF01FFF /* mask is 0x1FF on CIK */
+#define S_00B12C_EXCP_EN_CIK(x) (((x) & 0x1FF) << 13)
+#define G_00B12C_EXCP_EN_CIK(x) (((x) >> 13) & 0x1FF)
+#define C_00B12C_EXCP_EN_CIK 0xFFC01FFF
+/* VI */
+#define S_00B12C_DISPATCH_DRAW_EN(x) (((x) & 0x1) << 24)
+#define G_00B12C_DISPATCH_DRAW_EN(x) (((x) >> 24) & 0x1)
+#define C_00B12C_DISPATCH_DRAW_EN 0xFEFFFFFF
+/* */
#define R_00B130_SPI_SHADER_USER_DATA_VS_0 0x00B130
#define R_00B134_SPI_SHADER_USER_DATA_VS_1 0x00B134
#define R_00B138_SPI_SHADER_USER_DATA_VS_2 0x00B138
@@ -4711,6 +6152,16 @@
#define R_00B164_SPI_SHADER_USER_DATA_VS_13 0x00B164
#define R_00B168_SPI_SHADER_USER_DATA_VS_14 0x00B168
#define R_00B16C_SPI_SHADER_USER_DATA_VS_15 0x00B16C
+#define R_00B200_SPI_SHADER_TBA_LO_GS 0x00B200
+#define R_00B204_SPI_SHADER_TBA_HI_GS 0x00B204
+#define S_00B204_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B204_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B204_MEM_BASE 0xFFFFFF00
+#define R_00B208_SPI_SHADER_TMA_LO_GS 0x00B208
+#define R_00B20C_SPI_SHADER_TMA_HI_GS 0x00B20C
+#define S_00B20C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B20C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B20C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B21C_SPI_SHADER_PGM_RSRC3_GS 0x00B21C
#define S_00B21C_CU_EN(x) (((x) & 0xFFFF) << 0)
@@ -4723,6 +6174,11 @@
#define G_00B21C_LOCK_LOW_THRESHOLD(x) (((x) >> 22) & 0x0F)
#define C_00B21C_LOCK_LOW_THRESHOLD 0xFC3FFFFF
/* */
+/* VI */
+#define S_00B21C_GROUP_FIFO_DEPTH(x) (((x) & 0x3F) << 26)
+#define G_00B21C_GROUP_FIFO_DEPTH(x) (((x) >> 26) & 0x3F)
+#define C_00B21C_GROUP_FIFO_DEPTH 0x03FFFFFF
+/* */
#define R_00B220_SPI_SHADER_PGM_LO_GS 0x00B220
#define R_00B224_SPI_SHADER_PGM_HI_GS 0x00B224
#define S_00B224_MEM_BASE(x) (((x) & 0xFF) << 0)
@@ -4771,10 +6227,41 @@
#define S_00B22C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B22C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B22C_USER_SGPR 0xFFFFFFC1
+#define S_00B22C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B22C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B22C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B22C_EXCP_EN(x) (((x) & 0x7F) << 7) /* mask is 0x1FF on CIK */
#define G_00B22C_EXCP_EN(x) (((x) >> 7) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B22C_EXCP_EN 0xFFFFC07F /* mask is 0x1FF on CIK */
+#define S_00B22C_EXCP_EN_CIK(x) (((x) & 0x1FF) << 7)
+#define G_00B22C_EXCP_EN_CIK(x) (((x) >> 7) & 0x1FF)
+#define C_00B22C_EXCP_EN_CIK 0xFFFF007F
#define R_00B230_SPI_SHADER_USER_DATA_GS_0 0x00B230
+#define R_00B234_SPI_SHADER_USER_DATA_GS_1 0x00B234
+#define R_00B238_SPI_SHADER_USER_DATA_GS_2 0x00B238
+#define R_00B23C_SPI_SHADER_USER_DATA_GS_3 0x00B23C
+#define R_00B240_SPI_SHADER_USER_DATA_GS_4 0x00B240
+#define R_00B244_SPI_SHADER_USER_DATA_GS_5 0x00B244
+#define R_00B248_SPI_SHADER_USER_DATA_GS_6 0x00B248
+#define R_00B24C_SPI_SHADER_USER_DATA_GS_7 0x00B24C
+#define R_00B250_SPI_SHADER_USER_DATA_GS_8 0x00B250
+#define R_00B254_SPI_SHADER_USER_DATA_GS_9 0x00B254
+#define R_00B258_SPI_SHADER_USER_DATA_GS_10 0x00B258
+#define R_00B25C_SPI_SHADER_USER_DATA_GS_11 0x00B25C
+#define R_00B260_SPI_SHADER_USER_DATA_GS_12 0x00B260
+#define R_00B264_SPI_SHADER_USER_DATA_GS_13 0x00B264
+#define R_00B268_SPI_SHADER_USER_DATA_GS_14 0x00B268
+#define R_00B26C_SPI_SHADER_USER_DATA_GS_15 0x00B26C
+#define R_00B300_SPI_SHADER_TBA_LO_ES 0x00B300
+#define R_00B304_SPI_SHADER_TBA_HI_ES 0x00B304
+#define S_00B304_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B304_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B304_MEM_BASE 0xFFFFFF00
+#define R_00B308_SPI_SHADER_TMA_LO_ES 0x00B308
+#define R_00B30C_SPI_SHADER_TMA_HI_ES 0x00B30C
+#define S_00B30C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B30C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B30C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B31C_SPI_SHADER_PGM_RSRC3_ES 0x00B31C
#define S_00B31C_CU_EN(x) (((x) & 0xFFFF) << 0)
@@ -4787,6 +6274,11 @@
#define G_00B31C_LOCK_LOW_THRESHOLD(x) (((x) >> 22) & 0x0F)
#define C_00B31C_LOCK_LOW_THRESHOLD 0xFC3FFFFF
/* */
+/* VI */
+#define S_00B31C_GROUP_FIFO_DEPTH(x) (((x) & 0x3F) << 26)
+#define G_00B31C_GROUP_FIFO_DEPTH(x) (((x) >> 26) & 0x3F)
+#define C_00B31C_GROUP_FIFO_DEPTH 0x03FFFFFF
+/* */
#define R_00B320_SPI_SHADER_PGM_LO_ES 0x00B320
#define R_00B324_SPI_SHADER_PGM_HI_ES 0x00B324
#define S_00B324_MEM_BASE(x) (((x) & 0xFF) << 0)
@@ -4838,6 +6330,9 @@
#define S_00B32C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B32C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B32C_USER_SGPR 0xFFFFFFC1
+#define S_00B32C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B32C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B32C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B32C_OC_LDS_EN(x) (((x) & 0x1) << 7)
#define G_00B32C_OC_LDS_EN(x) (((x) >> 7) & 0x1)
#define C_00B32C_OC_LDS_EN 0xFFFFFF7F
@@ -4848,6 +6343,31 @@
#define G_00B32C_LDS_SIZE(x) (((x) >> 20) & 0x1FF) /* CIK, for on-chip GS */
#define C_00B32C_LDS_SIZE 0xE00FFFFF /* CIK, for on-chip GS */
#define R_00B330_SPI_SHADER_USER_DATA_ES_0 0x00B330
+#define R_00B334_SPI_SHADER_USER_DATA_ES_1 0x00B334
+#define R_00B338_SPI_SHADER_USER_DATA_ES_2 0x00B338
+#define R_00B33C_SPI_SHADER_USER_DATA_ES_3 0x00B33C
+#define R_00B340_SPI_SHADER_USER_DATA_ES_4 0x00B340
+#define R_00B344_SPI_SHADER_USER_DATA_ES_5 0x00B344
+#define R_00B348_SPI_SHADER_USER_DATA_ES_6 0x00B348
+#define R_00B34C_SPI_SHADER_USER_DATA_ES_7 0x00B34C
+#define R_00B350_SPI_SHADER_USER_DATA_ES_8 0x00B350
+#define R_00B354_SPI_SHADER_USER_DATA_ES_9 0x00B354
+#define R_00B358_SPI_SHADER_USER_DATA_ES_10 0x00B358
+#define R_00B35C_SPI_SHADER_USER_DATA_ES_11 0x00B35C
+#define R_00B360_SPI_SHADER_USER_DATA_ES_12 0x00B360
+#define R_00B364_SPI_SHADER_USER_DATA_ES_13 0x00B364
+#define R_00B368_SPI_SHADER_USER_DATA_ES_14 0x00B368
+#define R_00B36C_SPI_SHADER_USER_DATA_ES_15 0x00B36C
+#define R_00B400_SPI_SHADER_TBA_LO_HS 0x00B400
+#define R_00B404_SPI_SHADER_TBA_HI_HS 0x00B404
+#define S_00B404_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B404_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B404_MEM_BASE 0xFFFFFF00
+#define R_00B408_SPI_SHADER_TMA_LO_HS 0x00B408
+#define R_00B40C_SPI_SHADER_TMA_HI_HS 0x00B40C
+#define S_00B40C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B40C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B40C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B41C_SPI_SHADER_PGM_RSRC3_HS 0x00B41C
#define S_00B41C_WAVE_LIMIT(x) (((x) & 0x3F) << 0)
@@ -4857,6 +6377,11 @@
#define G_00B41C_LOCK_LOW_THRESHOLD(x) (((x) >> 6) & 0x0F)
#define C_00B41C_LOCK_LOW_THRESHOLD 0xFFFFFC3F
/* */
+/* VI */
+#define S_00B41C_GROUP_FIFO_DEPTH(x) (((x) & 0x3F) << 10)
+#define G_00B41C_GROUP_FIFO_DEPTH(x) (((x) >> 10) & 0x3F)
+#define C_00B41C_GROUP_FIFO_DEPTH 0xFFFF03FF
+/* */
#define R_00B420_SPI_SHADER_PGM_LO_HS 0x00B420
#define R_00B424_SPI_SHADER_PGM_HI_HS 0x00B424
#define S_00B424_MEM_BASE(x) (((x) & 0xFF) << 0)
@@ -4902,6 +6427,9 @@
#define S_00B42C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B42C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B42C_USER_SGPR 0xFFFFFFC1
+#define S_00B42C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B42C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B42C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B42C_OC_LDS_EN(x) (((x) & 0x1) << 7)
#define G_00B42C_OC_LDS_EN(x) (((x) >> 7) & 0x1)
#define C_00B42C_OC_LDS_EN 0xFFFFFF7F
@@ -4912,6 +6440,31 @@
#define G_00B42C_EXCP_EN(x) (((x) >> 9) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B42C_EXCP_EN 0xFFFF01FF /* mask is 0x1FF on CIK */
#define R_00B430_SPI_SHADER_USER_DATA_HS_0 0x00B430
+#define R_00B434_SPI_SHADER_USER_DATA_HS_1 0x00B434
+#define R_00B438_SPI_SHADER_USER_DATA_HS_2 0x00B438
+#define R_00B43C_SPI_SHADER_USER_DATA_HS_3 0x00B43C
+#define R_00B440_SPI_SHADER_USER_DATA_HS_4 0x00B440
+#define R_00B444_SPI_SHADER_USER_DATA_HS_5 0x00B444
+#define R_00B448_SPI_SHADER_USER_DATA_HS_6 0x00B448
+#define R_00B44C_SPI_SHADER_USER_DATA_HS_7 0x00B44C
+#define R_00B450_SPI_SHADER_USER_DATA_HS_8 0x00B450
+#define R_00B454_SPI_SHADER_USER_DATA_HS_9 0x00B454
+#define R_00B458_SPI_SHADER_USER_DATA_HS_10 0x00B458
+#define R_00B45C_SPI_SHADER_USER_DATA_HS_11 0x00B45C
+#define R_00B460_SPI_SHADER_USER_DATA_HS_12 0x00B460
+#define R_00B464_SPI_SHADER_USER_DATA_HS_13 0x00B464
+#define R_00B468_SPI_SHADER_USER_DATA_HS_14 0x00B468
+#define R_00B46C_SPI_SHADER_USER_DATA_HS_15 0x00B46C
+#define R_00B500_SPI_SHADER_TBA_LO_LS 0x00B500
+#define R_00B504_SPI_SHADER_TBA_HI_LS 0x00B504
+#define S_00B504_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B504_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B504_MEM_BASE 0xFFFFFF00
+#define R_00B508_SPI_SHADER_TMA_LO_LS 0x00B508
+#define R_00B50C_SPI_SHADER_TMA_HI_LS 0x00B50C
+#define S_00B50C_MEM_BASE(x) (((x) & 0xFF) << 0)
+#define G_00B50C_MEM_BASE(x) (((x) >> 0) & 0xFF)
+#define C_00B50C_MEM_BASE 0xFFFFFF00
/* CIK */
#define R_00B51C_SPI_SHADER_PGM_RSRC3_LS 0x00B51C
#define S_00B51C_CU_EN(x) (((x) & 0xFFFF) << 0)
@@ -4924,6 +6477,11 @@
#define G_00B51C_LOCK_LOW_THRESHOLD(x) (((x) >> 22) & 0x0F)
#define C_00B51C_LOCK_LOW_THRESHOLD 0xFC3FFFFF
/* */
+/* VI */
+#define S_00B51C_GROUP_FIFO_DEPTH(x) (((x) & 0x3F) << 26)
+#define G_00B51C_GROUP_FIFO_DEPTH(x) (((x) >> 26) & 0x3F)
+#define C_00B51C_GROUP_FIFO_DEPTH 0x03FFFFFF
+/* */
#define R_00B520_SPI_SHADER_PGM_LO_LS 0x00B520
#define R_00B524_SPI_SHADER_PGM_HI_LS 0x00B524
#define S_00B524_MEM_BASE(x) (((x) & 0xFF) << 0)
@@ -4972,6 +6530,9 @@
#define S_00B52C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B52C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B52C_USER_SGPR 0xFFFFFFC1
+#define S_00B52C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B52C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B52C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B52C_LDS_SIZE(x) (((x) & 0x1FF) << 7)
#define G_00B52C_LDS_SIZE(x) (((x) >> 7) & 0x1FF)
#define C_00B52C_LDS_SIZE 0xFFFF007F
@@ -4979,6 +6540,21 @@
#define G_00B52C_EXCP_EN(x) (((x) >> 16) & 0x7F) /* mask is 0x1FF on CIK */
#define C_00B52C_EXCP_EN 0xFF80FFFF /* mask is 0x1FF on CIK */
#define R_00B530_SPI_SHADER_USER_DATA_LS_0 0x00B530
+#define R_00B534_SPI_SHADER_USER_DATA_LS_1 0x00B534
+#define R_00B538_SPI_SHADER_USER_DATA_LS_2 0x00B538
+#define R_00B53C_SPI_SHADER_USER_DATA_LS_3 0x00B53C
+#define R_00B540_SPI_SHADER_USER_DATA_LS_4 0x00B540
+#define R_00B544_SPI_SHADER_USER_DATA_LS_5 0x00B544
+#define R_00B548_SPI_SHADER_USER_DATA_LS_6 0x00B548
+#define R_00B54C_SPI_SHADER_USER_DATA_LS_7 0x00B54C
+#define R_00B550_SPI_SHADER_USER_DATA_LS_8 0x00B550
+#define R_00B554_SPI_SHADER_USER_DATA_LS_9 0x00B554
+#define R_00B558_SPI_SHADER_USER_DATA_LS_10 0x00B558
+#define R_00B55C_SPI_SHADER_USER_DATA_LS_11 0x00B55C
+#define R_00B560_SPI_SHADER_USER_DATA_LS_12 0x00B560
+#define R_00B564_SPI_SHADER_USER_DATA_LS_13 0x00B564
+#define R_00B568_SPI_SHADER_USER_DATA_LS_14 0x00B568
+#define R_00B56C_SPI_SHADER_USER_DATA_LS_15 0x00B56C
#define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
#define S_00B800_COMPUTE_SHADER_EN(x) (((x) & 0x1) << 0)
#define G_00B800_COMPUTE_SHADER_EN(x) (((x) >> 0) & 0x1)
@@ -5049,6 +6625,16 @@
#define S_00B82C_MAX_WAVE_ID(x) (((x) & 0xFFF) << 0)
#define G_00B82C_MAX_WAVE_ID(x) (((x) >> 0) & 0xFFF)
#define C_00B82C_MAX_WAVE_ID 0xFFFFF000
+/* CIK */
+#define R_00B828_COMPUTE_PIPELINESTAT_ENABLE 0x00B828
+#define S_00B828_PIPELINESTAT_ENABLE(x) (((x) & 0x1) << 0)
+#define G_00B828_PIPELINESTAT_ENABLE(x) (((x) >> 0) & 0x1)
+#define C_00B828_PIPELINESTAT_ENABLE 0xFFFFFFFE
+#define R_00B82C_COMPUTE_PERFCOUNT_ENABLE 0x00B82C
+#define S_00B82C_PERFCOUNT_ENABLE(x) (((x) & 0x1) << 0)
+#define G_00B82C_PERFCOUNT_ENABLE(x) (((x) >> 0) & 0x1)
+#define C_00B82C_PERFCOUNT_ENABLE 0xFFFFFFFE
+/* */
#define R_00B830_COMPUTE_PGM_LO 0x00B830
#define R_00B834_COMPUTE_PGM_HI 0x00B834
#define S_00B834_DATA(x) (((x) & 0xFF) << 0)
@@ -5059,6 +6645,16 @@
#define G_00B834_INST_ATC(x) (((x) >> 8) & 0x1)
#define C_00B834_INST_ATC 0xFFFFFEFF
/* */
+#define R_00B838_COMPUTE_TBA_LO 0x00B838
+#define R_00B83C_COMPUTE_TBA_HI 0x00B83C
+#define S_00B83C_DATA(x) (((x) & 0xFF) << 0)
+#define G_00B83C_DATA(x) (((x) >> 0) & 0xFF)
+#define C_00B83C_DATA 0xFFFFFF00
+#define R_00B840_COMPUTE_TMA_LO 0x00B840
+#define R_00B844_COMPUTE_TMA_HI 0x00B844
+#define S_00B844_DATA(x) (((x) & 0xFF) << 0)
+#define G_00B844_DATA(x) (((x) >> 0) & 0xFF)
+#define C_00B844_DATA 0xFFFFFF00
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
#define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
@@ -5099,6 +6695,9 @@
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
+#define S_00B84C_TRAP_PRESENT(x) (((x) & 0x1) << 6)
+#define G_00B84C_TRAP_PRESENT(x) (((x) >> 6) & 0x1)
+#define C_00B84C_TRAP_PRESENT 0xFFFFFFBF
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
@@ -5125,6 +6724,10 @@
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
#define C_00B84C_EXCP_EN 0x80FFFFFF
+#define R_00B850_COMPUTE_VMID 0x00B850
+#define S_00B850_DATA(x) (((x) & 0x0F) << 0)
+#define G_00B850_DATA(x) (((x) >> 0) & 0x0F)
+#define C_00B850_DATA 0xFFFFFFF0
#define R_00B854_COMPUTE_RESOURCE_LIMITS 0x00B854
#define S_00B854_WAVES_PER_SH(x) (((x) & 0x3F) << 0) /* mask is 0x3FF on CIK */
#define G_00B854_WAVES_PER_SH(x) (((x) >> 0) & 0x3F) /* mask is 0x3FF on CIK */
@@ -5167,7 +6770,84 @@
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
#define G_00B860_WAVESIZE(x) (((x) >> 12) & 0x1FFF)
#define C_00B860_WAVESIZE 0xFE000FFF
+/* CIK */
+#define R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2 0x00B864
+#define S_00B864_SH0_CU_EN(x) (((x) & 0xFFFF) << 0)
+#define G_00B864_SH0_CU_EN(x) (((x) >> 0) & 0xFFFF)
+#define C_00B864_SH0_CU_EN 0xFFFF0000
+#define S_00B864_SH1_CU_EN(x) (((x) & 0xFFFF) << 16)
+#define G_00B864_SH1_CU_EN(x) (((x) >> 16) & 0xFFFF)
+#define C_00B864_SH1_CU_EN 0x0000FFFF
+#define R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3 0x00B868
+#define S_00B868_SH0_CU_EN(x) (((x) & 0xFFFF) << 0)
+#define G_00B868_SH0_CU_EN(x) (((x) >> 0) & 0xFFFF)
+#define C_00B868_SH0_CU_EN 0xFFFF0000
+#define S_00B868_SH1_CU_EN(x) (((x) & 0xFFFF) << 16)
+#define G_00B868_SH1_CU_EN(x) (((x) >> 16) & 0xFFFF)
+#define C_00B868_SH1_CU_EN 0x0000FFFF
+#define R_00B86C_COMPUTE_RESTART_X 0x00B86C
+#define R_00B870_COMPUTE_RESTART_Y 0x00B870
+#define R_00B874_COMPUTE_RESTART_Z 0x00B874
+#define R_00B87C_COMPUTE_MISC_RESERVED 0x00B87C
+#define S_00B87C_SEND_SEID(x) (((x) & 0x03) << 0)
+#define G_00B87C_SEND_SEID(x) (((x) >> 0) & 0x03)
+#define C_00B87C_SEND_SEID 0xFFFFFFFC
+#define S_00B87C_RESERVED2(x) (((x) & 0x1) << 2)
+#define G_00B87C_RESERVED2(x) (((x) >> 2) & 0x1)
+#define C_00B87C_RESERVED2 0xFFFFFFFB
+#define S_00B87C_RESERVED3(x) (((x) & 0x1) << 3)
+#define G_00B87C_RESERVED3(x) (((x) >> 3) & 0x1)
+#define C_00B87C_RESERVED3 0xFFFFFFF7
+#define S_00B87C_RESERVED4(x) (((x) & 0x1) << 4)
+#define G_00B87C_RESERVED4(x) (((x) >> 4) & 0x1)
+#define C_00B87C_RESERVED4 0xFFFFFFEF
+/* VI */
+#define S_00B87C_WAVE_ID_BASE(x) (((x) & 0xFFF) << 5)
+#define G_00B87C_WAVE_ID_BASE(x) (((x) >> 5) & 0xFFF)
+#define C_00B87C_WAVE_ID_BASE 0xFFFE001F
+#define R_00B880_COMPUTE_DISPATCH_ID 0x00B880
+#define R_00B884_COMPUTE_THREADGROUP_ID 0x00B884
+#define R_00B888_COMPUTE_RELAUNCH 0x00B888
+#define S_00B888_PAYLOAD(x) (((x) & 0x3FFFFFFF) << 0)
+#define G_00B888_PAYLOAD(x) (((x) >> 0) & 0x3FFFFFFF)
+#define C_00B888_PAYLOAD 0xC0000000
+#define S_00B888_IS_EVENT(x) (((x) & 0x1) << 30)
+#define G_00B888_IS_EVENT(x) (((x) >> 30) & 0x1)
+#define C_00B888_IS_EVENT 0xBFFFFFFF
+#define S_00B888_IS_STATE(x) (((x) & 0x1) << 31)
+#define G_00B888_IS_STATE(x) (((x) >> 31) & 0x1)
+#define C_00B888_IS_STATE 0x7FFFFFFF
+#define R_00B88C_COMPUTE_WAVE_RESTORE_ADDR_LO 0x00B88C
+#define R_00B890_COMPUTE_WAVE_RESTORE_ADDR_HI 0x00B890
+#define S_00B890_ADDR(x) (((x) & 0xFFFF) << 0)
+#define G_00B890_ADDR(x) (((x) >> 0) & 0xFFFF)
+#define C_00B890_ADDR 0xFFFF0000
+#define R_00B894_COMPUTE_WAVE_RESTORE_CONTROL 0x00B894
+#define S_00B894_ATC(x) (((x) & 0x1) << 0)
+#define G_00B894_ATC(x) (((x) >> 0) & 0x1)
+#define C_00B894_ATC 0xFFFFFFFE
+#define S_00B894_MTYPE(x) (((x) & 0x03) << 1)
+#define G_00B894_MTYPE(x) (((x) >> 1) & 0x03)
+#define C_00B894_MTYPE 0xFFFFFFF9
+/* */
+/* */
#define R_00B900_COMPUTE_USER_DATA_0 0x00B900
+#define R_00B904_COMPUTE_USER_DATA_1 0x00B904
+#define R_00B908_COMPUTE_USER_DATA_2 0x00B908
+#define R_00B90C_COMPUTE_USER_DATA_3 0x00B90C
+#define R_00B910_COMPUTE_USER_DATA_4 0x00B910
+#define R_00B914_COMPUTE_USER_DATA_5 0x00B914
+#define R_00B918_COMPUTE_USER_DATA_6 0x00B918
+#define R_00B91C_COMPUTE_USER_DATA_7 0x00B91C
+#define R_00B920_COMPUTE_USER_DATA_8 0x00B920
+#define R_00B924_COMPUTE_USER_DATA_9 0x00B924
+#define R_00B928_COMPUTE_USER_DATA_10 0x00B928
+#define R_00B92C_COMPUTE_USER_DATA_11 0x00B92C
+#define R_00B930_COMPUTE_USER_DATA_12 0x00B930
+#define R_00B934_COMPUTE_USER_DATA_13 0x00B934
+#define R_00B938_COMPUTE_USER_DATA_14 0x00B938
+#define R_00B93C_COMPUTE_USER_DATA_15 0x00B93C
+#define R_00B9FC_COMPUTE_NOWHERE 0x00B9FC
#define R_028000_DB_RENDER_CONTROL 0x028000
#define S_028000_DEPTH_CLEAR_ENABLE(x) (((x) & 0x1) << 0)
#define G_028000_DEPTH_CLEAR_ENABLE(x) (((x) >> 0) & 0x1)
@@ -5196,6 +6876,11 @@
#define S_028000_COPY_SAMPLE(x) (((x) & 0x0F) << 8)
#define G_028000_COPY_SAMPLE(x) (((x) >> 8) & 0x0F)
#define C_028000_COPY_SAMPLE 0xFFFFF0FF
+/* VI */
+#define S_028000_DECOMPRESS_ENABLE(x) (((x) & 0x1) << 12)
+#define G_028000_DECOMPRESS_ENABLE(x) (((x) >> 12) & 0x1)
+#define C_028000_DECOMPRESS_ENABLE 0xFFFFEFFF
+/* */
#define R_028004_DB_COUNT_CONTROL 0x028004
#define S_028004_ZPASS_INCREMENT_DISABLE(x) (((x) & 0x1) << 0)
#define G_028004_ZPASS_INCREMENT_DISABLE(x) (((x) >> 0) & 0x1)
@@ -5474,9 +7159,6 @@
#define S_028040_NUM_SAMPLES(x) (((x) & 0x03) << 2)
#define G_028040_NUM_SAMPLES(x) (((x) >> 2) & 0x03)
#define C_028040_NUM_SAMPLES 0xFFFFFFF3
-#define S_028040_TILE_MODE_INDEX(x) (((x) & 0x07) << 20) /* not on CIK */
-#define G_028040_TILE_MODE_INDEX(x) (((x) >> 20) & 0x07) /* not on CIK */
-#define C_028040_TILE_MODE_INDEX 0xFF8FFFFF /* not on CIK */
/* CIK */
#define S_028040_TILE_SPLIT(x) (((x) & 0x07) << 13)
#define G_028040_TILE_SPLIT(x) (((x) >> 13) & 0x07)
@@ -5489,6 +7171,14 @@
#define V_028040_ADDR_SURF_TILE_SPLIT_2KB 0x05
#define V_028040_ADDR_SURF_TILE_SPLIT_4KB 0x06
/* */
+#define S_028040_TILE_MODE_INDEX(x) (((x) & 0x07) << 20) /* not on CIK */
+#define G_028040_TILE_MODE_INDEX(x) (((x) >> 20) & 0x07) /* not on CIK */
+#define C_028040_TILE_MODE_INDEX 0xFF8FFFFF /* not on CIK */
+/* VI */
+#define S_028040_DECOMPRESS_ON_N_ZPLANES(x) (((x) & 0x0F) << 23)
+#define G_028040_DECOMPRESS_ON_N_ZPLANES(x) (((x) >> 23) & 0x0F)
+#define C_028040_DECOMPRESS_ON_N_ZPLANES 0xF87FFFFF
+/* */
#define S_028040_ALLOW_EXPCLEAR(x) (((x) & 0x1) << 27)
#define G_028040_ALLOW_EXPCLEAR(x) (((x) >> 27) & 0x1)
#define C_028040_ALLOW_EXPCLEAR 0xF7FFFFFF
@@ -5498,6 +7188,11 @@
#define S_028040_TILE_SURFACE_ENABLE(x) (((x) & 0x1) << 29)
#define G_028040_TILE_SURFACE_ENABLE(x) (((x) >> 29) & 0x1)
#define C_028040_TILE_SURFACE_ENABLE 0xDFFFFFFF
+/* VI */
+#define S_028040_CLEAR_DISALLOWED(x) (((x) & 0x1) << 30)
+#define G_028040_CLEAR_DISALLOWED(x) (((x) >> 30) & 0x1)
+#define C_028040_CLEAR_DISALLOWED 0xBFFFFFFF
+/* */
#define S_028040_ZRANGE_PRECISION(x) (((x) & 0x1) << 31)
#define G_028040_ZRANGE_PRECISION(x) (((x) >> 31) & 0x1)
#define C_028040_ZRANGE_PRECISION 0x7FFFFFFF
@@ -5507,9 +7202,6 @@
#define C_028044_FORMAT 0xFFFFFFFE
#define V_028044_STENCIL_INVALID 0x00
#define V_028044_STENCIL_8 0x01
-#define S_028044_TILE_MODE_INDEX(x) (((x) & 0x07) << 20) /* not on CIK */
-#define G_028044_TILE_MODE_INDEX(x) (((x) >> 20) & 0x07) /* not on CIK */
-#define C_028044_TILE_MODE_INDEX 0xFF8FFFFF /* not on CIK */
/* CIK */
#define S_028044_TILE_SPLIT(x) (((x) & 0x07) << 13)
#define G_028044_TILE_SPLIT(x) (((x) >> 13) & 0x07)
@@ -5522,12 +7214,20 @@
#define V_028044_ADDR_SURF_TILE_SPLIT_2KB 0x05
#define V_028044_ADDR_SURF_TILE_SPLIT_4KB 0x06
/* */
+#define S_028044_TILE_MODE_INDEX(x) (((x) & 0x07) << 20) /* not on CIK */
+#define G_028044_TILE_MODE_INDEX(x) (((x) >> 20) & 0x07) /* not on CIK */
+#define C_028044_TILE_MODE_INDEX 0xFF8FFFFF /* not on CIK */
#define S_028044_ALLOW_EXPCLEAR(x) (((x) & 0x1) << 27)
#define G_028044_ALLOW_EXPCLEAR(x) (((x) >> 27) & 0x1)
#define C_028044_ALLOW_EXPCLEAR 0xF7FFFFFF
#define S_028044_TILE_STENCIL_DISABLE(x) (((x) & 0x1) << 29)
#define G_028044_TILE_STENCIL_DISABLE(x) (((x) >> 29) & 0x1)
#define C_028044_TILE_STENCIL_DISABLE 0xDFFFFFFF
+/* VI */
+#define S_028044_CLEAR_DISALLOWED(x) (((x) & 0x1) << 30)
+#define G_028044_CLEAR_DISALLOWED(x) (((x) >> 30) & 0x1)
+#define C_028044_CLEAR_DISALLOWED 0xBFFFFFFF
+/* */
#define R_028048_DB_Z_READ_BASE 0x028048
#define R_02804C_DB_STENCIL_READ_BASE 0x02804C
#define R_028050_DB_Z_WRITE_BASE 0x028050
@@ -5549,7 +7249,13 @@
#define S_028084_ADDRESS(x) (((x) & 0xFF) << 0)
#define G_028084_ADDRESS(x) (((x) >> 0) & 0xFF)
#define C_028084_ADDRESS 0xFFFFFF00
-/* */
+#define R_0281E8_COHER_DEST_BASE_HI_0 0x0281E8
+#define R_0281EC_COHER_DEST_BASE_HI_1 0x0281EC
+#define R_0281F0_COHER_DEST_BASE_HI_2 0x0281F0
+#define R_0281F4_COHER_DEST_BASE_HI_3 0x0281F4
+/* */
+#define R_0281F8_COHER_DEST_BASE_2 0x0281F8
+#define R_0281FC_COHER_DEST_BASE_3 0x0281FC
#define R_028200_PA_SC_WINDOW_OFFSET 0x028200
#define S_028200_WINDOW_X_OFFSET(x) (((x) & 0xFFFF) << 0)
#define G_028200_WINDOW_X_OFFSET(x) (((x) >> 0) & 0xFFFF)
@@ -5694,6 +7400,8 @@
#define S_028244_BR_Y(x) (((x) & 0x7FFF) << 16)
#define G_028244_BR_Y(x) (((x) >> 16) & 0x7FFF)
#define C_028244_BR_Y 0x8000FFFF
+#define R_028248_COHER_DEST_BASE_0 0x028248
+#define R_02824C_COHER_DEST_BASE_1 0x02824C
#define R_028250_PA_SC_VPORT_SCISSOR_0_TL 0x028250
#define S_028250_TL_X(x) (((x) & 0x7FFF) << 0)
#define G_028250_TL_X(x) (((x) >> 0) & 0x7FFF)
@@ -5711,8 +7419,68 @@
#define S_028254_BR_Y(x) (((x) & 0x7FFF) << 16)
#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF)
#define C_028254_BR_Y 0x8000FFFF
+#define R_028258_PA_SC_VPORT_SCISSOR_1_TL 0x028258
+#define R_02825C_PA_SC_VPORT_SCISSOR_1_BR 0x02825C
+#define R_028260_PA_SC_VPORT_SCISSOR_2_TL 0x028260
+#define R_028264_PA_SC_VPORT_SCISSOR_2_BR 0x028264
+#define R_028268_PA_SC_VPORT_SCISSOR_3_TL 0x028268
+#define R_02826C_PA_SC_VPORT_SCISSOR_3_BR 0x02826C
+#define R_028270_PA_SC_VPORT_SCISSOR_4_TL 0x028270
+#define R_028274_PA_SC_VPORT_SCISSOR_4_BR 0x028274
+#define R_028278_PA_SC_VPORT_SCISSOR_5_TL 0x028278
+#define R_02827C_PA_SC_VPORT_SCISSOR_5_BR 0x02827C
+#define R_028280_PA_SC_VPORT_SCISSOR_6_TL 0x028280
+#define R_028284_PA_SC_VPORT_SCISSOR_6_BR 0x028284
+#define R_028288_PA_SC_VPORT_SCISSOR_7_TL 0x028288
+#define R_02828C_PA_SC_VPORT_SCISSOR_7_BR 0x02828C
+#define R_028290_PA_SC_VPORT_SCISSOR_8_TL 0x028290
+#define R_028294_PA_SC_VPORT_SCISSOR_8_BR 0x028294
+#define R_028298_PA_SC_VPORT_SCISSOR_9_TL 0x028298
+#define R_02829C_PA_SC_VPORT_SCISSOR_9_BR 0x02829C
+#define R_0282A0_PA_SC_VPORT_SCISSOR_10_TL 0x0282A0
+#define R_0282A4_PA_SC_VPORT_SCISSOR_10_BR 0x0282A4
+#define R_0282A8_PA_SC_VPORT_SCISSOR_11_TL 0x0282A8
+#define R_0282AC_PA_SC_VPORT_SCISSOR_11_BR 0x0282AC
+#define R_0282B0_PA_SC_VPORT_SCISSOR_12_TL 0x0282B0
+#define R_0282B4_PA_SC_VPORT_SCISSOR_12_BR 0x0282B4
+#define R_0282B8_PA_SC_VPORT_SCISSOR_13_TL 0x0282B8
+#define R_0282BC_PA_SC_VPORT_SCISSOR_13_BR 0x0282BC
+#define R_0282C0_PA_SC_VPORT_SCISSOR_14_TL 0x0282C0
+#define R_0282C4_PA_SC_VPORT_SCISSOR_14_BR 0x0282C4
+#define R_0282C8_PA_SC_VPORT_SCISSOR_15_TL 0x0282C8
+#define R_0282CC_PA_SC_VPORT_SCISSOR_15_BR 0x0282CC
#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0
#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4
+#define R_0282D8_PA_SC_VPORT_ZMIN_1 0x0282D8
+#define R_0282DC_PA_SC_VPORT_ZMAX_1 0x0282DC
+#define R_0282E0_PA_SC_VPORT_ZMIN_2 0x0282E0
+#define R_0282E4_PA_SC_VPORT_ZMAX_2 0x0282E4
+#define R_0282E8_PA_SC_VPORT_ZMIN_3 0x0282E8
+#define R_0282EC_PA_SC_VPORT_ZMAX_3 0x0282EC
+#define R_0282F0_PA_SC_VPORT_ZMIN_4 0x0282F0
+#define R_0282F4_PA_SC_VPORT_ZMAX_4 0x0282F4
+#define R_0282F8_PA_SC_VPORT_ZMIN_5 0x0282F8
+#define R_0282FC_PA_SC_VPORT_ZMAX_5 0x0282FC
+#define R_028300_PA_SC_VPORT_ZMIN_6 0x028300
+#define R_028304_PA_SC_VPORT_ZMAX_6 0x028304
+#define R_028308_PA_SC_VPORT_ZMIN_7 0x028308
+#define R_02830C_PA_SC_VPORT_ZMAX_7 0x02830C
+#define R_028310_PA_SC_VPORT_ZMIN_8 0x028310
+#define R_028314_PA_SC_VPORT_ZMAX_8 0x028314
+#define R_028318_PA_SC_VPORT_ZMIN_9 0x028318
+#define R_02831C_PA_SC_VPORT_ZMAX_9 0x02831C
+#define R_028320_PA_SC_VPORT_ZMIN_10 0x028320
+#define R_028324_PA_SC_VPORT_ZMAX_10 0x028324
+#define R_028328_PA_SC_VPORT_ZMIN_11 0x028328
+#define R_02832C_PA_SC_VPORT_ZMAX_11 0x02832C
+#define R_028330_PA_SC_VPORT_ZMIN_12 0x028330
+#define R_028334_PA_SC_VPORT_ZMAX_12 0x028334
+#define R_028338_PA_SC_VPORT_ZMIN_13 0x028338
+#define R_02833C_PA_SC_VPORT_ZMAX_13 0x02833C
+#define R_028340_PA_SC_VPORT_ZMIN_14 0x028340
+#define R_028344_PA_SC_VPORT_ZMAX_14 0x028344
+#define R_028348_PA_SC_VPORT_ZMIN_15 0x028348
+#define R_02834C_PA_SC_VPORT_ZMAX_15 0x02834C
#define R_028350_PA_SC_RASTER_CONFIG 0x028350
#define S_028350_RB_MAP_PKR0(x) (((x) & 0x03) << 0)
#define G_028350_RB_MAP_PKR0(x) (((x) >> 0) & 0x03)
@@ -5834,6 +7602,13 @@
#define V_028354_RASTER_CONFIG_SE_PAIR_YSEL_16_WIDE_TILE 0x01
#define V_028354_RASTER_CONFIG_SE_PAIR_YSEL_32_WIDE_TILE 0x02
#define V_028354_RASTER_CONFIG_SE_PAIR_YSEL_64_WIDE_TILE 0x03
+#define R_028358_PA_SC_SCREEN_EXTENT_CONTROL 0x028358
+#define S_028358_SLICE_EVEN_ENABLE(x) (((x) & 0x03) << 0)
+#define G_028358_SLICE_EVEN_ENABLE(x) (((x) >> 0) & 0x03)
+#define C_028358_SLICE_EVEN_ENABLE 0xFFFFFFFC
+#define S_028358_SLICE_ODD_ENABLE(x) (((x) & 0x03) << 2)
+#define G_028358_SLICE_ODD_ENABLE(x) (((x) >> 2) & 0x03)
+#define C_028358_SLICE_ODD_ENABLE 0xFFFFFFF3
/* */
#define R_028400_VGT_MAX_VTX_INDX 0x028400
#define R_028404_VGT_MIN_VTX_INDX 0x028404
@@ -5843,6 +7618,18 @@
#define R_028418_CB_BLEND_GREEN 0x028418
#define R_02841C_CB_BLEND_BLUE 0x02841C
#define R_028420_CB_BLEND_ALPHA 0x028420
+/* VI */
+#define R_028424_CB_DCC_CONTROL 0x028424
+#define S_028424_OVERWRITE_COMBINER_DISABLE(x) (((x) & 0x1) << 0)
+#define G_028424_OVERWRITE_COMBINER_DISABLE(x) (((x) >> 0) & 0x1)
+#define C_028424_OVERWRITE_COMBINER_DISABLE 0xFFFFFFFE
+#define S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x) (((x) & 0x1) << 1)
+#define G_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x) (((x) >> 1) & 0x1)
+#define C_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE 0xFFFFFFFD
+#define S_028424_OVERWRITE_COMBINER_WATERMARK(x) (((x) & 0x1F) << 2)
+#define G_028424_OVERWRITE_COMBINER_WATERMARK(x) (((x) >> 2) & 0x1F)
+#define C_028424_OVERWRITE_COMBINER_WATERMARK 0xFFFFFF83
+/* */
#define R_02842C_DB_STENCIL_CONTROL 0x02842C
#define S_02842C_STENCILFAIL(x) (((x) & 0x0F) << 0)
#define G_02842C_STENCILFAIL(x) (((x) >> 0) & 0x0F)
@@ -5984,12 +7771,102 @@
#define S_028434_STENCILOPVAL_BF(x) (((x) & 0xFF) << 24)
#define G_028434_STENCILOPVAL_BF(x) (((x) >> 24) & 0xFF)
#define C_028434_STENCILOPVAL_BF 0x00FFFFFF
-#define R_02843C_PA_CL_VPORT_XSCALE_0 0x02843C
-#define R_028440_PA_CL_VPORT_XOFFSET_0 0x028440
-#define R_028444_PA_CL_VPORT_YSCALE_0 0x028444
-#define R_028448_PA_CL_VPORT_YOFFSET_0 0x028448
-#define R_02844C_PA_CL_VPORT_ZSCALE_0 0x02844C
-#define R_028450_PA_CL_VPORT_ZOFFSET_0 0x028450
+#define R_02843C_PA_CL_VPORT_XSCALE 0x02843C
+#define R_028440_PA_CL_VPORT_XOFFSET 0x028440
+#define R_028444_PA_CL_VPORT_YSCALE 0x028444
+#define R_028448_PA_CL_VPORT_YOFFSET 0x028448
+#define R_02844C_PA_CL_VPORT_ZSCALE 0x02844C
+#define R_028450_PA_CL_VPORT_ZOFFSET 0x028450
+#define R_028454_PA_CL_VPORT_XSCALE_1 0x028454
+#define R_028458_PA_CL_VPORT_XOFFSET_1 0x028458
+#define R_02845C_PA_CL_VPORT_YSCALE_1 0x02845C
+#define R_028460_PA_CL_VPORT_YOFFSET_1 0x028460
+#define R_028464_PA_CL_VPORT_ZSCALE_1 0x028464
+#define R_028468_PA_CL_VPORT_ZOFFSET_1 0x028468
+#define R_02846C_PA_CL_VPORT_XSCALE_2 0x02846C
+#define R_028470_PA_CL_VPORT_XOFFSET_2 0x028470
+#define R_028474_PA_CL_VPORT_YSCALE_2 0x028474
+#define R_028478_PA_CL_VPORT_YOFFSET_2 0x028478
+#define R_02847C_PA_CL_VPORT_ZSCALE_2 0x02847C
+#define R_028480_PA_CL_VPORT_ZOFFSET_2 0x028480
+#define R_028484_PA_CL_VPORT_XSCALE_3 0x028484
+#define R_028488_PA_CL_VPORT_XOFFSET_3 0x028488
+#define R_02848C_PA_CL_VPORT_YSCALE_3 0x02848C
+#define R_028490_PA_CL_VPORT_YOFFSET_3 0x028490
+#define R_028494_PA_CL_VPORT_ZSCALE_3 0x028494
+#define R_028498_PA_CL_VPORT_ZOFFSET_3 0x028498
+#define R_02849C_PA_CL_VPORT_XSCALE_4 0x02849C
+#define R_0284A0_PA_CL_VPORT_XOFFSET_4 0x0284A0
+#define R_0284A4_PA_CL_VPORT_YSCALE_4 0x0284A4
+#define R_0284A8_PA_CL_VPORT_YOFFSET_4 0x0284A8
+#define R_0284AC_PA_CL_VPORT_ZSCALE_4 0x0284AC
+#define R_0284B0_PA_CL_VPORT_ZOFFSET_4 0x0284B0
+#define R_0284B4_PA_CL_VPORT_XSCALE_5 0x0284B4
+#define R_0284B8_PA_CL_VPORT_XOFFSET_5 0x0284B8
+#define R_0284BC_PA_CL_VPORT_YSCALE_5 0x0284BC
+#define R_0284C0_PA_CL_VPORT_YOFFSET_5 0x0284C0
+#define R_0284C4_PA_CL_VPORT_ZSCALE_5 0x0284C4
+#define R_0284C8_PA_CL_VPORT_ZOFFSET_5 0x0284C8
+#define R_0284CC_PA_CL_VPORT_XSCALE_6 0x0284CC
+#define R_0284D0_PA_CL_VPORT_XOFFSET_6 0x0284D0
+#define R_0284D4_PA_CL_VPORT_YSCALE_6 0x0284D4
+#define R_0284D8_PA_CL_VPORT_YOFFSET_6 0x0284D8
+#define R_0284DC_PA_CL_VPORT_ZSCALE_6 0x0284DC
+#define R_0284E0_PA_CL_VPORT_ZOFFSET_6 0x0284E0
+#define R_0284E4_PA_CL_VPORT_XSCALE_7 0x0284E4
+#define R_0284E8_PA_CL_VPORT_XOFFSET_7 0x0284E8
+#define R_0284EC_PA_CL_VPORT_YSCALE_7 0x0284EC
+#define R_0284F0_PA_CL_VPORT_YOFFSET_7 0x0284F0
+#define R_0284F4_PA_CL_VPORT_ZSCALE_7 0x0284F4
+#define R_0284F8_PA_CL_VPORT_ZOFFSET_7 0x0284F8
+#define R_0284FC_PA_CL_VPORT_XSCALE_8 0x0284FC
+#define R_028500_PA_CL_VPORT_XOFFSET_8 0x028500
+#define R_028504_PA_CL_VPORT_YSCALE_8 0x028504
+#define R_028508_PA_CL_VPORT_YOFFSET_8 0x028508
+#define R_02850C_PA_CL_VPORT_ZSCALE_8 0x02850C
+#define R_028510_PA_CL_VPORT_ZOFFSET_8 0x028510
+#define R_028514_PA_CL_VPORT_XSCALE_9 0x028514
+#define R_028518_PA_CL_VPORT_XOFFSET_9 0x028518
+#define R_02851C_PA_CL_VPORT_YSCALE_9 0x02851C
+#define R_028520_PA_CL_VPORT_YOFFSET_9 0x028520
+#define R_028524_PA_CL_VPORT_ZSCALE_9 0x028524
+#define R_028528_PA_CL_VPORT_ZOFFSET_9 0x028528
+#define R_02852C_PA_CL_VPORT_XSCALE_10 0x02852C
+#define R_028530_PA_CL_VPORT_XOFFSET_10 0x028530
+#define R_028534_PA_CL_VPORT_YSCALE_10 0x028534
+#define R_028538_PA_CL_VPORT_YOFFSET_10 0x028538
+#define R_02853C_PA_CL_VPORT_ZSCALE_10 0x02853C
+#define R_028540_PA_CL_VPORT_ZOFFSET_10 0x028540
+#define R_028544_PA_CL_VPORT_XSCALE_11 0x028544
+#define R_028548_PA_CL_VPORT_XOFFSET_11 0x028548
+#define R_02854C_PA_CL_VPORT_YSCALE_11 0x02854C
+#define R_028550_PA_CL_VPORT_YOFFSET_11 0x028550
+#define R_028554_PA_CL_VPORT_ZSCALE_11 0x028554
+#define R_028558_PA_CL_VPORT_ZOFFSET_11 0x028558
+#define R_02855C_PA_CL_VPORT_XSCALE_12 0x02855C
+#define R_028560_PA_CL_VPORT_XOFFSET_12 0x028560
+#define R_028564_PA_CL_VPORT_YSCALE_12 0x028564
+#define R_028568_PA_CL_VPORT_YOFFSET_12 0x028568
+#define R_02856C_PA_CL_VPORT_ZSCALE_12 0x02856C
+#define R_028570_PA_CL_VPORT_ZOFFSET_12 0x028570
+#define R_028574_PA_CL_VPORT_XSCALE_13 0x028574
+#define R_028578_PA_CL_VPORT_XOFFSET_13 0x028578
+#define R_02857C_PA_CL_VPORT_YSCALE_13 0x02857C
+#define R_028580_PA_CL_VPORT_YOFFSET_13 0x028580
+#define R_028584_PA_CL_VPORT_ZSCALE_13 0x028584
+#define R_028588_PA_CL_VPORT_ZOFFSET_13 0x028588
+#define R_02858C_PA_CL_VPORT_XSCALE_14 0x02858C
+#define R_028590_PA_CL_VPORT_XOFFSET_14 0x028590
+#define R_028594_PA_CL_VPORT_YSCALE_14 0x028594
+#define R_028598_PA_CL_VPORT_YOFFSET_14 0x028598
+#define R_02859C_PA_CL_VPORT_ZSCALE_14 0x02859C
+#define R_0285A0_PA_CL_VPORT_ZOFFSET_14 0x0285A0
+#define R_0285A4_PA_CL_VPORT_XSCALE_15 0x0285A4
+#define R_0285A8_PA_CL_VPORT_XOFFSET_15 0x0285A8
+#define R_0285AC_PA_CL_VPORT_YSCALE_15 0x0285AC
+#define R_0285B0_PA_CL_VPORT_YOFFSET_15 0x0285B0
+#define R_0285B4_PA_CL_VPORT_ZSCALE_15 0x0285B4
+#define R_0285B8_PA_CL_VPORT_ZOFFSET_15 0x0285B8
#define R_0285BC_PA_CL_UCP_0_X 0x0285BC
#define R_0285C0_PA_CL_UCP_0_Y 0x0285C0
#define R_0285C4_PA_CL_UCP_0_Z 0x0285C4
@@ -6036,6 +7913,26 @@
#define G_028644_DUP(x) (((x) >> 18) & 0x1)
#define C_028644_DUP 0xFFFBFFFF
/* */
+/* VI */
+#define S_028644_FP16_INTERP_MODE(x) (((x) & 0x1) << 19)
+#define G_028644_FP16_INTERP_MODE(x) (((x) >> 19) & 0x1)
+#define C_028644_FP16_INTERP_MODE 0xFFF7FFFF
+#define S_028644_USE_DEFAULT_ATTR1(x) (((x) & 0x1) << 20)
+#define G_028644_USE_DEFAULT_ATTR1(x) (((x) >> 20) & 0x1)
+#define C_028644_USE_DEFAULT_ATTR1 0xFFEFFFFF
+#define S_028644_DEFAULT_VAL_ATTR1(x) (((x) & 0x03) << 21)
+#define G_028644_DEFAULT_VAL_ATTR1(x) (((x) >> 21) & 0x03)
+#define C_028644_DEFAULT_VAL_ATTR1 0xFF9FFFFF
+#define S_028644_PT_SPRITE_TEX_ATTR1(x) (((x) & 0x1) << 23)
+#define G_028644_PT_SPRITE_TEX_ATTR1(x) (((x) >> 23) & 0x1)
+#define C_028644_PT_SPRITE_TEX_ATTR1 0xFF7FFFFF
+#define S_028644_ATTR0_VALID(x) (((x) & 0x1) << 24)
+#define G_028644_ATTR0_VALID(x) (((x) >> 24) & 0x1)
+#define C_028644_ATTR0_VALID 0xFEFFFFFF
+#define S_028644_ATTR1_VALID(x) (((x) & 0x1) << 25)
+#define G_028644_ATTR1_VALID(x) (((x) >> 25) & 0x1)
+#define C_028644_ATTR1_VALID 0xFDFFFFFF
+/* */
#define R_028648_SPI_PS_INPUT_CNTL_1 0x028648
#define R_02864C_SPI_PS_INPUT_CNTL_2 0x02864C
#define R_028650_SPI_PS_INPUT_CNTL_3 0x028650
@@ -6559,6 +8456,10 @@
#define R_028794_CB_BLEND5_CONTROL 0x028794
#define R_028798_CB_BLEND6_CONTROL 0x028798
#define R_02879C_CB_BLEND7_CONTROL 0x02879C
+#define R_0287CC_CS_COPY_STATE 0x0287CC
+#define S_0287CC_SRC_STATE_ID(x) (((x) & 0x07) << 0)
+#define G_0287CC_SRC_STATE_ID(x) (((x) >> 0) & 0x07)
+#define C_0287CC_SRC_STATE_ID 0xFFFFFFF8
#define R_0287D4_PA_CL_POINT_X_RAD 0x0287D4
#define R_0287D8_PA_CL_POINT_Y_RAD 0x0287D8
#define R_0287DC_PA_CL_POINT_SIZE 0x0287DC
@@ -6588,6 +8489,10 @@
#define G_0287F0_USE_OPAQUE(x) (((x) >> 6) & 0x1)
#define C_0287F0_USE_OPAQUE 0xFFFFFFBF
#define R_0287F4_VGT_IMMED_DATA 0x0287F4 /* not on CIK */
+#define R_0287F8_VGT_EVENT_ADDRESS_REG 0x0287F8
+#define S_0287F8_ADDRESS_LOW(x) (((x) & 0xFFFFFFF) << 0)
+#define G_0287F8_ADDRESS_LOW(x) (((x) >> 0) & 0xFFFFFFF)
+#define C_0287F8_ADDRESS_LOW 0xF0000000
#define R_028800_DB_DEPTH_CONTROL 0x028800
#define S_028800_STENCIL_ENABLE(x) (((x) & 0x1) << 0)
#define G_028800_STENCIL_ENABLE(x) (((x) >> 0) & 0x1)
@@ -6644,36 +8549,42 @@
#define G_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS(x) (((x) >> 31) & 0x1)
#define C_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS 0x7FFFFFFF
#define R_028804_DB_EQAA 0x028804
-#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
-#define G_028804_MAX_ANCHOR_SAMPLES(x) (((x) >> 0) & 0x7)
-#define C_028804_MAX_ANCHOR_SAMPLES (~(((~0) & 0x7) << 0))
-#define S_028804_PS_ITER_SAMPLES(x) (((x) & 0x7) << 4)
-#define G_028804_PS_ITER_SAMPLES(x) (((x) >> 4) & 0x7)
-#define C_028804_PS_ITER_SAMPLES (~(((~0) & 0x7) << 4))
-#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) & 0x7) << 8)
-#define G_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) >> 8) & 0x7)
-#define C_028804_MASK_EXPORT_NUM_SAMPLES (~(((~0) & 0x7) << 8))
-#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) & 0x7) << 12)
-#define G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) >> 12) & 0x7)
-#define C_028804_ALPHA_TO_MASK_NUM_SAMPLES (~(((~0) & 0x7) << 12))
-#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) & 0x1) << 16)
-#define G_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) >> 16) & 0x1)
-#define C_028804_HIGH_QUALITY_INTERSECTIONS (~(((~0) & 0x1) << 16))
-#define S_028804_INCOHERENT_EQAA_READS(x) (((x) & 0x1) << 17)
-#define G_028804_INCOHERENT_EQAA_READS(x) (((x) >> 17) & 0x1)
-#define C_028804_INCOHERENT_EQAA_READS (~(((~0) & 0x1) << 17))
-#define S_028804_INTERPOLATE_COMP_Z(x) (((x) & 0x1) << 18)
-#define G_028804_INTERPOLATE_COMP_Z(x) (((x) >> 18) & 0x1)
-#define C_028804_INTERPOLATE_COMP_Z (~(((~0) >> 18) & 0x1))
-#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19)
-#define G_028804_INTERPOLATE_SRC_Z(x) (((x) >> 19) & 0x1)
-#define C_028804_INTERPOLATE_SRC_Z (~(((~0) & 0x1) << 19))
-#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20)
-#define G_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) >> 20) & 0x1)
-#define C_028804_STATIC_ANCHOR_ASSOCIATIONS (~(((~0) & 0x1) << 20))
-#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21)
-#define G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) >> 21) & 0x1)
-#define C_028804_ALPHA_TO_MASK_EQAA_DISABLE (~(((~0) & 0x1) << 21))
+#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
+#define G_028804_MAX_ANCHOR_SAMPLES(x) (((x) >> 0) & 0x07)
+#define C_028804_MAX_ANCHOR_SAMPLES 0xFFFFFFF8
+#define S_028804_PS_ITER_SAMPLES(x) (((x) & 0x7) << 4)
+#define G_028804_PS_ITER_SAMPLES(x) (((x) >> 4) & 0x07)
+#define C_028804_PS_ITER_SAMPLES 0xFFFFFF8F
+#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) & 0x7) << 8)
+#define G_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) >> 8) & 0x07)
+#define C_028804_MASK_EXPORT_NUM_SAMPLES 0xFFFFF8FF
+#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) & 0x7) << 12)
+#define G_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) >> 12) & 0x07)
+#define C_028804_ALPHA_TO_MASK_NUM_SAMPLES 0xFFFF8FFF
+#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) & 0x1) << 16)
+#define G_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) >> 16) & 0x1)
+#define C_028804_HIGH_QUALITY_INTERSECTIONS 0xFFFEFFFF
+#define S_028804_INCOHERENT_EQAA_READS(x) (((x) & 0x1) << 17)
+#define G_028804_INCOHERENT_EQAA_READS(x) (((x) >> 17) & 0x1)
+#define C_028804_INCOHERENT_EQAA_READS 0xFFFDFFFF
+#define S_028804_INTERPOLATE_COMP_Z(x) (((x) & 0x1) << 18)
+#define G_028804_INTERPOLATE_COMP_Z(x) (((x) >> 18) & 0x1)
+#define C_028804_INTERPOLATE_COMP_Z 0xFFFBFFFF
+#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19)
+#define G_028804_INTERPOLATE_SRC_Z(x) (((x) >> 19) & 0x1)
+#define C_028804_INTERPOLATE_SRC_Z 0xFFF7FFFF
+#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20)
+#define G_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) >> 20) & 0x1)
+#define C_028804_STATIC_ANCHOR_ASSOCIATIONS 0xFFEFFFFF
+#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21)
+#define G_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) >> 21) & 0x1)
+#define C_028804_ALPHA_TO_MASK_EQAA_DISABLE 0xFFDFFFFF
+#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x07) << 24)
+#define G_028804_OVERRASTERIZATION_AMOUNT(x) (((x) >> 24) & 0x07)
+#define C_028804_OVERRASTERIZATION_AMOUNT 0xF8FFFFFF
+#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) & 0x1) << 27)
+#define G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) >> 27) & 0x1)
+#define C_028804_ENABLE_POSTZ_OVERRASTERIZATION 0xF7FFFFFF
#define R_028808_CB_COLOR_CONTROL 0x028808
#define S_028808_DEGAMMA_ENABLE(x) (((x) & 0x1) << 3)
#define G_028808_DEGAMMA_ENABLE(x) (((x) >> 3) & 0x1)
@@ -6977,6 +8888,11 @@
#define S_02881C_USE_VTX_GS_CUT_FLAG(x) (((x) & 0x1) << 25)
#define G_02881C_USE_VTX_GS_CUT_FLAG(x) (((x) >> 25) & 0x1)
#define C_02881C_USE_VTX_GS_CUT_FLAG 0xFDFFFFFF
+/* VI */
+#define S_02881C_USE_VTX_LINE_WIDTH(x) (((x) & 0x1) << 26)
+#define G_02881C_USE_VTX_LINE_WIDTH(x) (((x) >> 26) & 0x1)
+#define C_02881C_USE_VTX_LINE_WIDTH 0xFBFFFFFF
+/* */
#define R_028820_PA_CL_NANINF_CNTL 0x028820
#define S_028820_VTE_XY_INF_DISCARD(x) (((x) & 0x1) << 0)
#define G_028820_VTE_XY_INF_DISCARD(x) (((x) >> 0) & 0x1)
@@ -7447,9 +9363,21 @@
#define S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 16)
#define G_028A4C_PS_ITER_SAMPLE(x) (((x) >> 16) & 0x1)
#define C_028A4C_PS_ITER_SAMPLE 0xFFFEFFFF
-#define S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC(x) (((x) & 0x1) << 17)
-#define G_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC(x) (((x) >> 17) & 0x1)
-#define C_028A4C_MULTI_SHADER_ENGINE_PRIM_DISC 0xFFFDFFFF
+#define S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(x) (((x) & 0x1) << 17)
+#define G_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(x) (((x) >> 17) & 0x1)
+#define C_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE 0xFFFDFFFF
+#define S_028A4C_MULTI_GPU_SUPERTILE_ENABLE(x) (((x) & 0x1) << 18)
+#define G_028A4C_MULTI_GPU_SUPERTILE_ENABLE(x) (((x) >> 18) & 0x1)
+#define C_028A4C_MULTI_GPU_SUPERTILE_ENABLE 0xFFFBFFFF
+#define S_028A4C_GPU_ID_OVERRIDE_ENABLE(x) (((x) & 0x1) << 19)
+#define G_028A4C_GPU_ID_OVERRIDE_ENABLE(x) (((x) >> 19) & 0x1)
+#define C_028A4C_GPU_ID_OVERRIDE_ENABLE 0xFFF7FFFF
+#define S_028A4C_GPU_ID_OVERRIDE(x) (((x) & 0x0F) << 20)
+#define G_028A4C_GPU_ID_OVERRIDE(x) (((x) >> 20) & 0x0F)
+#define C_028A4C_GPU_ID_OVERRIDE 0xFF0FFFFF
+#define S_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(x) (((x) & 0x1) << 24)
+#define G_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE(x) (((x) >> 24) & 0x1)
+#define C_028A4C_MULTI_GPU_PRIM_DISCARD_ENABLE 0xFEFFFFFF
#define S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) & 0x1) << 25)
#define G_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) >> 25) & 0x1)
#define C_028A4C_FORCE_EOV_CNTDWN_ENABLE 0xFDFFFFFF
@@ -7515,6 +9443,7 @@
#define C_028A7C_INDEX_TYPE 0xFFFFFFFC
#define V_028A7C_VGT_INDEX_16 0x00
#define V_028A7C_VGT_INDEX_32 0x01
+#define V_028A7C_VGT_INDEX_8 0x02 /* VI */
#define S_028A7C_SWAP_MODE(x) (((x) & 0x03) << 2)
#define G_028A7C_SWAP_MODE(x) (((x) >> 2) & 0x03)
#define C_028A7C_SWAP_MODE 0xFFFFFFF3
@@ -7544,6 +9473,12 @@
#define G_028A7C_REQ_PATH(x) (((x) >> 10) & 0x1)
#define C_028A7C_REQ_PATH 0xFFFFFBFF
/* */
+/* VI */
+#define S_028A7C_MTYPE(x) (((x) & 0x03) << 11)
+#define G_028A7C_MTYPE(x) (((x) >> 11) & 0x03)
+#define C_028A7C_MTYPE 0xFFFFE7FF
+/* */
+#define R_028A80_WD_ENHANCE 0x028A80
#define R_028A84_VGT_PRIMITIVEID_EN 0x028A84
#define S_028A84_PRIMITIVEID_EN(x) (((x) & 0x1) << 0)
#define G_028A84_PRIMITIVEID_EN(x) (((x) >> 0) & 0x1)
@@ -7642,6 +9577,10 @@
#define S_028AA8_WD_SWITCH_ON_EOP(x) (((x) & 0x1) << 20)
#define G_028AA8_WD_SWITCH_ON_EOP(x) (((x) >> 20) & 0x1)
#define C_028AA8_WD_SWITCH_ON_EOP 0xFFEFFFFF
+/* VI */
+#define S_028AA8_MAX_PRIMGRP_IN_WAVE(x) (((x) & 0x0F) << 28)
+#define G_028AA8_MAX_PRIMGRP_IN_WAVE(x) (((x) >> 28) & 0x0F)
+#define C_028AA8_MAX_PRIMGRP_IN_WAVE 0x0FFFFFFF
/* */
#define R_028AAC_VGT_ESGS_RING_ITEMSIZE 0x028AAC
#define S_028AAC_ITEMSIZE(x) (((x) & 0x7FFF) << 0)
@@ -7681,6 +9620,11 @@
#define S_028ABC_DST_OUTSIDE_ZERO_TO_ONE(x) (((x) & 0x1) << 16)
#define G_028ABC_DST_OUTSIDE_ZERO_TO_ONE(x) (((x) >> 16) & 0x1)
#define C_028ABC_DST_OUTSIDE_ZERO_TO_ONE 0xFFFEFFFF
+/* VI */
+#define S_028ABC_TC_COMPATIBLE(x) (((x) & 0x1) << 17)
+#define G_028ABC_TC_COMPATIBLE(x) (((x) >> 17) & 0x1)
+#define C_028ABC_TC_COMPATIBLE 0xFFFDFFFF
+/* */
#define R_028AC0_DB_SRESULTS_COMPARE_STATE0 0x028AC0
#define S_028AC0_COMPAREFUNC0(x) (((x) & 0x07) << 0)
#define G_028AC0_COMPAREFUNC0(x) (((x) >> 0) & 0x07)
@@ -7770,6 +9714,21 @@
#define S_028B38_MAX_VERT_OUT(x) (((x) & 0x7FF) << 0)
#define G_028B38_MAX_VERT_OUT(x) (((x) >> 0) & 0x7FF)
#define C_028B38_MAX_VERT_OUT 0xFFFFF800
+/* VI */
+#define R_028B50_VGT_TESS_DISTRIBUTION 0x028B50
+#define S_028B50_ACCUM_ISOLINE(x) (((x) & 0xFF) << 0)
+#define G_028B50_ACCUM_ISOLINE(x) (((x) >> 0) & 0xFF)
+#define C_028B50_ACCUM_ISOLINE 0xFFFFFF00
+#define S_028B50_ACCUM_TRI(x) (((x) & 0xFF) << 8)
+#define G_028B50_ACCUM_TRI(x) (((x) >> 8) & 0xFF)
+#define C_028B50_ACCUM_TRI 0xFFFF00FF
+#define S_028B50_ACCUM_QUAD(x) (((x) & 0xFF) << 16)
+#define G_028B50_ACCUM_QUAD(x) (((x) >> 16) & 0xFF)
+#define C_028B50_ACCUM_QUAD 0xFF00FFFF
+#define S_028B50_DONUT_SPLIT(x) (((x) & 0xFF) << 24)
+#define G_028B50_DONUT_SPLIT(x) (((x) >> 24) & 0xFF)
+#define C_028B50_DONUT_SPLIT 0x00FFFFFF
+/* */
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
#define S_028B54_LS_EN(x) (((x) & 0x03) << 0)
#define G_028B54_LS_EN(x) (((x) >> 0) & 0x03)
@@ -7798,6 +9757,20 @@
#define S_028B54_DYNAMIC_HS(x) (((x) & 0x1) << 8)
#define G_028B54_DYNAMIC_HS(x) (((x) >> 8) & 0x1)
#define C_028B54_DYNAMIC_HS 0xFFFFFEFF
+/* VI */
+#define S_028B54_DISPATCH_DRAW_EN(x) (((x) & 0x1) << 9)
+#define G_028B54_DISPATCH_DRAW_EN(x) (((x) >> 9) & 0x1)
+#define C_028B54_DISPATCH_DRAW_EN 0xFFFFFDFF
+#define S_028B54_DIS_DEALLOC_ACCUM_0(x) (((x) & 0x1) << 10)
+#define G_028B54_DIS_DEALLOC_ACCUM_0(x) (((x) >> 10) & 0x1)
+#define C_028B54_DIS_DEALLOC_ACCUM_0 0xFFFFFBFF
+#define S_028B54_DIS_DEALLOC_ACCUM_1(x) (((x) & 0x1) << 11)
+#define G_028B54_DIS_DEALLOC_ACCUM_1(x) (((x) >> 11) & 0x1)
+#define C_028B54_DIS_DEALLOC_ACCUM_1 0xFFFFF7FF
+#define S_028B54_VS_WAVE_ID_EN(x) (((x) & 0x1) << 12)
+#define G_028B54_VS_WAVE_ID_EN(x) (((x) >> 12) & 0x1)
+#define C_028B54_VS_WAVE_ID_EN 0xFFFFEFFF
+/* */
#define R_028B58_VGT_LS_HS_CONFIG 0x028B58
#define S_028B58_NUM_PATCHES(x) (((x) & 0xFF) << 0)
#define G_028B58_NUM_PATCHES(x) (((x) >> 0) & 0xFF)
@@ -7848,6 +9821,9 @@
#define S_028B6C_RESERVED_REDUC_AXIS(x) (((x) & 0x1) << 8) /* not on CIK */
#define G_028B6C_RESERVED_REDUC_AXIS(x) (((x) >> 8) & 0x1) /* not on CIK */
#define C_028B6C_RESERVED_REDUC_AXIS 0xFFFFFEFF /* not on CIK */
+#define S_028B6C_DEPRECATED(x) (((x) & 0x1) << 9)
+#define G_028B6C_DEPRECATED(x) (((x) >> 9) & 0x1)
+#define C_028B6C_DEPRECATED 0xFFFFFDFF
#define S_028B6C_NUM_DS_WAVES_PER_SIMD(x) (((x) & 0x0F) << 10)
#define G_028B6C_NUM_DS_WAVES_PER_SIMD(x) (((x) >> 10) & 0x0F)
#define C_028B6C_NUM_DS_WAVES_PER_SIMD 0xFFFFC3FF
@@ -7862,6 +9838,14 @@
#define V_028B6C_VGT_POLICY_STREAM 0x01
#define V_028B6C_VGT_POLICY_BYPASS 0x02
/* */
+/* VI */
+#define S_028B6C_DISTRIBUTION_MODE(x) (((x) & 0x03) << 17)
+#define G_028B6C_DISTRIBUTION_MODE(x) (((x) >> 17) & 0x03)
+#define C_028B6C_DISTRIBUTION_MODE 0xFFF9FFFF
+#define S_028B6C_MTYPE(x) (((x) & 0x03) << 19)
+#define G_028B6C_MTYPE(x) (((x) >> 19) & 0x03)
+#define C_028B6C_MTYPE 0xFFE7FFFF
+/* */
#define R_028B70_DB_ALPHA_TO_MASK 0x028B70
#define S_028B70_ALPHA_TO_MASK_ENABLE(x) (((x) & 0x1) << 0)
#define G_028B70_ALPHA_TO_MASK_ENABLE(x) (((x) >> 0) & 0x1)
@@ -8001,6 +9985,22 @@
#define S_028BDC_DX10_DIAMOND_TEST_ENA(x) (((x) & 0x1) << 12)
#define G_028BDC_DX10_DIAMOND_TEST_ENA(x) (((x) >> 12) & 0x1)
#define C_028BDC_DX10_DIAMOND_TEST_ENA 0xFFFFEFFF
+#define R_028BE0_PA_SC_AA_CONFIG 0x028BE0
+#define S_028BE0_MSAA_NUM_SAMPLES(x) (((x) & 0x7) << 0)
+#define G_028BE0_MSAA_NUM_SAMPLES(x) (((x) >> 0) & 0x07)
+#define C_028BE0_MSAA_NUM_SAMPLES 0xFFFFFFF8
+#define S_028BE0_AA_MASK_CENTROID_DTMN(x) (((x) & 0x1) << 4)
+#define G_028BE0_AA_MASK_CENTROID_DTMN(x) (((x) >> 4) & 0x1)
+#define C_028BE0_AA_MASK_CENTROID_DTMN 0xFFFFFFEF
+#define S_028BE0_MAX_SAMPLE_DIST(x) (((x) & 0xf) << 13)
+#define G_028BE0_MAX_SAMPLE_DIST(x) (((x) >> 13) & 0x0F)
+#define C_028BE0_MAX_SAMPLE_DIST 0xFFFE1FFF
+#define S_028BE0_MSAA_EXPOSED_SAMPLES(x) (((x) & 0x7) << 20)
+#define G_028BE0_MSAA_EXPOSED_SAMPLES(x) (((x) >> 20) & 0x07)
+#define C_028BE0_MSAA_EXPOSED_SAMPLES 0xFF8FFFFF
+#define S_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((x) & 0x3) << 24)
+#define G_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((x) >> 24) & 0x03)
+#define C_028BE0_DETAIL_TO_EXPOSED_MODE 0xFCFFFFFF
#define R_028BE4_PA_SU_VTX_CNTL 0x028BE4
#define S_028BE4_PIX_CENTER(x) (((x) & 0x1) << 0)
#define G_028BE4_PIX_CENTER(x) (((x) >> 0) & 0x1)
@@ -8569,6 +10569,17 @@
#define G_028C70_FMASK_COMPRESSION_DISABLE(x) (((x) >> 26) & 0x1)
#define C_028C70_FMASK_COMPRESSION_DISABLE 0xFBFFFFFF
/* */
+/* VI */
+#define S_028C70_FMASK_COMPRESS_1FRAG_ONLY(x) (((x) & 0x1) << 27)
+#define G_028C70_FMASK_COMPRESS_1FRAG_ONLY(x) (((x) >> 27) & 0x1)
+#define C_028C70_FMASK_COMPRESS_1FRAG_ONLY 0xF7FFFFFF
+#define S_028C70_DCC_ENABLE(x) (((x) & 0x1) << 28)
+#define G_028C70_DCC_ENABLE(x) (((x) >> 28) & 0x1)
+#define C_028C70_DCC_ENABLE 0xEFFFFFFF
+#define S_028C70_CMASK_ADDR_TYPE(x) (((x) & 0x03) << 29)
+#define G_028C70_CMASK_ADDR_TYPE(x) (((x) >> 29) & 0x03)
+#define C_028C70_CMASK_ADDR_TYPE 0x9FFFFFFF
+/* */
#define R_028C74_CB_COLOR0_ATTRIB 0x028C74
#define S_028C74_TILE_MODE_INDEX(x) (((x) & 0x1F) << 0)
#define G_028C74_TILE_MODE_INDEX(x) (((x) >> 0) & 0x1F)
@@ -8576,7 +10587,9 @@
#define S_028C74_FMASK_TILE_MODE_INDEX(x) (((x) & 0x1F) << 5)
#define G_028C74_FMASK_TILE_MODE_INDEX(x) (((x) >> 5) & 0x1F)
#define C_028C74_FMASK_TILE_MODE_INDEX 0xFFFFFC1F
-#define S_028C74_FMASK_BANK_HEIGHT(x) (((x) & 0x3) << 10) /* SI errata */
+#define S_028C74_FMASK_BANK_HEIGHT(x) (((x) & 0x03) << 10)
+#define G_028C74_FMASK_BANK_HEIGHT(x) (((x) >> 10) & 0x03)
+#define C_028C74_FMASK_BANK_HEIGHT 0xFFFFF3FF
#define S_028C74_NUM_SAMPLES(x) (((x) & 0x07) << 12)
#define G_028C74_NUM_SAMPLES(x) (((x) >> 12) & 0x07)
#define C_028C74_NUM_SAMPLES 0xFFFF8FFF
@@ -8586,6 +10599,36 @@
#define S_028C74_FORCE_DST_ALPHA_1(x) (((x) & 0x1) << 17)
#define G_028C74_FORCE_DST_ALPHA_1(x) (((x) >> 17) & 0x1)
#define C_028C74_FORCE_DST_ALPHA_1 0xFFFDFFFF
+/* VI */
+#define R_028C78_CB_COLOR0_DCC_CONTROL 0x028C78
+#define S_028C78_OVERWRITE_COMBINER_DISABLE(x) (((x) & 0x1) << 0)
+#define G_028C78_OVERWRITE_COMBINER_DISABLE(x) (((x) >> 0) & 0x1)
+#define C_028C78_OVERWRITE_COMBINER_DISABLE 0xFFFFFFFE
+#define S_028C78_KEY_CLEAR_ENABLE(x) (((x) & 0x1) << 1)
+#define G_028C78_KEY_CLEAR_ENABLE(x) (((x) >> 1) & 0x1)
+#define C_028C78_KEY_CLEAR_ENABLE 0xFFFFFFFD
+#define S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(x) (((x) & 0x03) << 2)
+#define G_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(x) (((x) >> 2) & 0x03)
+#define C_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE 0xFFFFFFF3
+#define S_028C78_MIN_COMPRESSED_BLOCK_SIZE(x) (((x) & 0x1) << 4)
+#define G_028C78_MIN_COMPRESSED_BLOCK_SIZE(x) (((x) >> 4) & 0x1)
+#define C_028C78_MIN_COMPRESSED_BLOCK_SIZE 0xFFFFFFEF
+#define S_028C78_MAX_COMPRESSED_BLOCK_SIZE(x) (((x) & 0x03) << 5)
+#define G_028C78_MAX_COMPRESSED_BLOCK_SIZE(x) (((x) >> 5) & 0x03)
+#define C_028C78_MAX_COMPRESSED_BLOCK_SIZE 0xFFFFFF9F
+#define S_028C78_COLOR_TRANSFORM(x) (((x) & 0x03) << 7)
+#define G_028C78_COLOR_TRANSFORM(x) (((x) >> 7) & 0x03)
+#define C_028C78_COLOR_TRANSFORM 0xFFFFFE7F
+#define S_028C78_INDEPENDENT_64B_BLOCKS(x) (((x) & 0x1) << 9)
+#define G_028C78_INDEPENDENT_64B_BLOCKS(x) (((x) >> 9) & 0x1)
+#define C_028C78_INDEPENDENT_64B_BLOCKS 0xFFFFFDFF
+#define S_028C78_LOSSY_RGB_PRECISION(x) (((x) & 0x0F) << 10)
+#define G_028C78_LOSSY_RGB_PRECISION(x) (((x) >> 10) & 0x0F)
+#define C_028C78_LOSSY_RGB_PRECISION 0xFFFFC3FF
+#define S_028C78_LOSSY_ALPHA_PRECISION(x) (((x) & 0x0F) << 14)
+#define G_028C78_LOSSY_ALPHA_PRECISION(x) (((x) >> 14) & 0x0F)
+#define C_028C78_LOSSY_ALPHA_PRECISION 0xFFFC3FFF
+/* */
#define R_028C7C_CB_COLOR0_CMASK 0x028C7C
#define R_028C80_CB_COLOR0_CMASK_SLICE 0x028C80
#define S_028C80_TILE_MAX(x) (((x) & 0x3FFF) << 0)
@@ -8598,90 +10641,105 @@
#define C_028C88_TILE_MAX 0xFFC00000
#define R_028C8C_CB_COLOR0_CLEAR_WORD0 0x028C8C
#define R_028C90_CB_COLOR0_CLEAR_WORD1 0x028C90
+#define R_028C94_CB_COLOR0_DCC_BASE 0x028C94 /* VI */
#define R_028C9C_CB_COLOR1_BASE 0x028C9C
#define R_028CA0_CB_COLOR1_PITCH 0x028CA0
#define R_028CA4_CB_COLOR1_SLICE 0x028CA4
#define R_028CA8_CB_COLOR1_VIEW 0x028CA8
#define R_028CAC_CB_COLOR1_INFO 0x028CAC
#define R_028CB0_CB_COLOR1_ATTRIB 0x028CB0
-#define R_028CD4_CB_COLOR1_CMASK 0x028CB8
+#define R_028CB4_CB_COLOR1_DCC_CONTROL 0x028CB4 /* VI */
+#define R_028CB8_CB_COLOR1_CMASK 0x028CB8
#define R_028CBC_CB_COLOR1_CMASK_SLICE 0x028CBC
#define R_028CC0_CB_COLOR1_FMASK 0x028CC0
#define R_028CC4_CB_COLOR1_FMASK_SLICE 0x028CC4
#define R_028CC8_CB_COLOR1_CLEAR_WORD0 0x028CC8
#define R_028CCC_CB_COLOR1_CLEAR_WORD1 0x028CCC
+#define R_028CD0_CB_COLOR1_DCC_BASE 0x028CD0 /* VI */
#define R_028CD8_CB_COLOR2_BASE 0x028CD8
#define R_028CDC_CB_COLOR2_PITCH 0x028CDC
#define R_028CE0_CB_COLOR2_SLICE 0x028CE0
#define R_028CE4_CB_COLOR2_VIEW 0x028CE4
#define R_028CE8_CB_COLOR2_INFO 0x028CE8
#define R_028CEC_CB_COLOR2_ATTRIB 0x028CEC
+#define R_028CF0_CB_COLOR2_DCC_CONTROL 0x028CF0 /* VI */
#define R_028CF4_CB_COLOR2_CMASK 0x028CF4
#define R_028CF8_CB_COLOR2_CMASK_SLICE 0x028CF8
#define R_028CFC_CB_COLOR2_FMASK 0x028CFC
#define R_028D00_CB_COLOR2_FMASK_SLICE 0x028D00
#define R_028D04_CB_COLOR2_CLEAR_WORD0 0x028D04
#define R_028D08_CB_COLOR2_CLEAR_WORD1 0x028D08
+#define R_028D0C_CB_COLOR2_DCC_BASE 0x028D0C /* VI */
#define R_028D14_CB_COLOR3_BASE 0x028D14
#define R_028D18_CB_COLOR3_PITCH 0x028D18
#define R_028D1C_CB_COLOR3_SLICE 0x028D1C
#define R_028D20_CB_COLOR3_VIEW 0x028D20
#define R_028D24_CB_COLOR3_INFO 0x028D24
#define R_028D28_CB_COLOR3_ATTRIB 0x028D28
+#define R_028D2C_CB_COLOR3_DCC_CONTROL 0x028D2C /* VI */
#define R_028D30_CB_COLOR3_CMASK 0x028D30
#define R_028D34_CB_COLOR3_CMASK_SLICE 0x028D34
#define R_028D38_CB_COLOR3_FMASK 0x028D38
#define R_028D3C_CB_COLOR3_FMASK_SLICE 0x028D3C
#define R_028D40_CB_COLOR3_CLEAR_WORD0 0x028D40
#define R_028D44_CB_COLOR3_CLEAR_WORD1 0x028D44
+#define R_028D48_CB_COLOR3_DCC_BASE 0x028D48 /* VI */
#define R_028D50_CB_COLOR4_BASE 0x028D50
#define R_028D54_CB_COLOR4_PITCH 0x028D54
#define R_028D58_CB_COLOR4_SLICE 0x028D58
#define R_028D5C_CB_COLOR4_VIEW 0x028D5C
#define R_028D60_CB_COLOR4_INFO 0x028D60
#define R_028D64_CB_COLOR4_ATTRIB 0x028D64
+#define R_028D68_CB_COLOR4_DCC_CONTROL 0x028D68 /* VI */
#define R_028D6C_CB_COLOR4_CMASK 0x028D6C
#define R_028D70_CB_COLOR4_CMASK_SLICE 0x028D70
#define R_028D74_CB_COLOR4_FMASK 0x028D74
#define R_028D78_CB_COLOR4_FMASK_SLICE 0x028D78
#define R_028D7C_CB_COLOR4_CLEAR_WORD0 0x028D7C
#define R_028D80_CB_COLOR4_CLEAR_WORD1 0x028D80
+#define R_028D84_CB_COLOR4_DCC_BASE 0x028D84 /* VI */
#define R_028D8C_CB_COLOR5_BASE 0x028D8C
#define R_028D90_CB_COLOR5_PITCH 0x028D90
#define R_028D94_CB_COLOR5_SLICE 0x028D94
#define R_028D98_CB_COLOR5_VIEW 0x028D98
#define R_028D9C_CB_COLOR5_INFO 0x028D9C
#define R_028DA0_CB_COLOR5_ATTRIB 0x028DA0
+#define R_028DA4_CB_COLOR5_DCC_CONTROL 0x028DA4 /* VI */
#define R_028DA8_CB_COLOR5_CMASK 0x028DA8
#define R_028DAC_CB_COLOR5_CMASK_SLICE 0x028DAC
#define R_028DB0_CB_COLOR5_FMASK 0x028DB0
#define R_028DB4_CB_COLOR5_FMASK_SLICE 0x028DB4
#define R_028DB8_CB_COLOR5_CLEAR_WORD0 0x028DB8
#define R_028DBC_CB_COLOR5_CLEAR_WORD1 0x028DBC
+#define R_028DC0_CB_COLOR5_DCC_BASE 0x028DC0 /* VI */
#define R_028DC8_CB_COLOR6_BASE 0x028DC8
#define R_028DCC_CB_COLOR6_PITCH 0x028DCC
#define R_028DD0_CB_COLOR6_SLICE 0x028DD0
#define R_028DD4_CB_COLOR6_VIEW 0x028DD4
#define R_028DD8_CB_COLOR6_INFO 0x028DD8
#define R_028DDC_CB_COLOR6_ATTRIB 0x028DDC
+#define R_028DE0_CB_COLOR6_DCC_CONTROL 0x028DE0 /* VI */
#define R_028DE4_CB_COLOR6_CMASK 0x028DE4
#define R_028DE8_CB_COLOR6_CMASK_SLICE 0x028DE8
#define R_028DEC_CB_COLOR6_FMASK 0x028DEC
#define R_028DF0_CB_COLOR6_FMASK_SLICE 0x028DF0
#define R_028DF4_CB_COLOR6_CLEAR_WORD0 0x028DF4
#define R_028DF8_CB_COLOR6_CLEAR_WORD1 0x028DF8
+#define R_028DFC_CB_COLOR6_DCC_BASE 0x028DFC /* VI */
#define R_028E04_CB_COLOR7_BASE 0x028E04
#define R_028E08_CB_COLOR7_PITCH 0x028E08
#define R_028E0C_CB_COLOR7_SLICE 0x028E0C
#define R_028E10_CB_COLOR7_VIEW 0x028E10
#define R_028E14_CB_COLOR7_INFO 0x028E14
#define R_028E18_CB_COLOR7_ATTRIB 0x028E18
+#define R_028E1C_CB_COLOR7_DCC_CONTROL 0x028E1C /* VI */
#define R_028E20_CB_COLOR7_CMASK 0x028E20
#define R_028E24_CB_COLOR7_CMASK_SLICE 0x028E24
#define R_028E28_CB_COLOR7_FMASK 0x028E28
#define R_028E2C_CB_COLOR7_FMASK_SLICE 0x028E2C
#define R_028E30_CB_COLOR7_CLEAR_WORD0 0x028E30
#define R_028E34_CB_COLOR7_CLEAR_WORD1 0x028E34
+#define R_028E38_CB_COLOR7_DCC_BASE 0x028E38 /* VI */
/* SI async DMA packets */
#define SI_DMA_PACKET(cmd, sub_cmd, n) ((((cmd) & 0xF) << 28) | \
diff --git a/src/gallium/drivers/rbug/rbug_context.h b/src/gallium/drivers/rbug/rbug_context.h
index 5e7b9d4dee4..e99f6edc523 100644
--- a/src/gallium/drivers/rbug/rbug_context.h
+++ b/src/gallium/drivers/rbug/rbug_context.h
@@ -79,7 +79,7 @@ struct rbug_context {
struct rbug_list shaders;
};
-static INLINE struct rbug_context *
+static inline struct rbug_context *
rbug_context(struct pipe_context *pipe)
{
return (struct rbug_context *)pipe;
diff --git a/src/gallium/drivers/rbug/rbug_objects.h b/src/gallium/drivers/rbug/rbug_objects.h
index 3fba3334228..02973e07996 100644
--- a/src/gallium/drivers/rbug/rbug_objects.h
+++ b/src/gallium/drivers/rbug/rbug_objects.h
@@ -93,7 +93,7 @@ struct rbug_transfer
};
-static INLINE struct rbug_resource *
+static inline struct rbug_resource *
rbug_resource(struct pipe_resource *_resource)
{
if (!_resource)
@@ -102,7 +102,7 @@ rbug_resource(struct pipe_resource *_resource)
return (struct rbug_resource *)_resource;
}
-static INLINE struct rbug_sampler_view *
+static inline struct rbug_sampler_view *
rbug_sampler_view(struct pipe_sampler_view *_sampler_view)
{
if (!_sampler_view)
@@ -111,7 +111,7 @@ rbug_sampler_view(struct pipe_sampler_view *_sampler_view)
return (struct rbug_sampler_view *)_sampler_view;
}
-static INLINE struct rbug_surface *
+static inline struct rbug_surface *
rbug_surface(struct pipe_surface *_surface)
{
if (!_surface)
@@ -120,7 +120,7 @@ rbug_surface(struct pipe_surface *_surface)
return (struct rbug_surface *)_surface;
}
-static INLINE struct rbug_transfer *
+static inline struct rbug_transfer *
rbug_transfer(struct pipe_transfer *_transfer)
{
if (!_transfer)
@@ -129,7 +129,7 @@ rbug_transfer(struct pipe_transfer *_transfer)
return (struct rbug_transfer *)_transfer;
}
-static INLINE struct rbug_shader *
+static inline struct rbug_shader *
rbug_shader(void *_state)
{
if (!_state)
@@ -137,7 +137,7 @@ rbug_shader(void *_state)
return (struct rbug_shader *)_state;
}
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
rbug_resource_unwrap(struct pipe_resource *_resource)
{
if (!_resource)
@@ -145,7 +145,7 @@ rbug_resource_unwrap(struct pipe_resource *_resource)
return rbug_resource(_resource)->resource;
}
-static INLINE struct pipe_sampler_view *
+static inline struct pipe_sampler_view *
rbug_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view)
{
if (!_sampler_view)
@@ -153,7 +153,7 @@ rbug_sampler_view_unwrap(struct pipe_sampler_view *_sampler_view)
return rbug_sampler_view(_sampler_view)->sampler_view;
}
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
rbug_surface_unwrap(struct pipe_surface *_surface)
{
if (!_surface)
@@ -161,7 +161,7 @@ rbug_surface_unwrap(struct pipe_surface *_surface)
return rbug_surface(_surface)->surface;
}
-static INLINE struct pipe_transfer *
+static inline struct pipe_transfer *
rbug_transfer_unwrap(struct pipe_transfer *_transfer)
{
if (!_transfer)
@@ -169,7 +169,7 @@ rbug_transfer_unwrap(struct pipe_transfer *_transfer)
return rbug_transfer(_transfer)->transfer;
}
-static INLINE void *
+static inline void *
rbug_shader_unwrap(void *_state)
{
struct rbug_shader *shader;
diff --git a/src/gallium/drivers/rbug/rbug_screen.c b/src/gallium/drivers/rbug/rbug_screen.c
index d5a3164e217..7da4e81560a 100644
--- a/src/gallium/drivers/rbug/rbug_screen.c
+++ b/src/gallium/drivers/rbug/rbug_screen.c
@@ -225,17 +225,6 @@ rbug_screen_fence_reference(struct pipe_screen *_screen,
fence);
}
-static boolean
-rbug_screen_fence_signalled(struct pipe_screen *_screen,
- struct pipe_fence_handle *fence)
-{
- struct rbug_screen *rb_screen = rbug_screen(_screen);
- struct pipe_screen *screen = rb_screen->screen;
-
- return screen->fence_signalled(screen,
- fence);
-}
-
static boolean
rbug_screen_fence_finish(struct pipe_screen *_screen,
struct pipe_fence_handle *fence,
@@ -288,7 +277,6 @@ rbug_screen_create(struct pipe_screen *screen)
rb_screen->base.resource_destroy = rbug_screen_resource_destroy;
rb_screen->base.flush_frontbuffer = rbug_screen_flush_frontbuffer;
rb_screen->base.fence_reference = rbug_screen_fence_reference;
- rb_screen->base.fence_signalled = rbug_screen_fence_signalled;
rb_screen->base.fence_finish = rbug_screen_fence_finish;
rb_screen->screen = screen;
diff --git a/src/gallium/drivers/rbug/rbug_screen.h b/src/gallium/drivers/rbug/rbug_screen.h
index a53afac05e9..fd92374beda 100644
--- a/src/gallium/drivers/rbug/rbug_screen.h
+++ b/src/gallium/drivers/rbug/rbug_screen.h
@@ -60,7 +60,7 @@ struct rbug_screen
struct rbug_list transfers;
};
-static INLINE struct rbug_screen *
+static inline struct rbug_screen *
rbug_screen(struct pipe_screen *screen)
{
return (struct rbug_screen *)screen;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 50a73369c1d..577df814b29 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -203,7 +203,7 @@ struct softpipe_context {
};
-static INLINE struct softpipe_context *
+static inline struct softpipe_context *
softpipe_context( struct pipe_context *pipe )
{
return (struct softpipe_context *)pipe;
diff --git a/src/gallium/drivers/softpipe/sp_fence.c b/src/gallium/drivers/softpipe/sp_fence.c
index c2897ed1ef8..6168236ec96 100644
--- a/src/gallium/drivers/softpipe/sp_fence.c
+++ b/src/gallium/drivers/softpipe/sp_fence.c
@@ -40,15 +40,6 @@ softpipe_fence_reference(struct pipe_screen *screen,
}
-static boolean
-softpipe_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- assert(fence);
- return TRUE;
-}
-
-
static boolean
softpipe_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
@@ -64,5 +55,4 @@ softpipe_init_screen_fence_funcs(struct pipe_screen *screen)
{
screen->fence_reference = softpipe_fence_reference;
screen->fence_finish = softpipe_fence_finish;
- screen->fence_signalled = softpipe_fence_signalled;
}
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 369ab6ed8d4..89411777ec9 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -52,7 +52,7 @@ struct sp_exec_fragment_shader
/** cast wrapper */
-static INLINE struct sp_exec_fragment_shader *
+static inline struct sp_exec_fragment_shader *
sp_exec_fragment_shader(const struct sp_fragment_shader_variant *var)
{
return (struct sp_exec_fragment_shader *) var;
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 18eca611669..f8a3eacdb37 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -145,7 +145,7 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
}
-static INLINE cptrf4 get_vert( const void *vertex_buffer,
+static inline cptrf4 get_vert( const void *vertex_buffer,
int index,
int stride )
{
diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index a32bd7fd241..5b458450cd8 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -63,7 +63,7 @@ struct blend_quad_stage
/** cast wrapper */
-static INLINE struct blend_quad_stage *
+static inline struct blend_quad_stage *
blend_quad_stage(struct quad_stage *stage)
{
return (struct blend_quad_stage *) stage;
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 82c58d04527..395bc70f2cf 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -56,7 +56,7 @@ struct quad_shade_stage
/** cast wrapper */
-static INLINE struct quad_shade_stage *
+static inline struct quad_shade_stage *
quad_shade_stage(struct quad_stage *qs)
{
return (struct quad_shade_stage *) qs;
@@ -67,7 +67,7 @@ quad_shade_stage(struct quad_stage *qs)
* Execute fragment shader for the four fragments in the quad.
* \return TRUE if quad is alive, FALSE if all four pixels are killed
*/
-static INLINE boolean
+static inline boolean
shade_quad(struct quad_stage *qs, struct quad_header *quad)
{
struct softpipe_context *softpipe = qs->softpipe;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index a688d319bb8..0bfd9c3578c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -234,6 +234,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
return 1;
case PIPE_CAP_CLIP_HALFZ:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
return 1;
case PIPE_CAP_VERTEXID_NOBASE:
return 0;
@@ -242,6 +244,8 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/softpipe/sp_screen.h b/src/gallium/drivers/softpipe/sp_screen.h
index d39e9f48e80..f0e929111c2 100644
--- a/src/gallium/drivers/softpipe/sp_screen.h
+++ b/src/gallium/drivers/softpipe/sp_screen.h
@@ -49,7 +49,7 @@ struct softpipe_screen {
boolean use_llvm;
};
-static INLINE struct softpipe_screen *
+static inline struct softpipe_screen *
softpipe_screen( struct pipe_screen *pipe )
{
return (struct softpipe_screen *)pipe;
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 6704015112b..ff3cb9fe5e1 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -125,7 +125,7 @@ struct setup_context {
/**
* Clip setup->quad against the scissor/surface bounds.
*/
-static INLINE void
+static inline void
quad_clip(struct setup_context *setup, struct quad_header *quad)
{
const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
@@ -156,7 +156,7 @@ quad_clip(struct setup_context *setup, struct quad_header *quad)
/**
* Emit a quad (pass to next stage) with clipping.
*/
-static INLINE void
+static inline void
clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
{
quad_clip( setup, quad );
@@ -178,14 +178,14 @@ clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
* Given an X or Y coordinate, return the block/quad coordinate that it
* belongs to.
*/
-static INLINE int
+static inline int
block(int x)
{
return x & ~(2-1);
}
-static INLINE int
+static inline int
block_x(int x)
{
return x & ~(16-1);
@@ -1039,7 +1039,7 @@ setup_line_coefficients(struct setup_context *setup,
/**
* Plot a pixel in a line segment.
*/
-static INLINE void
+static inline void
plot(struct setup_context *setup, int x, int y)
{
const int iy = y & 1;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 1010b63de2c..565fca632c6 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -58,7 +58,7 @@
* of improperly weighted linear-filtered textures.
* The tests/texwrap.c demo is a good test.
*/
-static INLINE float
+static inline float
frac(float f)
{
return f - floorf(f);
@@ -69,7 +69,7 @@ frac(float f)
/**
* Linear interpolation macro
*/
-static INLINE float
+static inline float
lerp(float a, float v0, float v1)
{
return v0 + a * (v1 - v0);
@@ -84,7 +84,7 @@ lerp(float a, float v0, float v1)
* optimization! If we find that's not true on some systems, convert
* to a macro.
*/
-static INLINE float
+static inline float
lerp_2d(float a, float b,
float v00, float v10, float v01, float v11)
{
@@ -97,7 +97,7 @@ lerp_2d(float a, float b,
/**
* As above, but 3D interpolation of 8 values.
*/
-static INLINE float
+static inline float
lerp_3d(float a, float b, float c,
float v000, float v100, float v010, float v110,
float v001, float v101, float v011, float v111)
@@ -115,7 +115,7 @@ lerp_3d(float a, float b, float c,
* value. To avoid that problem we add a large multiple of the size
* (rather than using a conditional).
*/
-static INLINE int
+static inline int
repeat(int coord, unsigned size)
{
return (coord + size * 1024) % size;
@@ -486,7 +486,7 @@ wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
/**
* Do coordinate to array index conversion. For array textures.
*/
-static INLINE int
+static inline int
coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
{
int c = util_ifloor(coord + 0.5F);
@@ -587,7 +587,7 @@ compute_lambda_vert(const struct sp_sampler_view *sview,
-static INLINE const float *
+static inline const float *
get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr, int x, int y)
{
@@ -603,7 +603,7 @@ get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
}
-static INLINE const float *
+static inline const float *
get_texel_2d(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y)
@@ -695,7 +695,7 @@ static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
};
-static INLINE unsigned
+static inline unsigned
get_next_face(unsigned face, int idx)
{
return face_array[face][idx];
@@ -705,7 +705,7 @@ get_next_face(unsigned face, int idx)
* return a new xcoord based on old face, old coords, cube size
* and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
*/
-static INLINE int
+static inline int
get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
{
if ((face == 0 && fall_off_index != 1) ||
@@ -743,7 +743,7 @@ get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
* return a new ycoord based on old face, old coords, cube size
* and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
*/
-static INLINE int
+static inline int
get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
{
if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
@@ -771,7 +771,7 @@ get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
/* Gather a quad of adjacent texels within a tile:
*/
-static INLINE void
+static inline void
get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr,
unsigned x, unsigned y,
@@ -795,7 +795,7 @@ get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
/* Gather a quad of potentially non-adjacent texels:
*/
-static INLINE void
+static inline void
get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr,
int x0, int y0,
@@ -810,7 +810,7 @@ get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
/* Can involve a lot of unnecessary checks for border color:
*/
-static INLINE void
+static inline void
get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr,
@@ -828,7 +828,7 @@ get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
/* 3d variants:
*/
-static INLINE const float *
+static inline const float *
get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr, int x, int y, int z)
{
@@ -846,7 +846,7 @@ get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
}
-static INLINE const float *
+static inline const float *
get_texel_3d(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y, int z)
@@ -866,7 +866,7 @@ get_texel_3d(const struct sp_sampler_view *sp_sview,
/* Get texel pointer for 1D array texture */
-static INLINE const float *
+static inline const float *
get_texel_1d_array(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y)
@@ -884,7 +884,7 @@ get_texel_1d_array(const struct sp_sampler_view *sp_sview,
/* Get texel pointer for 2D array texture */
-static INLINE const float *
+static inline const float *
get_texel_2d_array(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y, int layer)
@@ -905,7 +905,7 @@ get_texel_2d_array(const struct sp_sampler_view *sp_sview,
}
-static INLINE const float *
+static inline const float *
get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
union tex_tile_address addr, int x, int y,
float *corner, int layer, unsigned face)
@@ -960,7 +960,7 @@ get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
/* Get texel pointer for cube array texture */
-static INLINE const float *
+static inline const float *
get_texel_cube_array(const struct sp_sampler_view *sp_sview,
const struct sp_sampler *sp_samp,
union tex_tile_address addr, int x, int y, int layer)
@@ -986,7 +986,7 @@ get_texel_cube_array(const struct sp_sampler_view *sp_sview,
* If level = 2, then we'll return 64 (the width at level=2).
* Return 1 if level > base_pot.
*/
-static INLINE unsigned
+static inline unsigned
pot_level_size(unsigned base_pot, unsigned level)
{
return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
@@ -1016,7 +1016,7 @@ print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZ
/* Some image-filter fastpaths:
*/
-static INLINE void
+static inline void
img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
const struct img_filter_args *args,
@@ -1070,7 +1070,7 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
}
-static INLINE void
+static inline void
img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
const struct img_filter_args *args,
@@ -1104,7 +1104,7 @@ img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
}
-static INLINE void
+static inline void
img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
const struct img_filter_args *args,
@@ -1819,7 +1819,7 @@ img_filter_3d_linear(struct sp_sampler_view *sp_sview,
* \param lod_in per-fragment lod_bias or explicit_lod.
* \param lod returns the per-fragment lod.
*/
-static INLINE void
+static inline void
compute_lod(const struct pipe_sampler_state *sampler,
enum tgsi_sampler_control control,
const float biased_lambda,
@@ -1859,7 +1859,7 @@ compute_lod(const struct pipe_sampler_state *sampler,
* \param lod_in per-fragment lod_bias or explicit_lod.
* \param lod results per-fragment lod.
*/
-static INLINE void
+static inline void
compute_lambda_lod(struct sp_sampler_view *sp_sview,
struct sp_sampler *sp_samp,
const float s[TGSI_QUAD_SIZE],
@@ -1906,7 +1906,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
}
}
-static INLINE unsigned
+static inline unsigned
get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
{
/* gather component is stored in lod_in slot as unsigned */
@@ -2789,7 +2789,7 @@ get_linear_wrap(unsigned mode)
/**
* Is swizzling needed for the given state key?
*/
-static INLINE bool
+static inline bool
any_swizzle(const struct pipe_sampler_view *view)
{
return (view->swizzle_r != PIPE_SWIZZLE_RED ||
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index 4a421a8f882..21f38b2f859 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -185,7 +185,7 @@ sp_flush_tex_tile_cache(struct softpipe_tex_tile_cache *tc)
* This is basically a direct-map cache.
* XXX There's probably lots of ways in which we can improve this.
*/
-static INLINE uint
+static inline uint
tex_cache_pos( union tex_tile_address addr )
{
uint entry = (addr.bits.x +
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
index 2233effc439..b7ad222d715 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.h
@@ -127,7 +127,7 @@ extern const struct softpipe_tex_cached_tile *
sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
union tex_tile_address addr );
-static INLINE union tex_tile_address
+static inline union tex_tile_address
tex_tile_address( unsigned x,
unsigned y,
unsigned z,
@@ -147,7 +147,7 @@ tex_tile_address( unsigned x,
/* Quickly retrieve tile if it matches last lookup.
*/
-static INLINE const struct softpipe_tex_cached_tile *
+static inline const struct softpipe_tex_cached_tile *
sp_get_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
union tex_tile_address addr )
{
diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
index 1701bf574d9..fbf741a9c72 100644
--- a/src/gallium/drivers/softpipe/sp_texture.h
+++ b/src/gallium/drivers/softpipe/sp_texture.h
@@ -81,13 +81,13 @@ struct softpipe_transfer
/** cast wrappers */
-static INLINE struct softpipe_resource *
+static inline struct softpipe_resource *
softpipe_resource(struct pipe_resource *pt)
{
return (struct softpipe_resource *) pt;
}
-static INLINE struct softpipe_transfer *
+static inline struct softpipe_transfer *
softpipe_transfer(struct pipe_transfer *pt)
{
return (struct softpipe_transfer *) pt;
@@ -99,7 +99,7 @@ softpipe_transfer(struct pipe_transfer *pt)
* This is a short-cut instead of using map()/unmap(), which should
* probably be fixed.
*/
-static INLINE void *
+static inline void *
softpipe_resource_data(struct pipe_resource *pt)
{
if (!pt)
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index b763f526e61..9cc8ac12525 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -52,7 +52,7 @@ sp_alloc_tile(struct softpipe_tile_cache *tc);
(((x) + (y) * 5 + (l) * 10) % NUM_ENTRIES)
-static INLINE int addr_to_clear_pos(union tile_address addr)
+static inline int addr_to_clear_pos(union tile_address addr)
{
int pos;
pos = addr.bits.layer * (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE);
@@ -63,7 +63,7 @@ static INLINE int addr_to_clear_pos(union tile_address addr)
/**
* Is the tile at (x,y) in cleared state?
*/
-static INLINE uint
+static inline uint
is_clear_flag_set(const uint *bitvec, union tile_address addr, unsigned max)
{
int pos, bit;
@@ -77,7 +77,7 @@ is_clear_flag_set(const uint *bitvec, union tile_address addr, unsigned max)
/**
* Mark the tile at (x,y) as not cleared.
*/
-static INLINE void
+static inline void
clear_clear_flag(uint *bitvec, union tile_address addr, unsigned max)
{
int pos;
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h
index 167e1ffcada..2c0bafad651 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.h
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.h
@@ -128,7 +128,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
union tile_address addr );
-static INLINE union tile_address
+static inline union tile_address
tile_address( unsigned x,
unsigned y, unsigned layer )
{
@@ -143,7 +143,7 @@ tile_address( unsigned x,
/* Quickly retrieve tile if it matches last lookup.
*/
-static INLINE struct softpipe_cached_tile *
+static inline struct softpipe_cached_tile *
sp_get_cached_tile(struct softpipe_tile_cache *tc,
int x, int y, int layer )
{
diff --git a/src/gallium/drivers/svga/Makefile.am b/src/gallium/drivers/svga/Makefile.am
index e0a8cad7208..d46de95e4b4 100644
--- a/src/gallium/drivers/svga/Makefile.am
+++ b/src/gallium/drivers/svga/Makefile.am
@@ -20,8 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
index bb4d034f1eb..0ee624616f9 100644
--- a/src/gallium/drivers/svga/SConscript
+++ b/src/gallium/drivers/svga/SConscript
@@ -11,7 +11,6 @@ if env['suncc']:
if env['gcc'] or env['clang']:
env.Append(CPPDEFINES = [
'HAVE_STDINT_H',
- 'HAVE_SYS_TYPES_H',
])
env.Prepend(CPPPATH = [
diff --git a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
index 355edfdb702..5e00906ce36 100644
--- a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
+++ b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
@@ -507,7 +507,7 @@ static const uint32 SVGA3D_OUTPUT_REG_DEPTH_NUM_PS20 = 1;
*----------------------------------------------------------------------
*/
-static INLINE SVGA3dShaderRegType
+static inline SVGA3dShaderRegType
SVGA3dShaderGetRegType(uint32 token)
{
SVGA3dShaderSrcToken src;
diff --git a/src/gallium/drivers/svga/include/svga_overlay.h b/src/gallium/drivers/svga/include/svga_overlay.h
index 0f242dd402c..ccbf7912e6d 100644
--- a/src/gallium/drivers/svga/include/svga_overlay.h
+++ b/src/gallium/drivers/svga/include/svga_overlay.h
@@ -133,7 +133,7 @@ struct {
*----------------------------------------------------------------------
*/
-static INLINE Bool
+static inline Bool
VMwareVideoGetAttributes(const SVGAOverlayFormat format, // IN
uint32 *width, // IN / OUT
uint32 *height, // IN / OUT
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
index 474b75c3c86..b271832171d 100644
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -57,7 +57,7 @@
*----------------------------------------------------------------------
*/
-static INLINE void
+static inline void
surface_to_surfaceid(struct svga_winsys_context *swc, // IN
struct pipe_surface *surface, // IN
SVGA3dSurfaceImageId *id, // OUT
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 630f5f77d66..71f038df8c1 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -485,20 +485,20 @@ svga_context_create(struct pipe_screen *screen,
* Inline conversion functions. These are better-typed than the
* macros used previously:
*/
-static INLINE struct svga_context *
+static inline struct svga_context *
svga_context( struct pipe_context *pipe )
{
return (struct svga_context *)pipe;
}
-static INLINE boolean
+static inline boolean
svga_have_gb_objects(const struct svga_context *svga)
{
return svga_screen(svga->pipe.screen)->sws->have_gb_objects;
}
-static INLINE boolean
+static inline boolean
svga_have_gb_dma(const struct svga_context *svga)
{
return svga_screen(svga->pipe.screen)->sws->have_gb_dma;
diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h
index 3a3fcd8fae2..82c9b602d5d 100644
--- a/src/gallium/drivers/svga/svga_debug.h
+++ b/src/gallium/drivers/svga/svga_debug.h
@@ -53,7 +53,7 @@ extern int SVGA_DEBUG;
#define DBSTR(x) ""
#endif
-static INLINE void
+static inline void
SVGA_DBG( unsigned flag, const char *fmt, ... )
{
#ifdef DEBUG
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
index 1b054038e9f..9ab87e8259a 100644
--- a/src/gallium/drivers/svga/svga_draw_private.h
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -57,7 +57,7 @@ static const unsigned svga_hw_prims =
* PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP or PIPE_PRIM_POLYGON. We convert
* those to other types of primitives with index/translation code.
*/
-static INLINE unsigned
+static inline unsigned
svga_translate_prim(unsigned mode, unsigned vcount,unsigned *prim_count)
{
switch (mode) {
diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c
index 594eec7166e..2890516c0cf 100644
--- a/src/gallium/drivers/svga/svga_pipe_blend.c
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -33,7 +33,7 @@
#include "svga_hw_reg.h"
-static INLINE unsigned
+static inline unsigned
svga_translate_blend_factor(unsigned factor)
{
switch (factor) {
@@ -58,7 +58,7 @@ svga_translate_blend_factor(unsigned factor)
}
}
-static INLINE unsigned
+static inline unsigned
svga_translate_blend_func(unsigned mode)
{
switch (mode) {
diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
index cb07dbe09a3..8db21fd7476 100644
--- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -32,7 +32,7 @@
#include "svga_hw_reg.h"
-static INLINE unsigned
+static inline unsigned
svga_translate_compare_func(unsigned func)
{
switch (func) {
@@ -50,7 +50,7 @@ svga_translate_compare_func(unsigned func)
}
}
-static INLINE unsigned
+static inline unsigned
svga_translate_stencil_op(unsigned op)
{
switch (op) {
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
index a97a9c46cf8..208a2cd14bf 100644
--- a/src/gallium/drivers/svga/svga_pipe_query.c
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -59,7 +59,7 @@ struct svga_query {
/** cast wrapper */
-static INLINE struct svga_query *
+static inline struct svga_query *
svga_query( struct pipe_query *q )
{
return (struct svga_query *)q;
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 8a87bb467aa..effd490dd22 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -35,7 +35,7 @@
#include "svga_debug.h"
-static INLINE unsigned
+static inline unsigned
translate_wrap_mode(unsigned wrap)
{
switch (wrap) {
@@ -68,7 +68,7 @@ translate_wrap_mode(unsigned wrap)
}
}
-static INLINE unsigned translate_img_filter( unsigned filter )
+static inline unsigned translate_img_filter( unsigned filter )
{
switch (filter) {
case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
@@ -79,7 +79,7 @@ static INLINE unsigned translate_img_filter( unsigned filter )
}
}
-static INLINE unsigned translate_mip_filter( unsigned filter )
+static inline unsigned translate_mip_filter( unsigned filter )
{
switch (filter) {
case PIPE_TEX_MIPFILTER_NONE: return SVGA3D_TEX_FILTER_NONE;
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.c b/src/gallium/drivers/svga/svga_resource_buffer.c
index d2c7762e7ff..13f85cddbd5 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -45,7 +45,7 @@
* Vertex and index buffers need hardware backing. Constant buffers
* do not. No other types of buffers currently supported.
*/
-static INLINE boolean
+static inline boolean
svga_buffer_needs_hw_storage(unsigned usage)
{
return usage & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER);
diff --git a/src/gallium/drivers/svga/svga_resource_buffer.h b/src/gallium/drivers/svga/svga_resource_buffer.h
index 83b3d342aec..e838beb6661 100644
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -190,7 +190,7 @@ struct svga_buffer
};
-static INLINE struct svga_buffer *
+static inline struct svga_buffer *
svga_buffer(struct pipe_resource *buffer)
{
if (buffer) {
@@ -205,7 +205,7 @@ svga_buffer(struct pipe_resource *buffer)
* Returns TRUE for user buffers. We may
* decide to use an alternate upload path for these buffers.
*/
-static INLINE boolean
+static inline boolean
svga_buffer_is_user_buffer( struct pipe_resource *buffer )
{
if (buffer) {
@@ -219,7 +219,7 @@ svga_buffer_is_user_buffer( struct pipe_resource *buffer )
* Returns a pointer to a struct svga_winsys_screen given a
* struct svga_buffer.
*/
-static INLINE struct svga_winsys_screen *
+static inline struct svga_winsys_screen *
svga_buffer_winsys_screen(struct svga_buffer *sbuf)
{
return svga_screen(sbuf->b.b.screen)->sws;
@@ -230,7 +230,7 @@ svga_buffer_winsys_screen(struct svga_buffer *sbuf)
* Returns whether a buffer has hardware storage that is
* visible to the GPU.
*/
-static INLINE boolean
+static inline boolean
svga_buffer_has_hw_storage(struct svga_buffer *sbuf)
{
if (svga_buffer_winsys_screen(sbuf)->have_gb_objects)
@@ -242,7 +242,7 @@ svga_buffer_has_hw_storage(struct svga_buffer *sbuf)
/**
* Map the hardware storage of a buffer.
*/
-static INLINE void *
+static inline void *
svga_buffer_hw_storage_map(struct svga_context *svga,
struct svga_buffer *sbuf,
unsigned flags, boolean *retry)
@@ -259,7 +259,7 @@ svga_buffer_hw_storage_map(struct svga_context *svga,
/**
* Unmap the hardware storage of a buffer.
*/
-static INLINE void
+static inline void
svga_buffer_hw_storage_unmap(struct svga_context *svga,
struct svga_buffer *sbuf)
{
diff --git a/src/gallium/drivers/svga/svga_resource_texture.h b/src/gallium/drivers/svga/svga_resource_texture.h
index 1ff42fabab9..19dadfb8828 100644
--- a/src/gallium/drivers/svga/svga_resource_texture.h
+++ b/src/gallium/drivers/svga/svga_resource_texture.h
@@ -106,7 +106,7 @@ struct svga_transfer
};
-static INLINE struct svga_texture *svga_texture( struct pipe_resource *resource )
+static inline struct svga_texture *svga_texture( struct pipe_resource *resource )
{
struct svga_texture *tex = (struct svga_texture *)resource;
assert(tex == NULL || tex->b.vtbl == &svga_texture_vtbl);
@@ -114,7 +114,7 @@ static INLINE struct svga_texture *svga_texture( struct pipe_resource *resource
}
-static INLINE struct svga_transfer *
+static inline struct svga_transfer *
svga_transfer(struct pipe_transfer *transfer)
{
assert(transfer);
@@ -127,7 +127,7 @@ svga_transfer(struct pipe_transfer *transfer)
* This is used to track updates to textures when we draw into
* them via a surface.
*/
-static INLINE void
+static inline void
svga_age_texture_view(struct svga_texture *tex, unsigned level)
{
assert(level < Elements(tex->view_age));
@@ -138,7 +138,7 @@ svga_age_texture_view(struct svga_texture *tex, unsigned level)
/**
* Mark the given texture face/level as being defined.
*/
-static INLINE void
+static inline void
svga_define_texture_level(struct svga_texture *tex,
unsigned face,unsigned level)
{
@@ -148,7 +148,7 @@ svga_define_texture_level(struct svga_texture *tex,
}
-static INLINE bool
+static inline bool
svga_is_texture_level_defined(const struct svga_texture *tex,
unsigned face, unsigned level)
{
@@ -177,7 +177,7 @@ check_face_level(const struct svga_texture *tex,
}
-static INLINE void
+static inline void
svga_set_texture_rendered_to(struct svga_texture *tex,
unsigned face, unsigned level)
{
@@ -186,7 +186,7 @@ svga_set_texture_rendered_to(struct svga_texture *tex,
}
-static INLINE void
+static inline void
svga_clear_texture_rendered_to(struct svga_texture *tex,
unsigned face, unsigned level)
{
@@ -195,7 +195,7 @@ svga_clear_texture_rendered_to(struct svga_texture *tex,
}
-static INLINE boolean
+static inline boolean
svga_was_texture_rendered_to(const struct svga_texture *tex,
unsigned face, unsigned level)
{
diff --git a/src/gallium/drivers/svga/svga_sampler_view.h b/src/gallium/drivers/svga/svga_sampler_view.h
index 2087c1be85e..7f14323f84f 100644
--- a/src/gallium/drivers/svga/svga_sampler_view.h
+++ b/src/gallium/drivers/svga/svga_sampler_view.h
@@ -86,7 +86,7 @@ svga_destroy_sampler_view_priv(struct svga_sampler_view *v);
void
svga_debug_describe_sampler_view(char *buf, const struct svga_sampler_view *sv);
-static INLINE void
+static inline void
svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v)
{
struct svga_sampler_view *old = *ptr;
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 56e486786df..66c3deaa9e7 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -309,6 +309,10 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_UMA:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
}
@@ -443,7 +447,9 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
return 0;
case PIPE_SHADER_GEOMETRY:
case PIPE_SHADER_COMPUTE:
- /* no support for geometry or compute shaders at this time */
+ case PIPE_SHADER_TESS_CTRL:
+ case PIPE_SHADER_TESS_EVAL:
+ /* no support for geometry, tess or compute shaders at this time */
return 0;
default:
debug_printf("Unexpected shader type (%u) query\n", shader);
@@ -542,15 +548,6 @@ svga_fence_reference(struct pipe_screen *screen,
}
-static boolean
-svga_fence_signalled(struct pipe_screen *screen,
- struct pipe_fence_handle *fence)
-{
- struct svga_winsys_screen *sws = svga_screen(screen)->sws;
- return sws->fence_signalled(sws, fence, 0) == 0;
-}
-
-
static boolean
svga_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
@@ -558,6 +555,9 @@ svga_fence_finish(struct pipe_screen *screen,
{
struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+ if (!timeout)
+ return sws->fence_signalled(sws, fence, 0) == 0;
+
SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
__FUNCTION__, fence);
@@ -645,7 +645,6 @@ svga_screen_create(struct svga_winsys_screen *sws)
screen->is_format_supported = svga_is_format_supported;
screen->context_create = svga_context_create;
screen->fence_reference = svga_fence_reference;
- screen->fence_signalled = svga_fence_signalled;
screen->fence_finish = svga_fence_finish;
screen->get_driver_query_info = svga_get_driver_query_info;
svgascreen->sws = sws;
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
index b85191c4b26..ea1e743dfe5 100644
--- a/src/gallium/drivers/svga/svga_screen.h
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -82,7 +82,7 @@ struct svga_screen
#ifndef DEBUG
/** cast wrapper */
-static INLINE struct svga_screen *
+static inline struct svga_screen *
svga_screen(struct pipe_screen *pscreen)
{
return (struct svga_screen *) pscreen;
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index f63f7836187..3c765394a88 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -76,7 +76,7 @@ surface_size(const struct svga_host_surface_cache_key *key)
/**
* Compute the bucket for this key.
*/
-static INLINE unsigned
+static inline unsigned
svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
{
return util_hash_crc32(key, sizeof *key) % SVGA_HOST_SURFACE_CACHE_BUCKETS;
diff --git a/src/gallium/drivers/svga/svga_shader.h b/src/gallium/drivers/svga/svga_shader.h
index fd500ae4401..5102159b96a 100644
--- a/src/gallium/drivers/svga/svga_shader.h
+++ b/src/gallium/drivers/svga/svga_shader.h
@@ -44,7 +44,7 @@ svga_destroy_shader_variant(struct svga_context *svga,
/**
* Check if a shader's bytecode exceeds the device limits.
*/
-static INLINE boolean
+static inline boolean
svga_shader_too_large(const struct svga_context *svga,
const struct svga_shader_variant *variant)
{
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 566a79407e5..8cdce742b3b 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -41,7 +41,7 @@
-static INLINE int
+static inline int
compare_fs_keys(const struct svga_fs_compile_key *a,
const struct svga_fs_compile_key *b)
{
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
index fb56b3d36ba..ebb98373e2b 100644
--- a/src/gallium/drivers/svga/svga_state_rss.c
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -61,7 +61,7 @@ do { \
} while (0)
-static INLINE void
+static inline void
svga_queue_rs( struct rs_queue *q,
unsigned rss,
unsigned value )
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
index 0ab571c0588..41334bd7cb9 100644
--- a/src/gallium/drivers/svga/svga_state_tss.c
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -274,7 +274,7 @@ do { \
} while (0)
-static INLINE void
+static inline void
svga_queue_tss( struct ts_queue *q,
unsigned unit,
unsigned tss,
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 545c9d7420f..c2a0f1ee6b1 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -41,7 +41,7 @@
#include "svga_hw_reg.h"
-static INLINE int
+static inline int
compare_vs_keys(const struct svga_vs_compile_key *a,
const struct svga_vs_compile_key *b)
{
diff --git a/src/gallium/drivers/svga/svga_surface.h b/src/gallium/drivers/svga/svga_surface.h
index 7b8f6f018d2..2fa72a1c8f0 100644
--- a/src/gallium/drivers/svga/svga_surface.h
+++ b/src/gallium/drivers/svga/svga_surface.h
@@ -84,7 +84,7 @@ svga_texture_copy_handle(struct svga_context *svga,
unsigned width, unsigned height, unsigned depth);
-static INLINE struct svga_surface *
+static inline struct svga_surface *
svga_surface(struct pipe_surface *surface)
{
assert(surface);
@@ -92,7 +92,7 @@ svga_surface(struct pipe_surface *surface)
}
-static INLINE const struct svga_surface *
+static inline const struct svga_surface *
svga_surface_const(const struct pipe_surface *surface)
{
assert(surface);
diff --git a/src/gallium/drivers/svga/svga_swtnl_private.h b/src/gallium/drivers/svga/svga_swtnl_private.h
index 608950d7af6..e2106e1e8e6 100644
--- a/src/gallium/drivers/svga/svga_swtnl_private.h
+++ b/src/gallium/drivers/svga/svga_swtnl_private.h
@@ -76,7 +76,7 @@ struct svga_vbuf_render {
/**
* Basically a cast wrapper.
*/
-static INLINE struct svga_vbuf_render *
+static inline struct svga_vbuf_render *
svga_vbuf_render( struct vbuf_render *render )
{
assert(render);
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 9aafd851264..2e2ff5e4673 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -84,7 +84,7 @@ svga_shader_expand(struct svga_shader_emitter *emit)
}
-static INLINE boolean
+static inline boolean
reserve(struct svga_shader_emitter *emit, unsigned nr_dwords)
{
if (emit->ptr - emit->buf + nr_dwords * sizeof(unsigned) >= emit->size) {
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index e7a2a134ca5..5c47a4ad39f 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -124,7 +124,7 @@ struct svga_shader_variant
* The real use of this information is matching vertex elements to
* fragment shader inputs in the case where vertex shader is disabled.
*/
-static INLINE void svga_generate_vdecl_semantics( unsigned idx,
+static inline void svga_generate_vdecl_semantics( unsigned idx,
unsigned *usage,
unsigned *usage_index )
{
@@ -140,12 +140,12 @@ static INLINE void svga_generate_vdecl_semantics( unsigned idx,
-static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
+static inline unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
{
return sizeof *key;
}
-static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
+static inline unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
{
return (const char *)&key->tex[key->num_textures] - (const char *)key;
}
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
index 1894296e6d7..1a1dac23507 100644
--- a/src/gallium/drivers/svga/svga_tgsi_emit.h
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -167,7 +167,7 @@ svga_translate_decl_sm30(struct svga_shader_emitter *emit,
/** Emit the given SVGA3dShaderInstToken opcode */
-static INLINE boolean
+static inline boolean
emit_instruction(struct svga_shader_emitter *emit,
SVGA3dShaderInstToken opcode)
{
@@ -176,7 +176,7 @@ emit_instruction(struct svga_shader_emitter *emit,
/** Generate a SVGA3dShaderInstToken for the given SVGA3D shader opcode */
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
inst_token(unsigned opcode)
{
SVGA3dShaderInstToken inst;
@@ -192,7 +192,7 @@ inst_token(unsigned opcode)
* Generate a SVGA3dShaderInstToken for the given SVGA3D shader opcode
* with the predication flag set.
*/
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
inst_token_predicated(unsigned opcode)
{
SVGA3dShaderInstToken inst;
@@ -209,7 +209,7 @@ inst_token_predicated(unsigned opcode)
* Generate a SVGA3dShaderInstToken for a SETP instruction (set predicate)
* using the given comparison operator (one of SVGA3DOPCOMP_xx).
*/
-static INLINE SVGA3dShaderInstToken
+static inline SVGA3dShaderInstToken
inst_token_setp(unsigned operator)
{
SVGA3dShaderInstToken inst;
@@ -227,7 +227,7 @@ inst_token_setp(unsigned operator)
* Note that this function is used to create tokens for output registers,
* temp registers AND constants (see emit_def_const()).
*/
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
dst_register(unsigned file, int number)
{
SVGA3dShaderDestToken dest;
@@ -255,7 +255,7 @@ dst_register(unsigned file, int number)
* Apply a writemask to the given SVGA3dShaderDestToken, returning a
* new SVGA3dShaderDestToken.
*/
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
writemask(SVGA3dShaderDestToken dest, unsigned mask)
{
assert(dest.mask & mask);
@@ -265,7 +265,7 @@ writemask(SVGA3dShaderDestToken dest, unsigned mask)
/** Create a SVGA3dShaderSrcToken given a register file and number */
-static INLINE SVGA3dShaderSrcToken
+static inline SVGA3dShaderSrcToken
src_token(unsigned file, int number)
{
SVGA3dShaderSrcToken src;
@@ -289,7 +289,7 @@ src_token(unsigned file, int number)
/** Create a src_register given a register file and register number */
-static INLINE struct src_register
+static inline struct src_register
src_register(unsigned file, int number)
{
struct src_register src;
@@ -301,7 +301,7 @@ src_register(unsigned file, int number)
}
/** Translate src_register into SVGA3dShaderDestToken */
-static INLINE SVGA3dShaderDestToken
+static inline SVGA3dShaderDestToken
dst(struct src_register src)
{
return dst_register(SVGA3dShaderGetRegType(src.base.value), src.base.num);
@@ -309,7 +309,7 @@ dst(struct src_register src)
/** Translate SVGA3dShaderDestToken to a src_register */
-static INLINE struct src_register
+static inline struct src_register
src(SVGA3dShaderDestToken dst)
{
return src_register(SVGA3dShaderGetRegType(dst.value), dst.num);
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h
index 5db64bf135b..0a2e3d5f345 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader.h
+++ b/src/gallium/drivers/svga/svgadump/svga_shader.h
@@ -56,7 +56,7 @@ struct sh_reg
unsigned is_reg:1;
};
-static INLINE unsigned
+static inline unsigned
sh_reg_type( struct sh_reg reg )
{
return reg.type_lo | (reg.type_hi << 3);
@@ -138,7 +138,7 @@ struct sh_dstreg
unsigned is_reg:1;
};
-static INLINE unsigned
+static inline unsigned
sh_dstreg_type( struct sh_dstreg reg )
{
return reg.type_lo | (reg.type_hi << 3);
@@ -169,7 +169,7 @@ struct sh_srcreg
unsigned is_reg:1;
};
-static INLINE unsigned
+static inline unsigned
sh_srcreg_type( struct sh_srcreg reg )
{
return reg.type_lo | (reg.type_hi << 3);
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 0013c963e7a..7f6d0645112 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -49,13 +49,13 @@ struct trace_query
};
-static INLINE struct trace_query *
+static inline struct trace_query *
trace_query(struct pipe_query *query) {
return (struct trace_query *)query;
}
-static INLINE struct pipe_query *
+static inline struct pipe_query *
trace_query_unwrap(struct pipe_query *query)
{
if (query) {
@@ -66,7 +66,7 @@ trace_query_unwrap(struct pipe_query *query)
}
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
trace_resource_unwrap(struct trace_context *tr_ctx,
struct pipe_resource *resource)
{
@@ -82,7 +82,7 @@ trace_resource_unwrap(struct trace_context *tr_ctx,
}
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
trace_surface_unwrap(struct trace_context *tr_ctx,
struct pipe_surface *surface)
{
@@ -105,7 +105,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
}
-static INLINE void
+static inline void
trace_context_draw_vbo(struct pipe_context *_pipe,
const struct pipe_draw_info *info)
{
@@ -125,7 +125,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe,
}
-static INLINE struct pipe_query *
+static inline struct pipe_query *
trace_context_create_query(struct pipe_context *_pipe,
unsigned query_type,
unsigned index)
@@ -163,7 +163,7 @@ trace_context_create_query(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_destroy_query(struct pipe_context *_pipe,
struct pipe_query *_query)
{
@@ -185,7 +185,7 @@ trace_context_destroy_query(struct pipe_context *_pipe,
}
-static INLINE boolean
+static inline boolean
trace_context_begin_query(struct pipe_context *_pipe,
struct pipe_query *query)
{
@@ -207,7 +207,7 @@ trace_context_begin_query(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_end_query(struct pipe_context *_pipe,
struct pipe_query *query)
{
@@ -227,7 +227,7 @@ trace_context_end_query(struct pipe_context *_pipe,
}
-static INLINE boolean
+static inline boolean
trace_context_get_query_result(struct pipe_context *_pipe,
struct pipe_query *_query,
boolean wait,
@@ -262,7 +262,7 @@ trace_context_get_query_result(struct pipe_context *_pipe,
}
-static INLINE void *
+static inline void *
trace_context_create_blend_state(struct pipe_context *_pipe,
const struct pipe_blend_state *state)
{
@@ -285,7 +285,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_blend_state(struct pipe_context *_pipe,
void *state)
{
@@ -303,7 +303,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_blend_state(struct pipe_context *_pipe,
void *state)
{
@@ -321,7 +321,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe,
}
-static INLINE void *
+static inline void *
trace_context_create_sampler_state(struct pipe_context *_pipe,
const struct pipe_sampler_state *state)
{
@@ -344,7 +344,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_sampler_states(struct pipe_context *_pipe,
unsigned shader,
unsigned start,
@@ -371,7 +371,7 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_sampler_state(struct pipe_context *_pipe,
void *state)
{
@@ -389,7 +389,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe,
}
-static INLINE void *
+static inline void *
trace_context_create_rasterizer_state(struct pipe_context *_pipe,
const struct pipe_rasterizer_state *state)
{
@@ -412,7 +412,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
void *state)
{
@@ -430,7 +430,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
void *state)
{
@@ -448,7 +448,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
}
-static INLINE void *
+static inline void *
trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
const struct pipe_depth_stencil_alpha_state *state)
{
@@ -471,7 +471,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
void *state)
{
@@ -489,7 +489,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
void *state)
{
@@ -508,7 +508,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
#define TRACE_SHADER_STATE(shader_type) \
- static INLINE void * \
+ static inline void * \
trace_context_create_##shader_type##_state(struct pipe_context *_pipe, \
const struct pipe_shader_state *state) \
{ \
@@ -524,7 +524,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
return result; \
} \
\
- static INLINE void \
+ static inline void \
trace_context_bind_##shader_type##_state(struct pipe_context *_pipe, \
void *state) \
{ \
@@ -537,7 +537,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
trace_dump_call_end(); \
} \
\
- static INLINE void \
+ static inline void \
trace_context_delete_##shader_type##_state(struct pipe_context *_pipe, \
void *state) \
{ \
@@ -559,7 +559,7 @@ TRACE_SHADER_STATE(tes)
#undef TRACE_SHADER_STATE
-static INLINE void *
+static inline void *
trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
unsigned num_elements,
const struct pipe_vertex_element *elements)
@@ -587,7 +587,7 @@ trace_context_create_vertex_elements_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
void *state)
{
@@ -605,7 +605,7 @@ trace_context_bind_vertex_elements_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
void *state)
{
@@ -623,7 +623,7 @@ trace_context_delete_vertex_elements_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_blend_color(struct pipe_context *_pipe,
const struct pipe_blend_color *state)
{
@@ -641,7 +641,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_stencil_ref(struct pipe_context *_pipe,
const struct pipe_stencil_ref *state)
{
@@ -659,7 +659,7 @@ trace_context_set_stencil_ref(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_clip_state(struct pipe_context *_pipe,
const struct pipe_clip_state *state)
{
@@ -676,7 +676,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static INLINE void
+static inline void
trace_context_set_sample_mask(struct pipe_context *_pipe,
unsigned sample_mask)
{
@@ -693,7 +693,7 @@ trace_context_set_sample_mask(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static INLINE void
+static inline void
trace_context_set_constant_buffer(struct pipe_context *_pipe,
uint shader, uint index,
struct pipe_constant_buffer *constant_buffer)
@@ -721,7 +721,7 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_framebuffer_state(struct pipe_context *_pipe,
const struct pipe_framebuffer_state *state)
{
@@ -751,7 +751,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_polygon_stipple(struct pipe_context *_pipe,
const struct pipe_poly_stipple *state)
{
@@ -769,7 +769,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_scissor_states(struct pipe_context *_pipe,
unsigned start_slot,
unsigned num_scissors,
@@ -791,7 +791,7 @@ trace_context_set_scissor_states(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_viewport_states(struct pipe_context *_pipe,
unsigned start_slot,
unsigned num_viewports,
@@ -938,7 +938,7 @@ trace_context_surface_destroy(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_sampler_views(struct pipe_context *_pipe,
unsigned shader,
unsigned start,
@@ -974,7 +974,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_vertex_buffers(struct pipe_context *_pipe,
unsigned start_slot, unsigned num_buffers,
const struct pipe_vertex_buffer *buffers)
@@ -1008,7 +1008,7 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_set_index_buffer(struct pipe_context *_pipe,
const struct pipe_index_buffer *ib)
{
@@ -1033,7 +1033,7 @@ trace_context_set_index_buffer(struct pipe_context *_pipe,
}
-static INLINE struct pipe_stream_output_target *
+static inline struct pipe_stream_output_target *
trace_context_create_stream_output_target(struct pipe_context *_pipe,
struct pipe_resource *res,
unsigned buffer_offset,
@@ -1063,7 +1063,7 @@ trace_context_create_stream_output_target(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_stream_output_target_destroy(
struct pipe_context *_pipe,
struct pipe_stream_output_target *target)
@@ -1082,7 +1082,7 @@ trace_context_stream_output_target_destroy(
}
-static INLINE void
+static inline void
trace_context_set_stream_output_targets(struct pipe_context *_pipe,
unsigned num_targets,
struct pipe_stream_output_target **tgs,
@@ -1104,7 +1104,7 @@ trace_context_set_stream_output_targets(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_resource_copy_region(struct pipe_context *_pipe,
struct pipe_resource *dst,
unsigned dst_level,
@@ -1139,7 +1139,7 @@ trace_context_resource_copy_region(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_blit(struct pipe_context *_pipe,
const struct pipe_blit_info *_info)
{
@@ -1181,7 +1181,7 @@ trace_context_flush_resource(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_clear(struct pipe_context *_pipe,
unsigned buffers,
const union pipe_color_union *color,
@@ -1210,7 +1210,7 @@ trace_context_clear(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_clear_render_target(struct pipe_context *_pipe,
struct pipe_surface *dst,
const union pipe_color_union *color,
@@ -1237,7 +1237,7 @@ trace_context_clear_render_target(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static INLINE void
+static inline void
trace_context_clear_depth_stencil(struct pipe_context *_pipe,
struct pipe_surface *dst,
unsigned clear_flags,
@@ -1269,7 +1269,7 @@ trace_context_clear_depth_stencil(struct pipe_context *_pipe,
trace_dump_call_end();
}
-static INLINE void
+static inline void
trace_context_flush(struct pipe_context *_pipe,
struct pipe_fence_handle **fence,
unsigned flags)
@@ -1291,7 +1291,7 @@ trace_context_flush(struct pipe_context *_pipe,
}
-static INLINE void
+static inline void
trace_context_destroy(struct pipe_context *_pipe)
{
struct trace_context *tr_ctx = trace_context(_pipe);
diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h
index 1e5ad88d034..ad57d9d5243 100644
--- a/src/gallium/drivers/trace/tr_context.h
+++ b/src/gallium/drivers/trace/tr_context.h
@@ -54,7 +54,7 @@ void
trace_context_check(const struct pipe_context *pipe);
-static INLINE struct trace_context *
+static inline struct trace_context *
trace_context(struct pipe_context *pipe)
{
assert(pipe);
diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 753b92d8b54..601e2cbbec5 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -64,7 +64,7 @@ static long unsigned call_no = 0;
static boolean dumping = FALSE;
-static INLINE void
+static inline void
trace_dump_write(const char *buf, size_t size)
{
if (stream) {
@@ -73,14 +73,14 @@ trace_dump_write(const char *buf, size_t size)
}
-static INLINE void
+static inline void
trace_dump_writes(const char *s)
{
trace_dump_write(s, strlen(s));
}
-static INLINE void
+static inline void
trace_dump_writef(const char *format, ...)
{
static char buf[1024];
@@ -93,7 +93,7 @@ trace_dump_writef(const char *format, ...)
}
-static INLINE void
+static inline void
trace_dump_escape(const char *str)
{
const unsigned char *p = (const unsigned char *)str;
@@ -117,7 +117,7 @@ trace_dump_escape(const char *str)
}
-static INLINE void
+static inline void
trace_dump_indent(unsigned level)
{
unsigned i;
@@ -126,14 +126,14 @@ trace_dump_indent(unsigned level)
}
-static INLINE void
+static inline void
trace_dump_newline(void)
{
trace_dump_writes("\n");
}
-static INLINE void
+static inline void
trace_dump_tag(const char *name)
{
trace_dump_writes("<");
@@ -142,7 +142,7 @@ trace_dump_tag(const char *name)
}
-static INLINE void
+static inline void
trace_dump_tag_begin(const char *name)
{
trace_dump_writes("<");
@@ -150,7 +150,7 @@ trace_dump_tag_begin(const char *name)
trace_dump_writes(">");
}
-static INLINE void
+static inline void
trace_dump_tag_begin1(const char *name,
const char *attr1, const char *value1)
{
@@ -164,7 +164,7 @@ trace_dump_tag_begin1(const char *name,
}
-static INLINE void
+static inline void
trace_dump_tag_begin2(const char *name,
const char *attr1, const char *value1,
const char *attr2, const char *value2)
@@ -183,7 +183,7 @@ trace_dump_tag_begin2(const char *name,
}
-static INLINE void
+static inline void
trace_dump_tag_begin3(const char *name,
const char *attr1, const char *value1,
const char *attr2, const char *value2,
@@ -207,7 +207,7 @@ trace_dump_tag_begin3(const char *name,
}
-static INLINE void
+static inline void
trace_dump_tag_end(const char *name)
{
trace_dump_writes("</");
diff --git a/src/gallium/drivers/trace/tr_dump_defines.h b/src/gallium/drivers/trace/tr_dump_defines.h
index 0c83c2b68f1..b38d63eac59 100644
--- a/src/gallium/drivers/trace/tr_dump_defines.h
+++ b/src/gallium/drivers/trace/tr_dump_defines.h
@@ -34,7 +34,7 @@
#include "tr_dump.h"
-static INLINE void
+static inline void
trace_dump_format(enum pipe_format format)
{
if (!trace_dumping_enabled_locked())
@@ -44,7 +44,7 @@ trace_dump_format(enum pipe_format format)
}
-static INLINE void
+static inline void
trace_dump_query_type(unsigned value)
{
if (!trace_dumping_enabled_locked())
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 266626defa8..1d86a378eea 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -369,29 +369,6 @@ trace_screen_fence_reference(struct pipe_screen *_screen,
}
-static boolean
-trace_screen_fence_signalled(struct pipe_screen *_screen,
- struct pipe_fence_handle *fence)
-{
- struct trace_screen *tr_scr = trace_screen(_screen);
- struct pipe_screen *screen = tr_scr->screen;
- int result;
-
- trace_dump_call_begin("pipe_screen", "fence_signalled");
-
- trace_dump_arg(ptr, screen);
- trace_dump_arg(ptr, fence);
-
- result = screen->fence_signalled(screen, fence);
-
- trace_dump_ret(bool, result);
-
- trace_dump_call_end();
-
- return result;
-}
-
-
static boolean
trace_screen_fence_finish(struct pipe_screen *_screen,
struct pipe_fence_handle *fence,
@@ -503,7 +480,6 @@ trace_screen_create(struct pipe_screen *screen)
tr_scr->base.resource_get_handle = trace_screen_resource_get_handle;
tr_scr->base.resource_destroy = trace_screen_resource_destroy;
tr_scr->base.fence_reference = trace_screen_fence_reference;
- tr_scr->base.fence_signalled = trace_screen_fence_signalled;
tr_scr->base.fence_finish = trace_screen_fence_finish;
tr_scr->base.flush_frontbuffer = trace_screen_flush_frontbuffer;
tr_scr->base.get_timestamp = trace_screen_get_timestamp;
diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h
index 5e45c3c2f8f..e48b7b39e24 100644
--- a/src/gallium/drivers/trace/tr_texture.h
+++ b/src/gallium/drivers/trace/tr_texture.h
@@ -85,7 +85,7 @@ struct trace_transfer
};
-static INLINE struct trace_resource *
+static inline struct trace_resource *
trace_resource(struct pipe_resource *texture)
{
if(!texture)
@@ -95,7 +95,7 @@ trace_resource(struct pipe_resource *texture)
}
-static INLINE struct trace_surface *
+static inline struct trace_surface *
trace_surface(struct pipe_surface *surface)
{
if(!surface)
@@ -105,7 +105,7 @@ trace_surface(struct pipe_surface *surface)
}
-static INLINE struct trace_sampler_view *
+static inline struct trace_sampler_view *
trace_sampler_view(struct pipe_sampler_view *sampler_view)
{
if (!sampler_view)
@@ -114,7 +114,7 @@ trace_sampler_view(struct pipe_sampler_view *sampler_view)
}
-static INLINE struct trace_transfer *
+static inline struct trace_transfer *
trace_transfer(struct pipe_transfer *transfer)
{
if(!transfer)
diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am
index 3f62ce21a9f..f4a57ba3404 100644
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
@@ -30,10 +28,10 @@ SIM_LDFLAGS = -lsimpenrose
endif
AM_CFLAGS = \
+ -I$(top_builddir)/src/glsl/nir \
$(LIBDRM_CFLAGS) \
$(GALLIUM_DRIVER_CFLAGS) \
$(SIM_CFLAGS) \
- -I$(top_srcdir)/src/mesa/ \
$()
noinst_LTLIBRARIES = libvc4.la
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 1eb029e67e7..6fb40c20562 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -19,6 +19,8 @@ C_SOURCES := \
vc4_fence.c \
vc4_formats.c \
vc4_job.c \
+ vc4_nir_lower_blend.c \
+ vc4_nir_lower_io.c \
vc4_opt_algebraic.c \
vc4_opt_constant_folding.c \
vc4_opt_copy_propagation.c \
@@ -49,4 +51,5 @@ C_SOURCES := \
vc4_state.c \
vc4_tiling.c \
vc4_tiling.h \
+ vc4_uniforms.c \
$()
diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h
index 1fd8aa9fb28..ffc973735ae 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -26,17 +26,6 @@
#include "vc4_simulator_validate.h"
-enum vc4_bo_mode {
- VC4_MODE_UNDECIDED,
- VC4_MODE_RENDER,
- VC4_MODE_SHADER,
-};
-
-struct vc4_bo_exec_state {
- struct drm_gem_cma_object *bo;
- enum vc4_bo_mode mode;
-};
-
struct vc4_exec_info {
/* Sequence number for this bin/render job. */
uint64_t seqno;
@@ -47,7 +36,7 @@ struct vc4_exec_info {
/* This is the array of BOs that were looked up at the start of exec.
* Command validation will use indices into this array.
*/
- struct vc4_bo_exec_state *bo;
+ struct drm_gem_cma_object **bo;
uint32_t bo_count;
/* List of other BOs used in the job that need to be released
@@ -72,7 +61,6 @@ struct vc4_exec_info {
* command lists.
*/
struct vc4_shader_state {
- uint8_t packet;
uint32_t addr;
/* Maximum vertex index referenced by any primitive using this
* shader state.
@@ -88,6 +76,7 @@ struct vc4_exec_info {
bool found_tile_binning_mode_config_packet;
bool found_start_tile_binning_packet;
bool found_increment_semaphore_packet;
+ bool found_flush;
uint8_t bin_tiles_x, bin_tiles_y;
struct drm_gem_cma_object *tile_bo;
uint32_t tile_alloc_offset;
@@ -99,6 +88,9 @@ struct vc4_exec_info {
uint32_t ct0ca, ct0ea;
uint32_t ct1ca, ct1ea;
+ /* Pointer to the unvalidated bin CL (if present). */
+ void *bin_u;
+
/* Pointers to the shader recs. These paddr gets incremented as CL
* packets are relocated in validate_gl_shader_state, and the vaddrs
* (u and v) get incremented and size decremented as the shader recs
@@ -168,10 +160,8 @@ vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
-bool vc4_use_bo(struct vc4_exec_info *exec,
- uint32_t hindex,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj);
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+ uint32_t hindex);
int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
diff --git a/src/gallium/drivers/vc4/kernel/vc4_gem.c b/src/gallium/drivers/vc4/kernel/vc4_gem.c
index e4b7fea5968..93f9ec7ed9b 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_gem.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_gem.c
@@ -112,6 +112,8 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+ exec->bin_u = bin;
+
exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
exec->shader_rec_size = args->shader_rec_size;
diff --git a/src/gallium/drivers/vc4/kernel/vc4_packet.h b/src/gallium/drivers/vc4/kernel/vc4_packet.h
index 88cfc0fa9f0..771e2b78761 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_packet.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_packet.h
@@ -88,16 +88,22 @@ enum vc4_packet {
#define VC4_PACKET_START_TILE_BINNING_SIZE 1
#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1
#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1
+#define VC4_PACKET_BRANCH_SIZE 5
#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5
#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1
#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5
#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7
#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7
#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14
#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1
#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2
#define VC4_PACKET_GL_SHADER_STATE_SIZE 5
#define VC4_PACKET_NV_SHADER_STATE_SIZE 5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE 5
#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4
#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5
#define VC4_PACKET_POINT_SIZE_SIZE 5
@@ -106,6 +112,7 @@ enum vc4_packet {
#define VC4_PACKET_DEPTH_OFFSET_SIZE 5
#define VC4_PACKET_CLIP_WINDOW_SIZE 9
#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5
+#define VC4_PACKET_Z_CLIPPING_SIZE 9
#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9
#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9
#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16
@@ -134,6 +141,16 @@ enum vc4_packet {
#define VC4_TILING_FORMAT_LT 2
/** @} */
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0)
+
/** @{
*
* byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
diff --git a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
index e2d907ad91f..b827eb7e9e1 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
@@ -100,7 +100,8 @@ static void emit_tile(struct vc4_exec_info *exec,
struct vc4_rcl_setup *setup,
uint8_t x, uint8_t y, bool first, bool last)
{
- bool has_bin = exec->args->bin_cl_size != 0;
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
/* Note that the load doesn't actually occur until the
* tile coords packet is processed, and only one load
@@ -108,10 +109,9 @@ static void emit_tile(struct vc4_exec_info *exec,
*/
if (setup->color_read) {
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->color_read.bits);
+ rcl_u16(setup, args->color_read.bits);
rcl_u32(setup,
- setup->color_read->paddr +
- exec->args->color_read.offset);
+ setup->color_read->paddr + args->color_read.offset);
}
if (setup->zs_read) {
@@ -122,9 +122,8 @@ static void emit_tile(struct vc4_exec_info *exec,
}
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->zs_read.bits);
- rcl_u32(setup,
- setup->zs_read->paddr + exec->args->zs_read.offset);
+ rcl_u16(setup, args->zs_read.bits);
+ rcl_u32(setup, setup->zs_read->paddr + args->zs_read.offset);
}
/* Clipping depends on tile coordinates having been
@@ -147,11 +146,11 @@ static void emit_tile(struct vc4_exec_info *exec,
if (setup->zs_write) {
rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
- rcl_u16(setup, exec->args->zs_write.bits |
+ rcl_u16(setup, args->zs_write.bits |
(setup->color_ms_write ?
VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0));
rcl_u32(setup,
- (setup->zs_write->paddr + exec->args->zs_write.offset) |
+ (setup->zs_write->paddr + args->zs_write.offset) |
((last && !setup->color_ms_write) ?
VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
}
@@ -172,11 +171,12 @@ static void emit_tile(struct vc4_exec_info *exec,
static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
struct vc4_rcl_setup *setup)
{
- bool has_bin = exec->args->bin_cl_size != 0;
- uint8_t min_x_tile = exec->args->min_x_tile;
- uint8_t min_y_tile = exec->args->min_y_tile;
- uint8_t max_x_tile = exec->args->max_x_tile;
- uint8_t max_y_tile = exec->args->max_y_tile;
+ struct drm_vc4_submit_cl *args = exec->args;
+ bool has_bin = args->bin_cl_size != 0;
+ uint8_t min_x_tile = args->min_x_tile;
+ uint8_t min_y_tile = args->min_y_tile;
+ uint8_t max_x_tile = args->max_x_tile;
+ uint8_t max_y_tile = args->max_y_tile;
uint8_t xtiles = max_x_tile - min_x_tile + 1;
uint8_t ytiles = max_y_tile - min_y_tile + 1;
uint8_t x, y;
@@ -185,7 +185,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
- if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
size += VC4_PACKET_CLEAR_COLORS_SIZE +
VC4_PACKET_TILE_COORDINATES_SIZE +
VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
@@ -208,7 +208,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
}
if (setup->zs_write)
- loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+ loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
if (setup->color_ms_write) {
if (setup->zs_write)
loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
@@ -226,23 +226,23 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
rcl_u32(setup,
(setup->color_ms_write ?
(setup->color_ms_write->paddr +
- exec->args->color_ms_write.offset) :
+ args->color_ms_write.offset) :
0));
- rcl_u16(setup, exec->args->width);
- rcl_u16(setup, exec->args->height);
- rcl_u16(setup, exec->args->color_ms_write.bits);
+ rcl_u16(setup, args->width);
+ rcl_u16(setup, args->height);
+ rcl_u16(setup, args->color_ms_write.bits);
/* The tile buffer gets cleared when the previous tile is stored. If
* the clear values changed between frames, then the tile buffer has
* stale clear values in it, so we have to do a store in None mode (no
* writes) so that we trigger the tile buffer clear.
*/
- if (exec->args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+ if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
- rcl_u32(setup, exec->args->clear_color[0]);
- rcl_u32(setup, exec->args->clear_color[1]);
- rcl_u32(setup, exec->args->clear_z);
- rcl_u8(setup, exec->args->clear_s);
+ rcl_u32(setup, args->clear_color[0]);
+ rcl_u32(setup, args->clear_color[1]);
+ rcl_u32(setup, args->clear_z);
+ rcl_u8(setup, args->clear_s);
vc4_tile_coordinates(setup, 0, 0);
@@ -286,7 +286,8 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
if (surf->hindex == ~0)
return 0;
- if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
return -EINVAL;
if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
@@ -365,7 +366,8 @@ vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec,
if (surf->hindex == ~0)
return 0;
- if (!vc4_use_bo(exec, surf->hindex, VC4_MODE_RENDER, obj))
+ *obj = vc4_use_bo(exec, surf->hindex);
+ if (!*obj)
return -EINVAL;
if (tiling > VC4_TILING_FORMAT_LT) {
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index a0b67a7e50b..b248831113c 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -94,42 +94,42 @@ size_is_lt(uint32_t width, uint32_t height, int cpp)
height <= 4 * utile_height(cpp));
}
-bool
-vc4_use_bo(struct vc4_exec_info *exec,
- uint32_t hindex,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj)
+struct drm_gem_cma_object *
+vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
{
- *obj = NULL;
+ struct drm_gem_cma_object *obj;
+ struct drm_vc4_bo *bo;
if (hindex >= exec->bo_count) {
DRM_ERROR("BO index %d greater than BO count %d\n",
hindex, exec->bo_count);
- return false;
+ return NULL;
+ }
+ obj = exec->bo[hindex];
+ bo = to_vc4_bo(&obj->base);
+
+ if (bo->validated_shader) {
+ DRM_ERROR("Trying to use shader BO as something other than "
+ "a shader\n");
+ return NULL;
}
- if (exec->bo[hindex].mode != mode) {
- if (exec->bo[hindex].mode == VC4_MODE_UNDECIDED) {
- exec->bo[hindex].mode = mode;
- } else {
- DRM_ERROR("BO index %d reused with mode %d vs %d\n",
- hindex, exec->bo[hindex].mode, mode);
- return false;
- }
- }
+ return obj;
+}
- *obj = exec->bo[hindex].bo;
- return true;
+static struct drm_gem_cma_object *
+vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
+{
+ return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
}
static bool
-vc4_use_handle(struct vc4_exec_info *exec,
- uint32_t gem_handles_packet_index,
- enum vc4_bo_mode mode,
- struct drm_gem_cma_object **obj)
+validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
{
- return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index],
- mode, obj);
+ /* Note that the untrusted pointer passed to these functions is
+ * incremented past the packet byte.
+ */
+ return (untrusted - 1 == exec->bin_u + pos);
}
static uint32_t
@@ -201,14 +201,15 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
return true;
}
+
static int
-validate_flush_all(VALIDATE_ARGS)
+validate_flush(VALIDATE_ARGS)
{
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
- "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
+ DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
return -EINVAL;
}
+ exec->found_flush = true;
return 0;
}
@@ -233,17 +234,13 @@ validate_start_tile_binning(VALIDATE_ARGS)
static int
validate_increment_semaphore(VALIDATE_ARGS)
{
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n");
+ if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
+ DRM_ERROR("Bin CL must end with "
+ "VC4_PACKET_INCREMENT_SEMAPHORE\n");
return -EINVAL;
}
exec->found_increment_semaphore_packet = true;
- /* Once we've found the semaphore increment, there should be one FLUSH
- * then the end of the command list. The FLUSH actually triggers the
- * increment, so we only need to make sure there
- */
-
return 0;
}
@@ -257,11 +254,6 @@ validate_indexed_prim_list(VALIDATE_ARGS)
uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
struct vc4_shader_state *shader_state;
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
- return -EINVAL;
- }
-
/* Check overflow condition */
if (exec->shader_state_count == 0) {
DRM_ERROR("shader state must precede primitives\n");
@@ -272,7 +264,8 @@ validate_indexed_prim_list(VALIDATE_ARGS)
if (max_index > shader_state->max_index)
shader_state->max_index = max_index;
- if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, &ib))
+ ib = vc4_use_handle(exec, 0);
+ if (!ib)
return -EINVAL;
if (offset > ib->base.size ||
@@ -295,11 +288,6 @@ validate_gl_array_primitive(VALIDATE_ARGS)
uint32_t max_index;
struct vc4_shader_state *shader_state;
- if (exec->found_increment_semaphore_packet) {
- DRM_ERROR("Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n");
- return -EINVAL;
- }
-
/* Check overflow condition */
if (exec->shader_state_count == 0) {
DRM_ERROR("shader state must precede primitives\n");
@@ -329,7 +317,6 @@ validate_gl_shader_state(VALIDATE_ARGS)
return -EINVAL;
}
- exec->shader_state[i].packet = VC4_PACKET_GL_SHADER_STATE;
exec->shader_state[i].addr = *(uint32_t *)untrusted;
exec->shader_state[i].max_index = 0;
@@ -347,31 +334,6 @@ validate_gl_shader_state(VALIDATE_ARGS)
return 0;
}
-static int
-validate_nv_shader_state(VALIDATE_ARGS)
-{
- uint32_t i = exec->shader_state_count++;
-
- if (i >= exec->shader_state_size) {
- DRM_ERROR("More requests for shader states than declared\n");
- return -EINVAL;
- }
-
- exec->shader_state[i].packet = VC4_PACKET_NV_SHADER_STATE;
- exec->shader_state[i].addr = *(uint32_t *)untrusted;
-
- if (exec->shader_state[i].addr & 15) {
- DRM_ERROR("NV shader state address 0x%08x misaligned\n",
- exec->shader_state[i].addr);
- return -EINVAL;
- }
-
- *(uint32_t *)validated = (exec->shader_state[i].addr +
- exec->shader_rec_p);
-
- return 0;
-}
-
static int
validate_tile_binning_config(VALIDATE_ARGS)
{
@@ -473,8 +435,8 @@ static const struct cmd_info {
} cmd_info[] = {
VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
- VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL),
- VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", validate_flush),
+ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
@@ -488,7 +450,7 @@ static const struct cmd_info {
VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
- VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state),
+ /* We don't support validating NV shader states. */
VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
@@ -525,7 +487,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
u8 cmd = *(uint8_t *)src_pkt;
const struct cmd_info *info;
- if (cmd > ARRAY_SIZE(cmd_info)) {
+ if (cmd >= ARRAY_SIZE(cmd_info)) {
DRM_ERROR("0x%08x: packet %d out of bounds\n",
src_offset, cmd);
return -EINVAL;
@@ -580,8 +542,16 @@ vc4_validate_bin_cl(struct drm_device *dev,
return -EINVAL;
}
- if (!exec->found_increment_semaphore_packet) {
- DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE\n");
+	/* The bin CL must end with INCREMENT_SEMAPHORE followed by FLUSH. The
+	 * semaphore is used to trigger the render CL to start up, and the
+	 * FLUSH is what caps the bin lists with
+	 * VC4_PACKET_RETURN_FROM_SUB_LIST (so that they jump back to the main
+	 * render CL once they have been called) and actually triggers the
+	 * queued semaphore increment.
+	 */
+ if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
+ DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
+ "VC4_PACKET_FLUSH\n");
return -EINVAL;
}
@@ -612,18 +582,19 @@ reloc_tex(struct vc4_exec_info *exec,
uint32_t cube_map_stride = 0;
enum vc4_texture_data_type type;
- if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, &tex))
+ tex = vc4_use_bo(exec, texture_handle_index);
+ if (!tex)
return false;
if (sample->is_direct) {
uint32_t remaining_size = tex->base.size - p0;
if (p0 > tex->base.size - 4) {
DRM_ERROR("UBO offset greater than UBO size\n");
- return false;
+ goto fail;
}
if (p1 > remaining_size - 4) {
DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
- return false;
+ goto fail;
}
*validated_p0 = tex->paddr + p0;
return true;
@@ -642,14 +613,14 @@ reloc_tex(struct vc4_exec_info *exec,
VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
if (cube_map_stride) {
DRM_ERROR("Cube map stride set twice\n");
- return false;
+ goto fail;
}
cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
}
if (!cube_map_stride) {
DRM_ERROR("Cube map stride not set\n");
- return false;
+ goto fail;
}
}
@@ -683,7 +654,7 @@ reloc_tex(struct vc4_exec_info *exec,
case VC4_TEXTURE_TYPE_YUV422R:
default:
DRM_ERROR("Texture format %d unsupported\n", type);
- return false;
+ goto fail;
}
utile_w = utile_width(cpp);
utile_h = utile_height(cpp);
@@ -699,7 +670,7 @@ reloc_tex(struct vc4_exec_info *exec,
if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
tiling_format, width, height, cpp)) {
- return false;
+ goto fail;
}
/* The mipmap levels are stored before the base of the texture. Make
@@ -740,7 +711,7 @@ reloc_tex(struct vc4_exec_info *exec,
i, level_width, level_height,
aligned_width, aligned_height,
level_size, offset);
- return false;
+ goto fail;
}
offset -= level_size;
@@ -749,54 +720,37 @@ reloc_tex(struct vc4_exec_info *exec,
*validated_p0 = tex->paddr + p0;
return true;
+ fail:
+ DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
+ DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
+ DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
+ DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
+ return false;
}
static int
-validate_shader_rec(struct drm_device *dev,
- struct vc4_exec_info *exec,
- struct vc4_shader_state *state)
+validate_gl_shader_rec(struct drm_device *dev,
+ struct vc4_exec_info *exec,
+ struct vc4_shader_state *state)
{
uint32_t *src_handles;
void *pkt_u, *pkt_v;
- enum shader_rec_reloc_type {
- RELOC_CODE,
- RELOC_VBO,
+ static const uint32_t shader_reloc_offsets[] = {
+ 4, /* fs */
+ 16, /* vs */
+ 28, /* cs */
};
- struct shader_rec_reloc {
- enum shader_rec_reloc_type type;
- uint32_t offset;
- };
- static const struct shader_rec_reloc gl_relocs[] = {
- { RELOC_CODE, 4 }, /* fs */
- { RELOC_CODE, 16 }, /* vs */
- { RELOC_CODE, 28 }, /* cs */
- };
- static const struct shader_rec_reloc nv_relocs[] = {
- { RELOC_CODE, 4 }, /* fs */
- { RELOC_VBO, 12 }
- };
- const struct shader_rec_reloc *relocs;
- struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
- uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
+ uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
+ struct drm_gem_cma_object *bo[shader_reloc_count + 8];
+ uint32_t nr_attributes, nr_relocs, packet_size;
int i;
- struct vc4_validated_shader_info *validated_shader = NULL;
- if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
- relocs = nv_relocs;
- nr_fixed_relocs = ARRAY_SIZE(nv_relocs);
-
- packet_size = 16;
- } else {
- relocs = gl_relocs;
- nr_fixed_relocs = ARRAY_SIZE(gl_relocs);
-
- nr_attributes = state->addr & 0x7;
- if (nr_attributes == 0)
- nr_attributes = 8;
- packet_size = gl_shader_rec_size(state->addr);
- }
- nr_relocs = nr_fixed_relocs + nr_attributes;
+ nr_attributes = state->addr & 0x7;
+ if (nr_attributes == 0)
+ nr_attributes = 8;
+ packet_size = gl_shader_rec_size(state->addr);
+ nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
if (nr_relocs * 4 > exec->shader_rec_size) {
DRM_ERROR("overflowed shader recs reading %d handles "
"from %d bytes left\n",
@@ -826,21 +780,30 @@ validate_shader_rec(struct drm_device *dev,
exec->shader_rec_v += roundup(packet_size, 16);
exec->shader_rec_size -= packet_size;
- for (i = 0; i < nr_relocs; i++) {
- enum vc4_bo_mode mode;
-
- if (i < nr_fixed_relocs && relocs[i].type == RELOC_CODE)
- mode = VC4_MODE_SHADER;
- else
- mode = VC4_MODE_RENDER;
-
- if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) {
- return false;
- }
+ if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
+ DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
+ return -EINVAL;
}
- for (i = 0; i < nr_fixed_relocs; i++) {
- uint32_t o = relocs[i].offset;
+	for (i = 0; i < shader_reloc_count; i++) {
+		/* Valid exec->bo[] indices are 0..bo_count-1, as in vc4_use_bo(). */
+		if (src_handles[i] >= exec->bo_count) {
+			DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
+			return -EINVAL;
+		}
+		bo[i] = exec->bo[src_handles[i]];
+		if (!bo[i])
+			return -EINVAL;
+	}
+ for (i = shader_reloc_count; i < nr_relocs; i++) {
+ bo[i] = vc4_use_bo(exec, src_handles[i]);
+ if (!bo[i])
+ return -EINVAL;
+ }
+
+ for (i = 0; i < shader_reloc_count; i++) {
+ struct vc4_validated_shader_info *validated_shader;
+ uint32_t o = shader_reloc_offsets[i];
uint32_t src_offset = *(uint32_t *)(pkt_u + o);
uint32_t *texture_handles_u;
void *uniform_data_u;
@@ -848,58 +811,50 @@ validate_shader_rec(struct drm_device *dev,
*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
- switch (relocs[i].type) {
- case RELOC_CODE:
- if (src_offset != 0) {
- DRM_ERROR("Shaders must be at offset 0 of "
- "the BO.\n");
- goto fail;
- }
-
- kfree(validated_shader);
- validated_shader = vc4_validate_shader(bo[i]);
- if (!validated_shader)
- goto fail;
-
- if (validated_shader->uniforms_src_size >
- exec->uniforms_size) {
- DRM_ERROR("Uniforms src buffer overflow\n");
- goto fail;
- }
-
- texture_handles_u = exec->uniforms_u;
- uniform_data_u = (texture_handles_u +
- validated_shader->num_texture_samples);
-
- memcpy(exec->uniforms_v, uniform_data_u,
- validated_shader->uniforms_size);
-
- for (tex = 0;
- tex < validated_shader->num_texture_samples;
- tex++) {
- if (!reloc_tex(exec,
- uniform_data_u,
- &validated_shader->texture_samples[tex],
- texture_handles_u[tex])) {
- goto fail;
- }
- }
-
- *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
-
- exec->uniforms_u += validated_shader->uniforms_src_size;
- exec->uniforms_v += validated_shader->uniforms_size;
- exec->uniforms_p += validated_shader->uniforms_size;
-
- break;
-
- case RELOC_VBO:
- break;
+ if (src_offset != 0) {
+ DRM_ERROR("Shaders must be at offset 0 of "
+ "the BO.\n");
+ return -EINVAL;
}
+
+ validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
+ if (!validated_shader)
+ return -EINVAL;
+
+ if (validated_shader->uniforms_src_size >
+ exec->uniforms_size) {
+ DRM_ERROR("Uniforms src buffer overflow\n");
+ return -EINVAL;
+ }
+
+ texture_handles_u = exec->uniforms_u;
+ uniform_data_u = (texture_handles_u +
+ validated_shader->num_texture_samples);
+
+ memcpy(exec->uniforms_v, uniform_data_u,
+ validated_shader->uniforms_size);
+
+ for (tex = 0;
+ tex < validated_shader->num_texture_samples;
+ tex++) {
+ if (!reloc_tex(exec,
+ uniform_data_u,
+ &validated_shader->texture_samples[tex],
+ texture_handles_u[tex])) {
+ return -EINVAL;
+ }
+ }
+
+ *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
+
+ exec->uniforms_u += validated_shader->uniforms_src_size;
+ exec->uniforms_v += validated_shader->uniforms_size;
+ exec->uniforms_p += validated_shader->uniforms_size;
}
for (i = 0; i < nr_attributes; i++) {
- struct drm_gem_cma_object *vbo = bo[nr_fixed_relocs + i];
+ struct drm_gem_cma_object *vbo =
+ bo[ARRAY_SIZE(shader_reloc_offsets) + i];
uint32_t o = 36 + i * 8;
uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
@@ -929,13 +884,7 @@ validate_shader_rec(struct drm_device *dev,
*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
}
- kfree(validated_shader);
-
return 0;
-
-fail:
- kfree(validated_shader);
- return -EINVAL;
}
int
@@ -946,7 +895,7 @@ vc4_validate_shader_recs(struct drm_device *dev,
int ret = 0;
for (i = 0; i < exec->shader_state_count; i++) {
- ret = validate_shader_rec(dev, exec, &exec->shader_state[i]);
+ ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
if (ret)
return ret;
}
diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c
index d29e2c9c318..e52a1941730 100644
--- a/src/gallium/drivers/vc4/vc4_blit.c
+++ b/src/gallium/drivers/vc4/vc4_blit.c
@@ -94,7 +94,7 @@ vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
struct vc4_context *vc4 = vc4_context(ctx);
if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
- fprintf(stderr, "blit unsupported %s -> %s",
+ fprintf(stderr, "blit unsupported %s -> %s\n",
util_format_short_name(info->src.resource->format),
util_format_short_name(info->dst.resource->format));
return false;
@@ -135,7 +135,7 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
info.dst.resource->nr_samples <= 1 &&
!util_format_is_depth_or_stencil(info.src.resource->format) &&
!util_format_is_pure_integer(info.src.resource->format)) {
- fprintf(stderr, "color resolve unimplemented");
+ fprintf(stderr, "color resolve unimplemented\n");
return;
}
@@ -147,7 +147,7 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
}
if (info.mask & PIPE_MASK_S) {
- fprintf(stderr, "cannot blit stencil, skipping");
+ fprintf(stderr, "cannot blit stencil, skipping\n");
info.mask &= ~PIPE_MASK_S;
}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index cbdb9e89cf6..f7b41f5816d 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2014 Broadcom
+ * Copyright © 2014-2015 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -94,7 +94,7 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
* allocate something new instead, since we assume that the
* user will proceed to CPU map it and fill it with stuff.
*/
- if (!vc4_bo_wait(bo, 0)) {
+ if (!vc4_bo_wait(bo, 0, NULL)) {
pipe_mutex_unlock(cache->lock);
return NULL;
}
@@ -381,15 +381,57 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo)
}
struct vc4_bo *
-vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data, uint32_t size,
- const char *name)
+vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data, uint32_t size)
{
- void *map;
struct vc4_bo *bo;
+ int ret;
+
+ bo = CALLOC_STRUCT(vc4_bo);
+ if (!bo)
+ return NULL;
+
+ pipe_reference_init(&bo->reference, 1);
+ bo->screen = screen;
+ bo->size = align(size, 4096);
+ bo->name = "code";
+ bo->private = false; /* Make sure it doesn't go back to the cache. */
+
+ if (!using_vc4_simulator) {
+ struct drm_vc4_create_shader_bo create = {
+ .size = size,
+ .data = (uintptr_t)data,
+ };
+
+ ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_CREATE_SHADER_BO,
+ &create);
+ bo->handle = create.handle;
+ } else {
+ struct drm_mode_create_dumb create;
+ memset(&create, 0, sizeof(create));
+
+ create.width = 128;
+ create.bpp = 8;
+ create.height = (size + 127) / 128;
+
+ ret = drmIoctl(screen->fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+ bo->handle = create.handle;
+ assert(create.size >= size);
+
+ vc4_bo_map(bo);
+ memcpy(bo->map, data, size);
+ }
+ if (ret != 0) {
+ fprintf(stderr, "create shader ioctl failure\n");
+ abort();
+ }
+
+ screen->bo_count++;
+ screen->bo_size += bo->size;
+ if (dump_stats) {
+ fprintf(stderr, "Allocated shader %dkb:\n", size / 1024);
+ vc4_bo_dump_stats(screen);
+ }
- bo = vc4_bo_alloc(screen, size, name);
- map = vc4_bo_map(bo);
- memcpy(map, data, size);
return bo;
}
@@ -413,63 +455,91 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
return true;
}
+static int vc4_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
+{
+ if (using_vc4_simulator)
+ return 0;
+
+ struct drm_vc4_wait_seqno wait = {
+ .seqno = seqno,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
+
+}
+
bool
-vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns)
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason)
{
if (screen->finished_seqno >= seqno)
return true;
- struct drm_vc4_wait_seqno wait;
- memset(&wait, 0, sizeof(wait));
- wait.seqno = seqno;
- wait.timeout_ns = timeout_ns;
-
- int ret;
- if (!using_vc4_simulator)
- ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
- else {
- wait.seqno = screen->finished_seqno;
- ret = 0;
+ if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc4_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on seqno %lld for %s\n",
+ (long long)seqno, reason);
+ }
}
- if (ret == 0) {
- screen->finished_seqno = wait.seqno;
- return true;
+ int ret = vc4_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
+
+ return false;
}
- if (errno != ETIME) {
- fprintf(stderr, "wait failed: %d\n", ret);
- abort();
- }
+ screen->finished_seqno = seqno;
+ return true;
+}
+
+static int vc4_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
+{
+ if (using_vc4_simulator)
+ return 0;
+
+ struct drm_vc4_wait_bo wait = {
+ .handle = handle,
+ .timeout_ns = timeout_ns,
+ };
+ int ret = drmIoctl(fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
+ if (ret == -1)
+ return -errno;
+ else
+ return 0;
- return false;
}
bool
-vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns)
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason)
{
struct vc4_screen *screen = bo->screen;
- struct drm_vc4_wait_bo wait;
- memset(&wait, 0, sizeof(wait));
- wait.handle = bo->handle;
- wait.timeout_ns = timeout_ns;
-
- int ret;
- if (!using_vc4_simulator)
- ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
- else
- ret = 0;
-
- if (ret == 0)
- return true;
-
- if (errno != ETIME) {
- fprintf(stderr, "wait failed: %d\n", ret);
- abort();
+ if (unlikely(vc4_debug & VC4_DEBUG_PERF) && timeout_ns && reason) {
+ if (vc4_wait_bo_ioctl(screen->fd, bo->handle, 0) == -ETIME) {
+ fprintf(stderr, "Blocking on %s BO for %s\n",
+ bo->name, reason);
+ }
}
- return false;
+ int ret = vc4_wait_bo_ioctl(screen->fd, bo->handle, timeout_ns);
+ if (ret) {
+ if (ret != -ETIME) {
+ fprintf(stderr, "wait failed: %d\n", ret);
+ abort();
+ }
+
+ return false;
+ }
+
+ return true;
}
void *
@@ -515,7 +585,7 @@ vc4_bo_map(struct vc4_bo *bo)
{
void *map = vc4_bo_map_unsynchronized(bo);
- bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE);
+ bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE, "bo map");
if (!ok) {
fprintf(stderr, "BO wait for map failed\n");
abort();
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h
index 7320695ca8e..b77506e242a 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -58,8 +58,8 @@ struct vc4_bo {
struct vc4_bo *vc4_bo_alloc(struct vc4_screen *screen, uint32_t size,
const char *name);
-struct vc4_bo *vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data,
- uint32_t size, const char *name);
+struct vc4_bo *vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data,
+ uint32_t size);
void vc4_bo_last_unreference(struct vc4_bo *bo);
void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time);
struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
@@ -113,10 +113,11 @@ void *
vc4_bo_map_unsynchronized(struct vc4_bo *bo);
bool
-vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns, const char *reason);
bool
-vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns,
+ const char *reason);
void
vc4_bufmgr_destroy(struct pipe_screen *pscreen);
diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c
index 0700e885cbf..ced4f2dfa86 100644
--- a/src/gallium/drivers/vc4/vc4_cl.c
+++ b/src/gallium/drivers/vc4/vc4_cl.c
@@ -36,11 +36,12 @@ vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl)
void
cl_ensure_space(struct vc4_cl *cl, uint32_t space)
{
- if ((cl->next - cl->base) + space <= cl->size)
+ uint32_t offset = cl_offset(cl);
+
+ if (offset + space <= cl->size)
return;
uint32_t size = MAX2(cl->size + space, cl->size * 2);
- uint32_t offset = cl->next -cl->base;
cl->base = reralloc(ralloc_parent(cl->base), cl->base, uint8_t, size);
cl->size = size;
@@ -60,15 +61,20 @@ vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo)
uint32_t hindex;
uint32_t *current_handles = vc4->bo_handles.base;
- for (hindex = 0;
- hindex < (vc4->bo_handles.next - vc4->bo_handles.base) / 4;
- hindex++) {
+ for (hindex = 0; hindex < cl_offset(&vc4->bo_handles) / 4; hindex++) {
if (current_handles[hindex] == bo->handle)
return hindex;
}
- cl_u32(&vc4->bo_handles, bo->handle);
- cl_ptr(&vc4->bo_pointers, vc4_bo_reference(bo));
+ struct vc4_cl_out *out;
+
+ out = cl_start(&vc4->bo_handles);
+ cl_u32(&out, bo->handle);
+ cl_end(&vc4->bo_handles, out);
+
+ out = cl_start(&vc4->bo_pointers);
+ cl_ptr(&out, vc4_bo_reference(bo));
+ cl_end(&vc4->bo_pointers, out);
return hindex;
}
diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 4a50e790942..bf4be0efc29 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -33,12 +33,20 @@
struct vc4_bo;
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc4_cl_out;
+
struct vc4_cl {
void *base;
- void *next;
+ struct vc4_cl_out *next;
+ struct vc4_cl_out *reloc_next;
uint32_t size;
- uint32_t reloc_next;
+#ifdef DEBUG
uint32_t reloc_count;
+#endif
};
void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl);
@@ -49,135 +57,149 @@ uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo);
struct PACKED unaligned_16 { uint16_t x; };
struct PACKED unaligned_32 { uint32_t x; };
-static inline void
-put_unaligned_32(void *ptr, uint32_t val)
+static inline uint32_t cl_offset(struct vc4_cl *cl)
{
- struct unaligned_32 *p = ptr;
+ return (char *)cl->next - (char *)cl->base;
+}
+
+static inline void
+cl_advance(struct vc4_cl_out **cl, uint32_t n)
+{
+ (*cl) = (struct vc4_cl_out *)((char *)(*cl) + n);
+}
+
+static inline struct vc4_cl_out *
+cl_start(struct vc4_cl *cl)
+{
+ return cl->next;
+}
+
+static inline void
+cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
+{
+ cl->next = next;
+ assert(cl_offset(cl) <= cl->size);
+}
+
+
+static inline void
+put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
+{
+ struct unaligned_32 *p = (void *)ptr;
p->x = val;
}
static inline void
-put_unaligned_16(void *ptr, uint16_t val)
+put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
{
- struct unaligned_16 *p = ptr;
+ struct unaligned_16 *p = (void *)ptr;
p->x = val;
}
static inline void
-cl_u8(struct vc4_cl *cl, uint8_t n)
+cl_u8(struct vc4_cl_out **cl, uint8_t n)
{
- assert((cl->next - cl->base) + 1 <= cl->size);
-
- *(uint8_t *)cl->next = n;
- cl->next++;
+ *(uint8_t *)(*cl) = n;
+ cl_advance(cl, 1);
}
static inline void
-cl_u16(struct vc4_cl *cl, uint16_t n)
+cl_u16(struct vc4_cl_out **cl, uint16_t n)
{
- assert((cl->next - cl->base) + 2 <= cl->size);
-
- put_unaligned_16(cl->next, n);
- cl->next += 2;
+ put_unaligned_16(*cl, n);
+ cl_advance(cl, 2);
}
static inline void
-cl_u32(struct vc4_cl *cl, uint32_t n)
+cl_u32(struct vc4_cl_out **cl, uint32_t n)
{
- assert((cl->next - cl->base) + 4 <= cl->size);
-
- put_unaligned_32(cl->next, n);
- cl->next += 4;
+ put_unaligned_32(*cl, n);
+ cl_advance(cl, 4);
}
static inline void
-cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
+cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
{
- assert((cl->next - cl->base) + 4 <= cl->size);
-
- *(uint32_t *)cl->next = n;
- cl->next += 4;
+ *(uint32_t *)(*cl) = n;
+ cl_advance(cl, 4);
}
static inline void
-cl_ptr(struct vc4_cl *cl, void *ptr)
+cl_ptr(struct vc4_cl_out **cl, void *ptr)
{
- assert((cl->next - cl->base) + sizeof(void *) <= cl->size);
-
- *(void **)cl->next = ptr;
- cl->next += sizeof(void *);
+ *(struct vc4_cl_out **)(*cl) = ptr;
+ cl_advance(cl, sizeof(void *));
}
static inline void
-cl_f(struct vc4_cl *cl, float f)
+cl_f(struct vc4_cl_out **cl, float f)
{
cl_u32(cl, fui(f));
}
static inline void
-cl_aligned_f(struct vc4_cl *cl, float f)
+cl_aligned_f(struct vc4_cl_out **cl, float f)
{
cl_aligned_u32(cl, fui(f));
}
static inline void
-cl_start_reloc(struct vc4_cl *cl, uint32_t n)
+cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
{
assert(n == 1 || n == 2);
+#ifdef DEBUG
assert(cl->reloc_count == 0);
cl->reloc_count = n;
+#endif
- cl_u8(cl, VC4_PACKET_GEM_HANDLES);
- cl->reloc_next = cl->next - cl->base;
- cl_u32(cl, 0); /* Space where hindex will be written. */
- cl_u32(cl, 0); /* Space where hindex will be written. */
+ cl_u8(out, VC4_PACKET_GEM_HANDLES);
+ cl->reloc_next = *out;
+ cl_u32(out, 0); /* Space where hindex will be written. */
+ cl_u32(out, 0); /* Space where hindex will be written. */
}
-static inline void
+static inline struct vc4_cl_out *
cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
{
+#ifdef DEBUG
assert(cl->reloc_count == 0);
cl->reloc_count = n;
- cl->reloc_next = cl->next - cl->base;
+#endif
+ cl->reloc_next = cl->next;
- /* Space where hindex will be written. */
- cl->next += n * 4;
+ /* Reserve the space where hindex will be written. */
+ cl_advance(&cl->next, n * 4);
+
+ return cl->next;
}
static inline void
-cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
-{
- *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
- cl->reloc_next += 4;
-
- cl->reloc_count--;
-
- cl_u32(cl, offset);
-}
-
-static inline void
-cl_aligned_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
-{
- *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
- cl->reloc_next += 4;
-
- cl->reloc_count--;
-
- cl_aligned_u32(cl, offset);
-}
-
-static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
struct vc4_bo *bo, uint32_t offset)
{
- cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+ *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+ cl_advance(&cl->reloc_next, 4);
+
+#ifdef DEBUG
+ cl->reloc_count--;
+#endif
+
+ cl_u32(cl_out, offset);
}
static inline void
cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
- struct vc4_bo *bo, uint32_t offset)
+ struct vc4_cl_out **cl_out,
+ struct vc4_bo *bo, uint32_t offset)
{
- cl_aligned_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+ *(uint32_t *)cl->reloc_next = vc4_gem_hindex(vc4, bo);
+ cl_advance(&cl->reloc_next, 4);
+
+#ifdef DEBUG
+ cl->reloc_count--;
+#endif
+
+ cl_aligned_u32(cl_out, offset);
}
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 69055081daa..6d748010baf 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -34,7 +34,7 @@ dump_float(void *cl, uint32_t offset, uint32_t hw_offset)
void *f = cl + offset;
fprintf(stderr, "0x%08x 0x%08x: %f (0x%08x)\n",
- offset, hw_offset, *(float *)f, *(uint32_t *)f);
+ offset, hw_offset, uif(*(uint32_t *)f), *(uint32_t *)f);
}
static void
@@ -47,7 +47,33 @@ dump_VC4_PACKET_BRANCH_TO_SUB_LIST(void *cl, uint32_t offset, uint32_t hw_offset
}
static void
-dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+dump_loadstore_full(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ uint32_t bits = *(uint32_t *)(cl + offset);
+
+ fprintf(stderr, "0x%08x 0x%08x: addr 0x%08x%s%s%s%s\n",
+ offset, hw_offset,
+ bits & ~0xf,
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL) ? "" : " clear",
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_ZS) ? "" : " zs",
+ (bits & VC4_LOADSTORE_FULL_RES_DISABLE_COLOR) ? "" : " color",
+ (bits & VC4_LOADSTORE_FULL_RES_EOF) ? " eof" : "");
+}
+
+static void
+dump_VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_STORE_FULL_RES_TILE_BUFFER(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_full(cl, offset, hw_offset);
+}
+
+static void
+dump_loadstore_general(void *cl, uint32_t offset, uint32_t hw_offset)
{
uint8_t *bytes = cl + offset;
uint32_t *addr = cl + offset + 2;
@@ -124,6 +150,18 @@ dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw
(*addr & (1 << 3)) ? " EOF" : "");
}
+static void
+dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_general(cl, offset, hw_offset);
+}
+
+static void
+dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+ dump_loadstore_general(cl, offset, hw_offset);
+}
+
static void
dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
{
@@ -291,63 +329,63 @@ dump_VC4_PACKET_GEM_HANDLES(void *cl, uint32_t offset, uint32_t hw_offset)
offset, hw_offset, handles[0], handles[1]);
}
-#define PACKET_DUMP(name, size) [name] = { #name, size, dump_##name }
-#define PACKET(name, size) [name] = { #name, size, NULL }
+#define PACKET_DUMP(name) [name] = { #name, name ## _SIZE, dump_##name }
+#define PACKET(name) [name] = { #name, name ## _SIZE, NULL }
static const struct packet_info {
const char *name;
uint8_t size;
void (*dump_func)(void *cl, uint32_t offset, uint32_t hw_offset);
} packet_info[] = {
- PACKET(VC4_PACKET_HALT, 1),
- PACKET(VC4_PACKET_NOP, 1),
+ PACKET(VC4_PACKET_HALT),
+ PACKET(VC4_PACKET_NOP),
- PACKET(VC4_PACKET_FLUSH, 1),
- PACKET(VC4_PACKET_FLUSH_ALL, 1),
- PACKET(VC4_PACKET_START_TILE_BINNING, 1),
- PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1),
- PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 1),
+ PACKET(VC4_PACKET_FLUSH),
+ PACKET(VC4_PACKET_FLUSH_ALL),
+ PACKET(VC4_PACKET_START_TILE_BINNING),
+ PACKET(VC4_PACKET_INCREMENT_SEMAPHORE),
+ PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE),
- PACKET(VC4_PACKET_BRANCH, 5),
- PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST, 5),
+ PACKET(VC4_PACKET_BRANCH),
+ PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST),
- PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 1),
- PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 1),
- PACKET(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER, 5),
- PACKET(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER, 5),
- PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 7),
- PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 7),
+ PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER),
+ PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF),
+ PACKET_DUMP(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
+ PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
+ PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
- PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 14),
- PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 10),
+ PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE),
+ PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
- PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE, 48),
- PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE, 49),
+ PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
+ PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE),
- PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 2),
+ PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT),
- PACKET(VC4_PACKET_GL_SHADER_STATE, 5),
- PACKET(VC4_PACKET_NV_SHADER_STATE, 5),
- PACKET(VC4_PACKET_VG_SHADER_STATE, 5),
+ PACKET(VC4_PACKET_GL_SHADER_STATE),
+ PACKET(VC4_PACKET_NV_SHADER_STATE),
+ PACKET(VC4_PACKET_VG_SHADER_STATE),
- PACKET(VC4_PACKET_CONFIGURATION_BITS, 4),
- PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS, 5),
- PACKET_DUMP(VC4_PACKET_POINT_SIZE, 5),
- PACKET_DUMP(VC4_PACKET_LINE_WIDTH, 5),
- PACKET(VC4_PACKET_RHT_X_BOUNDARY, 3),
- PACKET(VC4_PACKET_DEPTH_OFFSET, 5),
- PACKET(VC4_PACKET_CLIP_WINDOW, 9),
- PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET, 5),
- PACKET(VC4_PACKET_Z_CLIPPING, 9),
- PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING, 9),
- PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING, 9),
+ PACKET(VC4_PACKET_CONFIGURATION_BITS),
+ PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS),
+ PACKET_DUMP(VC4_PACKET_POINT_SIZE),
+ PACKET_DUMP(VC4_PACKET_LINE_WIDTH),
+ PACKET(VC4_PACKET_RHT_X_BOUNDARY),
+ PACKET(VC4_PACKET_DEPTH_OFFSET),
+ PACKET(VC4_PACKET_CLIP_WINDOW),
+ PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET),
+ PACKET(VC4_PACKET_Z_CLIPPING),
+ PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING),
+ PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING),
- PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 16),
- PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 11),
- PACKET(VC4_PACKET_CLEAR_COLORS, 14),
- PACKET_DUMP(VC4_PACKET_TILE_COORDINATES, 3),
+ PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG),
+ PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG),
+ PACKET(VC4_PACKET_CLEAR_COLORS),
+ PACKET_DUMP(VC4_PACKET_TILE_COORDINATES),
- PACKET_DUMP(VC4_PACKET_GEM_HANDLES, 9),
+ PACKET_DUMP(VC4_PACKET_GEM_HANDLES),
};
void
@@ -359,7 +397,7 @@ vc4_dump_cl(void *cl, uint32_t size, bool is_render)
while (offset < size) {
uint8_t header = cmds[offset];
- if (header > ARRAY_SIZE(packet_info) ||
+ if (header >= ARRAY_SIZE(packet_info) ||
!packet_info[header].name) {
fprintf(stderr, "0x%08x 0x%08x: Unknown packet 0x%02x (%d)!\n",
offset, hw_offset, header, header);
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index 630f8e68896..fff63158c9d 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -61,9 +61,11 @@ vc4_flush(struct pipe_context *pctx)
* FLUSH completes.
*/
cl_ensure_space(&vc4->bcl, 8);
- cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
/* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
- cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
+ cl_u8(&bcl, VC4_PACKET_FLUSH);
+ cl_end(&vc4->bcl, bcl);
if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
pipe_surface_reference(&vc4->color_write, cbuf);
@@ -103,8 +105,10 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
vc4_flush(pctx);
if (fence) {
+ struct pipe_screen *screen = pctx->screen;
struct vc4_fence *f = vc4_fence_create(vc4->screen,
vc4->last_emit_seqno);
+ screen->fence_reference(screen, fence, NULL);
*fence = (struct pipe_fence_handle *)f;
}
}
@@ -126,8 +130,7 @@ vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
* they match.
*/
struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4; i++) {
+ for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
if (referenced_bos[i] == bo) {
return true;
}
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index d5d6be16f6e..654c46f3c0d 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -67,7 +67,20 @@
#define VC4_DIRTY_CLIP (1 << 20)
#define VC4_DIRTY_UNCOMPILED_VS (1 << 21)
#define VC4_DIRTY_UNCOMPILED_FS (1 << 22)
-#define VC4_DIRTY_COMPILED_FS (1 << 24)
+#define VC4_DIRTY_COMPILED_CS (1 << 23)
+#define VC4_DIRTY_COMPILED_VS (1 << 24)
+#define VC4_DIRTY_COMPILED_FS (1 << 25)
+
+struct vc4_sampler_view {
+ struct pipe_sampler_view base;
+ uint32_t texture_p0;
+ uint32_t texture_p1;
+};
+
+struct vc4_sampler_state {
+ struct pipe_sampler_state base;
+ uint32_t texture_p1;
+};
struct vc4_texture_stateobj {
struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
@@ -121,6 +134,12 @@ struct vc4_compiled_shader {
struct vc4_ubo_range *ubo_ranges;
uint32_t num_ubo_ranges;
uint32_t ubo_size;
+ /**
+ * VC4_DIRTY_* flags that, when set in vc4->dirty, mean that the
+ * uniforms have to be rewritten (and therefore the shader state
+ * reemitted).
+ */
+ uint32_t uniform_dirty_bits;
/** bitmask of which inputs are color inputs, for flat shade handling. */
uint32_t color_inputs;
@@ -238,6 +257,11 @@ struct vc4_context {
*/
bool draw_call_queued;
+ /** Maximum index value valid for the vertex buffers of the current shader_rec. */
+ uint32_t max_index;
+ /** Last index bias baked into the current shader_rec. */
+ uint32_t last_index_bias;
+
struct primconvert_context *primconvert;
struct hash_table *fs_cache, *vs_cache;
@@ -246,6 +270,7 @@ struct vc4_context {
struct ra_regs *regs;
unsigned int reg_class_any;
+ unsigned int reg_class_r4_or_a;
unsigned int reg_class_a;
uint8_t prim_mode;
@@ -326,6 +351,18 @@ vc4_context(struct pipe_context *pcontext)
return (struct vc4_context *)pcontext;
}
+static inline struct vc4_sampler_view *
+vc4_sampler_view(struct pipe_sampler_view *psview)
+{
+ return (struct vc4_sampler_view *)psview;
+}
+
+static inline struct vc4_sampler_state *
+vc4_sampler_state(struct pipe_sampler_state *psampler)
+{
+ return (struct vc4_sampler_state *)psampler;
+}
+
struct pipe_context *vc4_context_create(struct pipe_screen *pscreen,
void *priv);
void vc4_draw_init(struct pipe_context *pctx);
@@ -337,6 +374,7 @@ void vc4_simulator_init(struct vc4_screen *screen);
int vc4_simulator_flush(struct vc4_context *vc4,
struct drm_vc4_submit_cl *args);
+void vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader);
void vc4_write_uniforms(struct vc4_context *vc4,
struct vc4_compiled_shader *shader,
struct vc4_constbuf_stateobj *cb,
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index 5e6d70d6f33..a4e5e092b1a 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -71,37 +71,40 @@ vc4_start_draw(struct vc4_context *vc4)
uint32_t height = vc4->framebuffer.height;
uint32_t tilew = align(width, 64) / 64;
uint32_t tileh = align(height, 64) / 64;
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
// Tile state data is 48 bytes per tile, I think it can be thrown away
// as soon as binning is finished.
- cl_u8(&vc4->bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
- cl_u32(&vc4->bcl, 0); /* tile alloc addr, filled by kernel */
- cl_u32(&vc4->bcl, 0); /* tile alloc size, filled by kernel */
- cl_u32(&vc4->bcl, 0); /* tile state addr, filled by kernel */
- cl_u8(&vc4->bcl, tilew);
- cl_u8(&vc4->bcl, tileh);
- cl_u8(&vc4->bcl, 0); /* flags, filled by kernel. */
+ cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
+ cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
+ cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
+ cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
+ cl_u8(&bcl, tilew);
+ cl_u8(&bcl, tileh);
+ cl_u8(&bcl, 0); /* flags, filled by kernel. */
/* START_TILE_BINNING resets the statechange counters in the hardware,
* which are what is used when a primitive is binned to a tile to
* figure out what new state packets need to be written to that tile's
* command list.
*/
- cl_u8(&vc4->bcl, VC4_PACKET_START_TILE_BINNING);
+ cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
/* Reset the current compressed primitives format. This gets modified
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
* of every tile.
*/
- cl_u8(&vc4->bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
- cl_u8(&vc4->bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
- VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+ cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
+ cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
+ VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
vc4->needs_flush = true;
vc4->draw_call_queued = true;
vc4->draw_width = width;
vc4->draw_height = height;
+
+ cl_end(&vc4->bcl, bcl);
}
static void
@@ -118,6 +121,111 @@ vc4_update_shadow_textures(struct pipe_context *pctx,
}
}
+static void
+vc4_emit_gl_shader_state(struct vc4_context *vc4, const struct pipe_draw_info *info)
+{
+ /* VC4_DIRTY_VTXSTATE */
+ struct vc4_vertex_stateobj *vtx = vc4->vtx;
+ /* VC4_DIRTY_VTXBUF */
+ struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
+
+ /* The simulator throws a fit if VS or CS don't read an attribute, so
+ * we emit a dummy read.
+ */
+ uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
+ /* Emit the shader record. */
+ struct vc4_cl_out *shader_rec =
+ cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
+ /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
+ cl_u16(&shader_rec,
+ VC4_SHADER_FLAG_ENABLE_CLIPPING |
+ VC4_SHADER_FLAG_FS_SINGLE_THREAD |
+ ((info->mode == PIPE_PRIM_POINTS &&
+ vc4->rasterizer->base.point_size_per_vertex) ?
+ VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
+
+ /* VC4_DIRTY_COMPILED_FS */
+ cl_u8(&shader_rec, 0); /* fs num uniforms (unused) */
+ cl_u8(&shader_rec, vc4->prog.fs->num_inputs);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.fs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ /* VC4_DIRTY_COMPILED_VS */
+ cl_u16(&shader_rec, 0); /* vs num uniforms */
+ cl_u8(&shader_rec, vc4->prog.vs->vattrs_live);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[8]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.vs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ /* VC4_DIRTY_COMPILED_CS */
+ cl_u16(&shader_rec, 0); /* cs num uniforms */
+ cl_u8(&shader_rec, vc4->prog.cs->vattrs_live);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[8]);
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, vc4->prog.cs->bo, 0);
+ cl_u32(&shader_rec, 0); /* UBO offset written by kernel */
+
+ uint32_t max_index = 0xffff;
+ for (int i = 0; i < vtx->num_elements; i++) {
+ struct pipe_vertex_element *elem = &vtx->pipe[i];
+ struct pipe_vertex_buffer *vb =
+ &vertexbuf->vb[elem->vertex_buffer_index];
+ struct vc4_resource *rsc = vc4_resource(vb->buffer);
+ /* not vc4->dirty tracked: vc4->last_index_bias */
+ uint32_t offset = (vb->buffer_offset +
+ elem->src_offset +
+ vb->stride * info->index_bias);
+ uint32_t vb_size = rsc->bo->size - offset;
+ uint32_t elem_size =
+ util_format_get_blocksize(elem->src_format);
+
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, rsc->bo, offset);
+ cl_u8(&shader_rec, elem_size - 1);
+ cl_u8(&shader_rec, vb->stride);
+ cl_u8(&shader_rec, vc4->prog.vs->vattr_offsets[i]);
+ cl_u8(&shader_rec, vc4->prog.cs->vattr_offsets[i]);
+
+ if (vb->stride > 0) {
+ max_index = MIN2(max_index,
+ (vb_size - elem_size) / vb->stride);
+ }
+ }
+
+ if (vtx->num_elements == 0) {
+ assert(num_elements_emit == 1);
+ struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
+ cl_reloc(vc4, &vc4->shader_rec, &shader_rec, bo, 0);
+ cl_u8(&shader_rec, 16 - 1); /* element size */
+ cl_u8(&shader_rec, 0); /* stride */
+ cl_u8(&shader_rec, 0); /* VS VPM offset */
+ cl_u8(&shader_rec, 0); /* CS VPM offset */
+ vc4_bo_unreference(&bo);
+ }
+ cl_end(&vc4->shader_rec, shader_rec);
+
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
+ /* the actual draw call. */
+ cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
+ assert(vtx->num_elements <= 8);
+ /* Note that number of attributes == 0 in the packet means 8
+ * attributes. This field also contains the offset into shader_rec.
+ */
+ cl_u32(&bcl, num_elements_emit & 0x7);
+ cl_end(&vc4->bcl, bcl);
+
+ vc4_write_uniforms(vc4, vc4->prog.fs,
+ &vc4->constbuf[PIPE_SHADER_FRAGMENT],
+ &vc4->fragtex);
+ vc4_write_uniforms(vc4, vc4->prog.vs,
+ &vc4->constbuf[PIPE_SHADER_VERTEX],
+ &vc4->verttex);
+ vc4_write_uniforms(vc4, vc4->prog.cs,
+ &vc4->constbuf[PIPE_SHADER_VERTEX],
+ &vc4->verttex);
+
+ vc4->last_index_bias = info->index_bias;
+ vc4->max_index = max_index;
+}
+
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
@@ -138,9 +246,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
vc4_get_draw_cl_space(vc4);
- struct vc4_vertex_stateobj *vtx = vc4->vtx;
- struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
-
if (vc4->prim_mode != info->mode) {
vc4->prim_mode = info->mode;
vc4->dirty |= VC4_DIRTY_PRIM_MODE;
@@ -150,94 +255,27 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
vc4_update_compiled_shaders(vc4, info->mode);
vc4_emit_state(pctx);
+
+ if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
+ VC4_DIRTY_VTXSTATE |
+ VC4_DIRTY_PRIM_MODE |
+ VC4_DIRTY_RASTERIZER |
+ VC4_DIRTY_COMPILED_CS |
+ VC4_DIRTY_COMPILED_VS |
+ VC4_DIRTY_COMPILED_FS |
+ vc4->prog.cs->uniform_dirty_bits |
+ vc4->prog.vs->uniform_dirty_bits |
+ vc4->prog.fs->uniform_dirty_bits)) ||
+ vc4->last_index_bias != info->index_bias) {
+ vc4_emit_gl_shader_state(vc4, info);
+ }
+
vc4->dirty = 0;
- vc4_write_uniforms(vc4, vc4->prog.fs,
- &vc4->constbuf[PIPE_SHADER_FRAGMENT],
- &vc4->fragtex);
- vc4_write_uniforms(vc4, vc4->prog.vs,
- &vc4->constbuf[PIPE_SHADER_VERTEX],
- &vc4->verttex);
- vc4_write_uniforms(vc4, vc4->prog.cs,
- &vc4->constbuf[PIPE_SHADER_VERTEX],
- &vc4->verttex);
-
- /* The simulator throws a fit if VS or CS don't read an attribute, so
- * we emit a dummy read.
- */
- uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
- /* Emit the shader record. */
- cl_start_shader_reloc(&vc4->shader_rec, 3 + num_elements_emit);
- cl_u16(&vc4->shader_rec,
- VC4_SHADER_FLAG_ENABLE_CLIPPING |
- ((info->mode == PIPE_PRIM_POINTS &&
- vc4->rasterizer->base.point_size_per_vertex) ?
- VC4_SHADER_FLAG_VS_POINT_SIZE : 0));
- cl_u8(&vc4->shader_rec, 0); /* fs num uniforms (unused) */
- cl_u8(&vc4->shader_rec, vc4->prog.fs->num_inputs);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.fs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- cl_u16(&vc4->shader_rec, 0); /* vs num uniforms */
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattrs_live);
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.vs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- cl_u16(&vc4->shader_rec, 0); /* cs num uniforms */
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattrs_live);
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[8]);
- cl_reloc(vc4, &vc4->shader_rec, vc4->prog.cs->bo, 0);
- cl_u32(&vc4->shader_rec, 0); /* UBO offset written by kernel */
-
- uint32_t max_index = 0xffff;
- uint32_t vpm_offset = 0;
- for (int i = 0; i < vtx->num_elements; i++) {
- struct pipe_vertex_element *elem = &vtx->pipe[i];
- struct pipe_vertex_buffer *vb =
- &vertexbuf->vb[elem->vertex_buffer_index];
- struct vc4_resource *rsc = vc4_resource(vb->buffer);
- uint32_t offset = vb->buffer_offset + elem->src_offset;
- uint32_t vb_size = rsc->bo->size - offset;
- uint32_t elem_size =
- util_format_get_blocksize(elem->src_format);
-
- cl_reloc(vc4, &vc4->shader_rec, rsc->bo, offset);
- cl_u8(&vc4->shader_rec, elem_size - 1);
- cl_u8(&vc4->shader_rec, vb->stride);
- cl_u8(&vc4->shader_rec, vc4->prog.vs->vattr_offsets[i]);
- cl_u8(&vc4->shader_rec, vc4->prog.cs->vattr_offsets[i]);
-
- vpm_offset += align(elem_size, 4);
-
- if (vb->stride > 0) {
- max_index = MIN2(max_index,
- (vb_size - elem_size) / vb->stride);
- }
- }
-
- if (vtx->num_elements == 0) {
- assert(num_elements_emit == 1);
- struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
- cl_reloc(vc4, &vc4->shader_rec, bo, 0);
- cl_u8(&vc4->shader_rec, 16 - 1); /* element size */
- cl_u8(&vc4->shader_rec, 0); /* stride */
- cl_u8(&vc4->shader_rec, 0); /* VS VPM offset */
- cl_u8(&vc4->shader_rec, 0); /* CS VPM offset */
- vc4_bo_unreference(&bo);
- }
-
- /* the actual draw call. */
- cl_u8(&vc4->bcl, VC4_PACKET_GL_SHADER_STATE);
- assert(vtx->num_elements <= 8);
- /* Note that number of attributes == 0 in the packet means 8
- * attributes. This field also contains the offset into shader_rec.
- */
- cl_u32(&vc4->bcl, num_elements_emit & 0x7);
-
/* Note that the primitive type fields match with OpenGL/gallium
* definitions, up to but not including QUADS.
*/
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
if (info->indexed) {
uint32_t offset = vc4->indexbuf.offset;
uint32_t index_size = vc4->indexbuf.index_size;
@@ -251,25 +289,26 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
struct vc4_resource *rsc = vc4_resource(prsc);
- cl_start_reloc(&vc4->bcl, 1);
- cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
- cl_u8(&vc4->bcl,
+ cl_start_reloc(&vc4->bcl, &bcl, 1);
+ cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
+ cl_u8(&bcl,
info->mode |
(index_size == 2 ?
VC4_INDEX_BUFFER_U16:
VC4_INDEX_BUFFER_U8));
- cl_u32(&vc4->bcl, info->count);
- cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
- cl_u32(&vc4->bcl, max_index);
+ cl_u32(&bcl, info->count);
+ cl_reloc(vc4, &vc4->bcl, &bcl, rsc->bo, offset);
+ cl_u32(&bcl, vc4->max_index);
if (vc4->indexbuf.index_size == 4)
pipe_resource_reference(&prsc, NULL);
} else {
- cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
- cl_u8(&vc4->bcl, info->mode);
- cl_u32(&vc4->bcl, info->count);
- cl_u32(&vc4->bcl, info->start);
+ cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
+ cl_u8(&bcl, info->mode);
+ cl_u32(&bcl, info->count);
+ cl_u32(&bcl, info->start);
}
+ cl_end(&vc4->bcl, bcl);
if (vc4->zsa && vc4->zsa->base.depth.enabled) {
vc4->resolve |= PIPE_CLEAR_DEPTH;
diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h
index 5f1ee4fa125..863ef8da8fb 100644
--- a/src/gallium/drivers/vc4/vc4_drm.h
+++ b/src/gallium/drivers/vc4/vc4_drm.h
@@ -31,12 +31,14 @@
#define DRM_VC4_WAIT_BO 0x02
#define DRM_VC4_CREATE_BO 0x03
#define DRM_VC4_MMAP_BO 0x04
+#define DRM_VC4_CREATE_SHADER_BO 0x05
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
struct drm_vc4_submit_rcl_surface {
uint32_t hindex; /* Handle index, or ~0 if not present. */
@@ -182,6 +184,29 @@ struct drm_vc4_create_bo {
uint32_t pad;
};
+/**
+ * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
+ * shader BOs.
+ *
+ * Since allowing a shader to be overwritten while it's also being
+ * executed from would allow privilege escalation, shaders must be
+ * created using this ioctl, and they can't be mmapped later.
+ */
+struct drm_vc4_create_shader_bo {
+ /* Size of the data argument. */
+ uint32_t size;
+ /* Flags, currently must be 0. */
+ uint32_t flags;
+
+ /* Pointer to the data. */
+ uint64_t data;
+
+ /** Returned GEM handle for the BO. */
+ uint32_t handle;
+ /* Pad, must be 0. */
+ uint32_t pad;
+};
+
/**
* struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
*
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index d2b54fccf91..ba064ff889b 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -28,23 +28,24 @@ vc4_emit_state(struct pipe_context *pctx)
{
struct vc4_context *vc4 = vc4_context(pctx);
+ struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
float *vpscale = vc4->viewport.scale;
float *vptranslate = vc4->viewport.translate;
- float vp_minx = -fabs(vpscale[0]) + vptranslate[0];
- float vp_maxx = fabs(vpscale[0]) + vptranslate[0];
- float vp_miny = -fabs(vpscale[1]) + vptranslate[1];
- float vp_maxy = fabs(vpscale[1]) + vptranslate[1];
+ float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
+ float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
+ float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
+ float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
uint32_t minx = MAX2(vc4->scissor.minx, vp_minx);
uint32_t miny = MAX2(vc4->scissor.miny, vp_miny);
uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
- cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
- cl_u16(&vc4->bcl, minx);
- cl_u16(&vc4->bcl, miny);
- cl_u16(&vc4->bcl, maxx - minx);
- cl_u16(&vc4->bcl, maxy - miny);
+ cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
+ cl_u16(&bcl, minx);
+ cl_u16(&bcl, miny);
+ cl_u16(&bcl, maxx - minx);
+ cl_u16(&bcl, maxy - miny);
vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
@@ -53,47 +54,49 @@ vc4_emit_state(struct pipe_context *pctx)
}
if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
- cl_u8(&vc4->bcl, VC4_PACKET_CONFIGURATION_BITS);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[0] |
vc4->zsa->config_bits[0]);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[1] |
vc4->zsa->config_bits[1]);
- cl_u8(&vc4->bcl,
+ cl_u8(&bcl,
vc4->rasterizer->config_bits[2] |
vc4->zsa->config_bits[2]);
}
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
- cl_u8(&vc4->bcl, VC4_PACKET_DEPTH_OFFSET);
- cl_u16(&vc4->bcl, vc4->rasterizer->offset_factor);
- cl_u16(&vc4->bcl, vc4->rasterizer->offset_units);
+ cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
+ cl_u16(&bcl, vc4->rasterizer->offset_factor);
+ cl_u16(&bcl, vc4->rasterizer->offset_units);
- cl_u8(&vc4->bcl, VC4_PACKET_POINT_SIZE);
- cl_f(&vc4->bcl, vc4->rasterizer->point_size);
+ cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
+ cl_f(&bcl, vc4->rasterizer->point_size);
- cl_u8(&vc4->bcl, VC4_PACKET_LINE_WIDTH);
- cl_f(&vc4->bcl, vc4->rasterizer->base.line_width);
+ cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
+ cl_f(&bcl, vc4->rasterizer->base.line_width);
}
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
- cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_XY_SCALING);
- cl_f(&vc4->bcl, vc4->viewport.scale[0] * 16.0f);
- cl_f(&vc4->bcl, vc4->viewport.scale[1] * 16.0f);
+ cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
+ cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
+ cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
- cl_u8(&vc4->bcl, VC4_PACKET_CLIPPER_Z_SCALING);
- cl_f(&vc4->bcl, vc4->viewport.translate[2]);
- cl_f(&vc4->bcl, vc4->viewport.scale[2]);
+ cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
+ cl_f(&bcl, vc4->viewport.translate[2]);
+ cl_f(&bcl, vc4->viewport.scale[2]);
- cl_u8(&vc4->bcl, VC4_PACKET_VIEWPORT_OFFSET);
- cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[0]);
- cl_u16(&vc4->bcl, 16 * vc4->viewport.translate[1]);
+ cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
+ cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
+ cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
}
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
- cl_u8(&vc4->bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
- cl_u32(&vc4->bcl, vc4->rasterizer->base.flatshade ?
+ cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
+ cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
vc4->prog.fs->color_inputs : 0);
}
+
+ cl_end(&vc4->bcl, bcl);
}
diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c
index f2ee91de61a..b6fb2a8a460 100644
--- a/src/gallium/drivers/vc4/vc4_fence.c
+++ b/src/gallium/drivers/vc4/vc4_fence.c
@@ -59,16 +59,6 @@ vc4_fence_reference(struct pipe_screen *pscreen,
*p = f;
}
-static boolean
-vc4_fence_signalled(struct pipe_screen *pscreen,
- struct pipe_fence_handle *pf)
-{
- struct vc4_screen *screen = vc4_screen(pscreen);
- struct vc4_fence *f = (struct vc4_fence *)pf;
-
- return vc4_wait_seqno(screen, f->seqno, 0);
-}
-
static boolean
vc4_fence_finish(struct pipe_screen *pscreen,
struct pipe_fence_handle *pf,
@@ -77,7 +67,7 @@ vc4_fence_finish(struct pipe_screen *pscreen,
struct vc4_screen *screen = vc4_screen(pscreen);
struct vc4_fence *f = (struct vc4_fence *)pf;
- return vc4_wait_seqno(screen, f->seqno, timeout_ns);
+ return vc4_wait_seqno(screen, f->seqno, timeout_ns, "fence wait");
}
struct vc4_fence *
@@ -98,6 +88,5 @@ void
vc4_fence_init(struct vc4_screen *screen)
{
screen->base.fence_reference = vc4_fence_reference;
- screen->base.fence_signalled = vc4_fence_signalled;
screen->base.fence_finish = vc4_fence_finish;
}
diff --git a/src/gallium/drivers/vc4/vc4_formats.c b/src/gallium/drivers/vc4/vc4_formats.c
index 004bac70c67..ffce61237de 100644
--- a/src/gallium/drivers/vc4/vc4_formats.c
+++ b/src/gallium/drivers/vc4/vc4_formats.c
@@ -108,7 +108,7 @@ static const struct vc4_format vc4_format_table[] = {
static const struct vc4_format *
get_format(enum pipe_format f)
{
- if (f > ARRAY_SIZE(vc4_format_table) ||
+ if (f >= ARRAY_SIZE(vc4_format_table) ||
!vc4_format_table[f].present)
return NULL;
else
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index dcade15443a..7ebd9f160eb 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -44,8 +44,7 @@ void
vc4_job_reset(struct vc4_context *vc4)
{
struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
- for (int i = 0; i < (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4; i++) {
+ for (int i = 0; i < cl_offset(&vc4->bo_handles) / 4; i++) {
vc4_bo_unreference(&referenced_bos[i]);
}
vc4_reset_cl(&vc4->bcl);
@@ -145,7 +144,7 @@ vc4_job_submit(struct vc4_context *vc4)
{
if (vc4_debug & VC4_DEBUG_CL) {
fprintf(stderr, "BCL:\n");
- vc4_dump_cl(vc4->bcl.base, vc4->bcl.next - vc4->bcl.base, false);
+ vc4_dump_cl(vc4->bcl.base, cl_offset(&vc4->bcl), false);
}
struct drm_vc4_submit_cl submit;
@@ -164,15 +163,14 @@ vc4_job_submit(struct vc4_context *vc4)
vc4->zs_write, true, true);
submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
- submit.bo_handle_count = (vc4->bo_handles.next -
- vc4->bo_handles.base) / 4;
+ submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
submit.bin_cl = (uintptr_t)vc4->bcl.base;
- submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
+ submit.bin_cl_size = cl_offset(&vc4->bcl);
submit.shader_rec = (uintptr_t)vc4->shader_rec.base;
- submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
+ submit.shader_rec_size = cl_offset(&vc4->shader_rec);
submit.shader_rec_count = vc4->shader_rec_count;
submit.uniforms = (uintptr_t)vc4->uniforms.base;
- submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
+ submit.uniforms_size = cl_offset(&vc4->uniforms);
assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
submit.min_x_tile = vc4->draw_min_x / 64;
@@ -207,7 +205,7 @@ vc4_job_submit(struct vc4_context *vc4)
if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
- PIPE_TIMEOUT_INFINITE)) {
+ PIPE_TIMEOUT_INFINITE, "sync")) {
fprintf(stderr, "Wait failed.\n");
abort();
}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
new file mode 100644
index 00000000000..a372a6c0cdc
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Implements most of the fixed function fragment pipeline in shader code.
+ *
+ * VC4 doesn't have any hardware support for blending, alpha test, logic ops,
+ * or color mask. Instead, you read the current contents of the destination
+ * from the tile buffer after having waited for the scoreboard (which is
+ * handled by vc4_qpu_emit.c), then do math using your output color and that
+ * destination value, and update the output color appropriately.
+ */
+
+/**
+ * Lowers fixed-function blending to a load of the destination color and a
+ * series of ALU operations before the store of the output.
+ */
+#include "util/u_format.h"
+#include "vc4_qir.h"
+#include "glsl/nir/nir_builder.h"
+#include "vc4_context.h"
+
+/** Emits a one-component load_input of VC4_NIR_TLB_COLOR_READ_INPUT, i.e.
+ *  the fragment color previously stored in the tile buffer. */
+static nir_ssa_def *
+vc4_nir_get_dst_color(nir_builder *b)
+{
+        nir_intrinsic_instr *tlb_read =
+                nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+        tlb_read->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
+        tlb_read->num_components = 1;
+        nir_ssa_dest_init(&tlb_read->instr, &tlb_read->dest, 1, NULL);
+        nir_builder_instr_insert(b, &tlb_read->instr);
+        return &tlb_read->dest.ssa;
+}
+
+/** Linearizes one sRGB channel: srgb / 12.92 below 0.04045, otherwise
+ *  ((srgb + 0.055) / 1.055) ^ 2.4. */
+static nir_ssa_def *
+vc4_nir_srgb_decode(nir_builder *b, nir_ssa_def *srgb)
+{
+        nir_ssa_def *in_linear_part = nir_flt(b, srgb, nir_imm_float(b, 0.04045));
+        nir_ssa_def *linear_part = nir_fmul(b, srgb, nir_imm_float(b, 1.0 / 12.92));
+
+        nir_ssa_def *offset = nir_fadd(b, srgb, nir_imm_float(b, 0.055));
+        nir_ssa_def *scaled = nir_fmul(b, offset, nir_imm_float(b, 1.0 / 1.055));
+        nir_ssa_def *curve_part = nir_fpow(b, scaled, nir_imm_float(b, 2.4));
+
+        return nir_bcsel(b, in_linear_part, linear_part, curve_part);
+}
+
+/** Encodes one linear channel as sRGB: linear * 12.92 below 0.0031308,
+ *  otherwise 1.055 * linear ^ 0.41666 - 0.055. */
+static nir_ssa_def *
+vc4_nir_srgb_encode(nir_builder *b, nir_ssa_def *linear)
+{
+        nir_ssa_def *in_linear_part =
+                nir_flt(b, linear, nir_imm_float(b, 0.0031308));
+        nir_ssa_def *linear_part = nir_fmul(b, linear, nir_imm_float(b, 12.92));
+
+        nir_ssa_def *powered = nir_fpow(b, linear, nir_imm_float(b, 0.41666));
+        nir_ssa_def *scaled = nir_fmul(b, nir_imm_float(b, 1.055), powered);
+        nir_ssa_def *curve_part = nir_fsub(b, scaled, nir_imm_float(b, 0.055));
+
+        return nir_bcsel(b, in_linear_part, linear_part, curve_part);
+}
+
+static nir_ssa_def *
+vc4_blend_channel(nir_builder *b,
+ nir_ssa_def **src,
+ nir_ssa_def **dst,
+ unsigned factor,
+ int channel)
+{ /* Returns the value selected by blend factor `factor` for `channel` (index 3 is alpha). */
+ switch(factor) {
+ case PIPE_BLENDFACTOR_ONE:
+ return nir_imm_float(b, 1.0);
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return src[channel];
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return src[3];
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return dst[3];
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return dst[channel];
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* min(src[3], 1 - dst[3]) for 0-2, 1.0 for 3 */
+ if (channel != 3) {
+ return nir_fmin(b,
+ src[3],
+ nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ dst[3]));
+ } else {
+ return nir_imm_float(b, 1.0);
+ }
+ case PIPE_BLENDFACTOR_CONST_COLOR: /* blend constant read as a state uniform */
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel);
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W);
+ case PIPE_BLENDFACTOR_ZERO:
+ return nir_imm_float(b, 0.0);
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR: /* every INV_* factor below is 1.0 - <factor> */
+ return nir_fsub(b, nir_imm_float(b, 1.0), src[channel]);
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0), src[3]);
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0), dst[3]);
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return nir_fsub(b, nir_imm_float(b, 1.0), dst[channel]);
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return nir_fsub(b, nir_imm_float(b, 1.0),
+ vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_X + channel));
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return nir_fsub(b, nir_imm_float(b, 1.0),
+ vc4_nir_get_state_uniform(b, QUNIFORM_BLEND_CONST_COLOR_W));
+ /* The SRC1 factors are grouped with unknown values: warn and use 1.0. */
+ default:
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ /* Unsupported. */
+ fprintf(stderr, "Unknown blend factor %d\n", factor);
+ return nir_imm_float(b, 1.0);
+ }
+}
+
+/**
+ * Combines the src and dst blend terms with the PIPE_BLEND_* equation
+ * given by func; an unrecognized func is reported and yields src.
+ */
+static nir_ssa_def *
+vc4_blend_func(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst,
+               unsigned func)
+{
+        if (func == PIPE_BLEND_ADD)
+                return nir_fadd(b, src, dst);
+        if (func == PIPE_BLEND_SUBTRACT)
+                return nir_fsub(b, src, dst);
+        if (func == PIPE_BLEND_REVERSE_SUBTRACT)
+                return nir_fsub(b, dst, src);
+        if (func == PIPE_BLEND_MIN)
+                return nir_fmin(b, src, dst);
+        if (func == PIPE_BLEND_MAX)
+                return nir_fmax(b, src, dst);
+
+        /* Anything else is unsupported: warn and pass the source through. */
+        fprintf(stderr, "Unknown blend func %d\n", func);
+        return src;
+}
+
+/**
+ * Computes the four blended output channels into result[]: a plain copy of
+ * src_color[] when blending is disabled; otherwise src_color[] is saturated
+ * in place, src/dst channels are scaled by their blend factors and combined
+ * by the blend function (rgb state for channels 0-2, alpha state for 3).
+ */
+static void
+vc4_do_blending(struct vc4_compile *c, nir_builder *b, nir_ssa_def **result,
+                nir_ssa_def **src_color, nir_ssa_def **dst_color)
+{
+        struct pipe_rt_blend_state *blend = &c->fs_key->blend;
+        nir_ssa_def *src_term[4], *dst_term[4];
+
+        if (!blend->blend_enable) {
+                for (int ch = 0; ch < 4; ch++)
+                        result[ch] = src_color[ch];
+                return;
+        }
+
+        /* Clamp the src color to [0, 1]. Dest is already clamped. */
+        for (int ch = 0; ch < 4; ch++)
+                src_color[ch] = nir_fsat(b, src_color[ch]);
+
+        for (int ch = 0; ch < 4; ch++) {
+                bool alpha = (ch == 3);
+                int sf = alpha ? blend->alpha_src_factor : blend->rgb_src_factor;
+                int df = alpha ? blend->alpha_dst_factor : blend->rgb_dst_factor;
+                nir_ssa_def *sw = vc4_blend_channel(b, src_color, dst_color, sf, ch);
+                src_term[ch] = nir_fmul(b, src_color[ch], sw);
+                nir_ssa_def *dw = vc4_blend_channel(b, src_color, dst_color, df, ch);
+                dst_term[ch] = nir_fmul(b, dst_color[ch], dw);
+        }
+
+        for (int ch = 0; ch < 4; ch++) {
+                unsigned func = (ch == 3) ? blend->alpha_func : blend->rgb_func;
+                result[ch] = vc4_blend_func(b, src_term[ch], dst_term[ch], func);
+        }
+}
+
+static nir_ssa_def *
+vc4_logicop(nir_builder *b, int logicop_func,
+ nir_ssa_def *src, nir_ssa_def *dst)
+{ /* Builds the bitwise NIR expression for one PIPE_LOGICOP_* applied to src and dst. */
+ switch (logicop_func) {
+ case PIPE_LOGICOP_CLEAR: /* all bits zero */
+ return nir_imm_int(b, 0);
+ case PIPE_LOGICOP_NOR:
+ return nir_inot(b, nir_ior(b, src, dst));
+ case PIPE_LOGICOP_AND_INVERTED: /* *_INVERTED ops negate src */
+ return nir_iand(b, nir_inot(b, src), dst);
+ case PIPE_LOGICOP_COPY_INVERTED:
+ return nir_inot(b, src);
+ case PIPE_LOGICOP_AND_REVERSE: /* *_REVERSE ops negate dst */
+ return nir_iand(b, src, nir_inot(b, dst));
+ case PIPE_LOGICOP_INVERT:
+ return nir_inot(b, dst);
+ case PIPE_LOGICOP_XOR:
+ return nir_ixor(b, src, dst);
+ case PIPE_LOGICOP_NAND:
+ return nir_inot(b, nir_iand(b, src, dst));
+ case PIPE_LOGICOP_AND:
+ return nir_iand(b, src, dst);
+ case PIPE_LOGICOP_EQUIV:
+ return nir_inot(b, nir_ixor(b, src, dst));
+ case PIPE_LOGICOP_NOOP: /* keep the destination unchanged */
+ return dst;
+ case PIPE_LOGICOP_OR_INVERTED:
+ return nir_ior(b, nir_inot(b, src), dst);
+ case PIPE_LOGICOP_OR_REVERSE:
+ return nir_ior(b, src, nir_inot(b, dst));
+ case PIPE_LOGICOP_OR:
+ return nir_ior(b, src, dst);
+ case PIPE_LOGICOP_SET: /* all bits one */
+ return nir_imm_int(b, ~0);
+ default:
+ fprintf(stderr, "Unknown logic op %d\n", logicop_func);
+ /* FALLTHROUGH */
+ case PIPE_LOGICOP_COPY: /* also reached by unknown ops after the warning */
+ return src;
+ }
+}
+
+static nir_ssa_def *
+vc4_nir_pipe_compare_func(nir_builder *b, int func,
+ nir_ssa_def *src0, nir_ssa_def *src1)
+{ /* Builds the float comparison "src0 <func> src1" for one PIPE_FUNC_* value. */
+ switch (func) {
+ default:
+ fprintf(stderr, "Unknown compare func %d\n", func);
+ /* FALLTHROUGH */
+ case PIPE_FUNC_NEVER: /* unknown funcs warn above and then yield 0 as well */
+ return nir_imm_int(b, 0);
+ case PIPE_FUNC_ALWAYS:
+ return nir_imm_int(b, ~0);
+ case PIPE_FUNC_EQUAL:
+ return nir_feq(b, src0, src1);
+ case PIPE_FUNC_NOTEQUAL:
+ return nir_fne(b, src0, src1);
+ case PIPE_FUNC_GREATER:
+ return nir_flt(b, src1, src0); /* src0 > src1 written as src1 < src0 */
+ case PIPE_FUNC_GEQUAL:
+ return nir_fge(b, src0, src1);
+ case PIPE_FUNC_LESS:
+ return nir_flt(b, src0, src1);
+ case PIPE_FUNC_LEQUAL:
+ return nir_fge(b, src1, src0); /* src0 <= src1 written as src1 >= src0 */
+ }
+}
+
+/** Emits, when alpha test is enabled in the FS key, a discard_if for
+ *  fragments whose alpha fails the test against QUNIFORM_ALPHA_REF. */
+static void
+vc4_nir_emit_alpha_test_discard(struct vc4_compile *c, nir_builder *b,
+                                nir_ssa_def *alpha)
+{
+        nir_intrinsic_instr *kill;
+        nir_ssa_def *ref, *pass;
+
+        if (!c->fs_key->alpha_test)
+                return;
+
+        ref = vc4_nir_get_state_uniform(b, QUNIFORM_ALPHA_REF);
+        pass = vc4_nir_pipe_compare_func(b, c->fs_key->alpha_test_func,
+                                         alpha, ref);
+        kill = nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
+        kill->src[0] = nir_src_for_ssa(nir_inot(b, pass));
+        kill->num_components = 1;
+        nir_builder_instr_insert(b, &kill->instr);
+}
+
+/**
+ * Rewrites one color store_output so that it stores a single packed value
+ * instead of a vec4: the tile buffer color is loaded and unpacked, alpha test,
+ * blending, sRGB conversion, logic op and color mask are applied in shader
+ * code, and the result is packed back with nir_pack_unorm_4x8.
+ */
+static void
+vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
+                          nir_intrinsic_instr *intr)
+{
+        enum pipe_format color_format = c->fs_key->color_format;
+        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
+
+        /* Pull out the float src/dst color components. */
+        nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
+        nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
+        nir_ssa_def *src_color[4], *unpacked_dst_color[4];
+        for (unsigned i = 0; i < 4; i++) {
+                src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
+                unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
+        }
+
+        /* Unswizzle the destination color. */
+        nir_ssa_def *dst_color[4];
+        for (unsigned i = 0; i < 4; i++) {
+                dst_color[i] = vc4_nir_get_swizzled_channel(b, unpacked_dst_color,
+                                                            format_swiz[i]);
+        }
+
+        vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
+
+        /* Turn dst color to linear. */
+        if (util_format_is_srgb(color_format)) {
+                for (int i = 0; i < 3; i++)
+                        dst_color[i] = vc4_nir_srgb_decode(b, dst_color[i]);
+        }
+
+        nir_ssa_def *blend_color[4];
+        vc4_do_blending(c, b, blend_color, src_color, dst_color);
+
+        /* sRGB encode the output color */
+        if (util_format_is_srgb(color_format)) {
+                for (int i = 0; i < 3; i++)
+                        blend_color[i] = vc4_nir_srgb_encode(b, blend_color[i]);
+        }
+
+        nir_ssa_def *swizzled_outputs[4];
+        for (int i = 0; i < 4; i++) {
+                swizzled_outputs[i] = vc4_nir_get_swizzled_channel(b, blend_color,
+                                                                   format_swiz[i]);
+        }
+
+        nir_ssa_def *packed_color = nir_pack_unorm_4x8(b,
+                nir_vec4(b, swizzled_outputs[0], swizzled_outputs[1],
+                         swizzled_outputs[2], swizzled_outputs[3]));
+
+        packed_color = vc4_logicop(b, c->fs_key->logicop_func,
+                                   packed_color, packed_dst_color);
+
+        /* If the bit isn't set in the color mask, then just return the
+         * original dst color, instead.  (0xffu: 0xff << 24 overflows int.)
+         */
+        uint32_t colormask = 0xffffffff;
+        for (int i = 0; i < 4; i++) {
+                if (format_swiz[i] < 4 &&
+                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
+                        colormask &= ~(0xffu << (i * 8));
+                }
+        }
+        packed_color = nir_ior(b,
+                               nir_iand(b, packed_color,
+                                        nir_imm_int(b, colormask)),
+                               nir_iand(b, packed_dst_color,
+                                        nir_imm_int(b, ~colormask)));
+
+        /* Turn the old vec4 output into a store of the packed color. */
+        nir_instr_rewrite_src(&intr->instr, &intr->src[0],
+                              nir_src_for_ssa(packed_color));
+        intr->num_components = 1;
+}
+
+static bool
+vc4_nir_lower_blend_block(nir_block *block, void *state)
+{
+ struct vc4_compile *c = state;
+ /* Lower each store_output in this block that writes a color output. */
+ nir_foreach_instr(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ continue;
+ /* Match the store to its output variable through driver_location. */
+ nir_variable *output_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ output_var = var;
+ break;
+ }
+ }
+ assert(output_var);
+ unsigned semantic_name = output_var->data.location; /* compared against TGSI_SEMANTIC_* below */
+
+ if (semantic_name != TGSI_SEMANTIC_COLOR)
+ continue;
+ /* Emit the lowering code right before the store being rewritten. */
+ nir_function_impl *impl =
+ nir_cf_node_get_function(&block->cf_node);
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ nir_builder_insert_before_instr(&b, &intr->instr);
+ vc4_nir_lower_blend_instr(c, &b, intr);
+ }
+ return true;
+}
+
+/** Applies vc4_nir_lower_blend_block to every implemented overload of c->s,
+ *  keeping block-index and dominance metadata. */
+void
+vc4_nir_lower_blend(struct vc4_compile *c)
+{
+        const int preserved = nir_metadata_block_index | nir_metadata_dominance;
+        nir_foreach_overload(c->s, overload) {
+                nir_function_impl *impl = overload->impl;
+                if (impl) {
+                        nir_foreach_block(impl, vc4_nir_lower_blend_block, c);
+                        nir_metadata_preserve(impl, preserved);
+                }
+        }
+}
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
new file mode 100644
index 00000000000..229d41147d8
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "vc4_qir.h"
+#include "tgsi/tgsi_info.h"
+#include "glsl/nir/nir_builder.h"
+
+/**
+ * Walks the NIR generated by TGSI-to-NIR to lower its io intrinsics into
+ * something amenable to the VC4 architecture.
+ *
+ * Currently, it splits inputs, outputs, and uniforms into scalars, drops any
+ * non-position outputs in coordinate shaders, and fixes up the addressing on
+ * indirect uniform loads.
+ */
+
+/**
+ * Gathers four scalar components into a vec4, redirects every use of the
+ * intrinsic's SSA destination to it, and removes the intrinsic.
+ */
+static void
+replace_intrinsic_with_vec4(nir_builder *b, nir_intrinsic_instr *intr,
+                            nir_ssa_def **comps)
+{
+        /* The later ALU scalarization pass splits this vec4 apart again. */
+        nir_ssa_def *rebuilt = nir_vec4(b, comps[0], comps[1], comps[2], comps[3]);
+        nir_src rebuilt_src = nir_src_for_ssa(rebuilt);
+        void *mem_ctx = ralloc_parent(b->impl);
+
+        /* Point all users at the rebuilt vector, then drop the original. */
+        nir_ssa_def_rewrite_uses(&intr->dest.ssa, rebuilt_src, mem_ctx);
+        nir_instr_remove(&intr->instr);
+}
+
+static void
+vc4_nir_lower_input(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{ /* Replaces a vec4 load_input with four scalar loads plus fragment-shader fixups. */
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ if (c->stage == QSTAGE_FRAG && intr->const_index[0] ==
+ VC4_NIR_TLB_COLOR_READ_INPUT) {
+ /* This doesn't need any lowering. */
+ return;
+ }
+ /* Look up the input variable to learn its semantic name and index. */
+ nir_variable *input_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->inputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ input_var = var;
+ break;
+ }
+ }
+ assert(input_var);
+ int semantic_name = input_var->data.location;
+ int semantic_index = input_var->data.index;
+
+ /* All TGSI-to-NIR inputs are vec4. */
+ assert(intr->num_components == 4);
+
+ /* Generate scalar loads equivalent to the original VEC4. */
+ nir_ssa_def *dests[4];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, nir_intrinsic_load_input);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i; /* scalar slot = vec4 slot * 4 + component */
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+ nir_builder_instr_insert(b, &intr_comp->instr);
+
+ dests[i] = &intr_comp->dest.ssa;
+ }
+ /* Fragment shaders override some components; other stages use the loads as is. */
+ switch (c->stage) {
+ case QSTAGE_FRAG:
+ switch (semantic_name) {
+ case TGSI_SEMANTIC_FACE: /* x = 1.0 - 2.0 * i2f(loaded x); yzw = (0, 0, 1) */
+ dests[0] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ nir_fmul(b,
+ nir_i2f(b, dests[0]),
+ nir_imm_float(b, 2.0)));
+ dests[1] = nir_imm_float(b, 0.0);
+ dests[2] = nir_imm_float(b, 0.0);
+ dests[3] = nir_imm_float(b, 1.0);
+ break;
+ case TGSI_SEMANTIC_GENERIC: /* only generics selected by point_sprite_mask are rewritten */
+ if (c->fs_key->point_sprite_mask &
+ (1 << semantic_index)) {
+ if (!c->fs_key->is_points) { /* !is_points: force x and y to 0.0 */
+ dests[0] = nir_imm_float(b, 0.0);
+ dests[1] = nir_imm_float(b, 0.0);
+ }
+ if (c->fs_key->point_coord_upper_left) { /* flip y: y = 1.0 - y */
+ dests[1] = nir_fsub(b,
+ nir_imm_float(b, 1.0),
+ dests[1]);
+ }
+ dests[2] = nir_imm_float(b, 0.0);
+ dests[3] = nir_imm_float(b, 1.0);
+ }
+ break;
+ }
+ break;
+ case QSTAGE_COORD:
+ case QSTAGE_VERT:
+ break;
+ }
+
+ replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+static void
+vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{ /* Splits a vec4 store_output into scalar stores, or drops / defers it (see below). */
+ nir_variable *output_var = NULL;
+ foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
+ if (var->data.driver_location == intr->const_index[0]) {
+ output_var = var;
+ break;
+ }
+ }
+ assert(output_var);
+ unsigned semantic_name = output_var->data.location;
+ /* Coordinate shaders keep only their POSITION and PSIZE outputs. */
+ if (c->stage == QSTAGE_COORD &&
+ (semantic_name != TGSI_SEMANTIC_POSITION &&
+ semantic_name != TGSI_SEMANTIC_PSIZE)) {
+ nir_instr_remove(&intr->instr);
+ return;
+ }
+
+ /* Color output is lowered by vc4_nir_lower_blend(). */
+ if (c->stage == QSTAGE_FRAG && semantic_name == TGSI_SEMANTIC_COLOR) {
+ intr->const_index[0] *= 4; /* same "* 4" slot scaling as the scalar stores below */
+ return;
+ }
+
+ /* All TGSI-to-NIR outputs are VEC4. */
+ assert(intr->num_components == 4);
+
+ nir_builder_insert_before_instr(b, &intr->instr);
+ /* Emit one scalar store_output per component of the original store. */
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
+ intr_comp->num_components = 1;
+ intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
+
+ assert(intr->src[0].is_ssa);
+ intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b,
+ intr->src[0].ssa,
+ &i, 1, false));
+ nir_builder_instr_insert(b, &intr_comp->instr);
+ }
+ /* The vec4 store has been replaced by the scalar stores above. */
+ nir_instr_remove(&intr->instr);
+}
+
+static void
+vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
+ nir_intrinsic_instr *intr)
+{ /* Splits a vec4 uniform load (direct or indirect) into four scalar loads. */
+ /* All TGSI-to-NIR uniform loads are vec4, but we may create dword
+ * loads in our lowering passes.
+ */
+ if (intr->num_components == 1)
+ return;
+ assert(intr->num_components == 4);
+
+ nir_builder_insert_before_instr(b, &intr->instr);
+
+ /* Generate scalar loads equivalent to the original VEC4. */
+ nir_ssa_def *dests[4];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *intr_comp =
+ nir_intrinsic_instr_create(c->s, intr->intrinsic); /* same opcode as the load being split */
+ intr_comp->num_components = 1;
+ nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL);
+
+ if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) {
+ /* Convert the variable TGSI register index to a byte
+ * offset.
+ */
+ intr_comp->src[0] =
+ nir_src_for_ssa(nir_ishl(b,
+ intr->src[0].ssa,
+ nir_imm_int(b, 4))); /* << 4: 16 bytes per vec4 register */
+
+ /* Convert the offset to be a byte index, too. */
+ intr_comp->const_index[0] = (intr->const_index[0] * 16 +
+ i * 4);
+ } else {
+ /* We want a dword index for non-indirect uniform
+ * loads.
+ */
+ intr_comp->const_index[0] = (intr->const_index[0] * 4 +
+ i);
+ }
+
+ dests[i] = &intr_comp->dest.ssa;
+
+ nir_builder_instr_insert(b, &intr_comp->instr);
+ }
+ /* Rebuild a vec4 from the scalar loads and retire the original load. */
+ replace_intrinsic_with_vec4(b, intr, dests);
+}
+
+/**
+ * Dispatches one instruction to the matching lowering helper.
+ *
+ * Only load_input, store_output and (direct or indirect) load_uniform
+ * intrinsics are handled; every other instruction is left untouched.
+ */
+static void
+vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
+                       struct nir_instr *instr)
+{
+        nir_intrinsic_instr *intr;
+
+        /* Nothing to lower outside of intrinsics. */
+        if (instr->type != nir_instr_type_intrinsic)
+                return;
+        intr = nir_instr_as_intrinsic(instr);
+
+        if (intr->intrinsic == nir_intrinsic_load_input) {
+                vc4_nir_lower_input(c, b, intr);
+        } else if (intr->intrinsic == nir_intrinsic_store_output) {
+                vc4_nir_lower_output(c, b, intr);
+        } else if (intr->intrinsic == nir_intrinsic_load_uniform ||
+                   intr->intrinsic == nir_intrinsic_load_uniform_indirect) {
+                vc4_nir_lower_uniform(c, b, intr);
+        }
+}
+
+/** nir_foreach_block callback: lowers the I/O intrinsics of one block with a
+ *  builder bound to the block's function.  Always returns true. */
+static bool
+vc4_nir_lower_io_block(nir_block *block, void *arg)
+{
+        struct vc4_compile *compile = arg;
+        nir_builder builder;
+
+        nir_builder_init(&builder, nir_cf_node_get_function(&block->cf_node));
+
+        nir_foreach_instr_safe(block, instr) {
+                vc4_nir_lower_io_instr(compile, &builder, instr);
+        }
+        return true;
+}
+
+/** Lowers every block of impl, then marks block-index and dominance metadata
+ *  as preserved.  Always returns true. */
+static bool
+vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
+{
+        const int preserved = nir_metadata_block_index | nir_metadata_dominance;
+        nir_foreach_block(impl, vc4_nir_lower_io_block, c);
+        nir_metadata_preserve(impl, preserved);
+        return true;
+}
+
+void
+vc4_nir_lower_io(struct vc4_compile *c)
+{ /* Entry point: runs the I/O lowering on every overload of c->s. */
+ nir_foreach_overload(c->s, overload) {
+ if (overload->impl) /* skip overloads that have no implementation */
+ vc4_nir_lower_io_impl(c, overload->impl);
+ }
+}
diff --git a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
index d6d2fbf257f..a755de9aa41 100644
--- a/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
+++ b/src/gallium/drivers/vc4/vc4_opt_copy_propagation.c
@@ -67,10 +67,7 @@ qir_opt_copy_propagation(struct vc4_compile *c)
if (inst->op == QOP_MOV &&
inst->dst.file == QFILE_TEMP &&
- inst->src[0].file != QFILE_VPM &&
- !(inst->src[0].file == QFILE_TEMP &&
- (c->defs[inst->src[0].index]->op == QOP_TEX_RESULT ||
- c->defs[inst->src[0].index]->op == QOP_TLB_COLOR_READ))) {
+ inst->src[0].file != QFILE_VPM) {
movs[inst->dst.index] = inst->src[0];
}
}
diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c
index 92c8260eb59..0e5480ea781 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -46,8 +46,7 @@ struct inst_key {
struct qreg src[4];
/**
* If the instruction depends on the flags, how many SFs have been
- * seen before this instruction, or if it depends on r4, how many r4
- * writes have been seen.
+ * seen before this instruction.
*/
uint32_t implicit_arg_update_count;
};
@@ -63,8 +62,7 @@ inst_key_equals(const void *a, const void *b)
static struct qinst *
vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
- struct qinst *inst, uint32_t sf_count,
- uint32_t r4_count)
+ struct qinst *inst, uint32_t sf_count)
{
if (inst->dst.file != QFILE_TEMP ||
inst->op == QOP_MOV ||
@@ -79,8 +77,6 @@ vc4_find_cse(struct vc4_compile *c, struct hash_table *ht,
qir_get_op_nsrc(inst->op) * sizeof(key.src[0]));
if (qir_depends_on_flags(inst))
key.implicit_arg_update_count = sf_count;
- if (qir_reads_r4(inst))
- key.implicit_arg_update_count = r4_count;
uint32_t hash = _mesa_hash_data(&key, sizeof(key));
struct hash_entry *entry =
@@ -121,7 +117,7 @@ bool
qir_opt_cse(struct vc4_compile *c)
{
bool progress = false;
- uint32_t sf_count = 0, r4_count = 0;
+ uint32_t sf_count = 0;
struct hash_table *ht = _mesa_hash_table_create(NULL, NULL,
inst_key_equals);
@@ -130,15 +126,15 @@ qir_opt_cse(struct vc4_compile *c)
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
if (qir_has_side_effects(c, inst) ||
- qir_has_side_effect_reads(c, inst)) {
+ qir_has_side_effect_reads(c, inst) ||
+ inst->op == QOP_TLB_COLOR_READ) {
continue;
}
if (inst->sf) {
sf_count++;
} else {
- struct qinst *cse = vc4_find_cse(c, ht, inst,
- sf_count, r4_count);
+ struct qinst *cse = vc4_find_cse(c, ht, inst, sf_count);
if (cse) {
inst->src[0] = cse->dst;
for (int i = 1; i < qir_get_op_nsrc(inst->op);
@@ -154,9 +150,6 @@ qir_opt_cse(struct vc4_compile *c)
}
}
}
-
- if (qir_writes_r4(inst))
- r4_count++;
}
ralloc_free(ht);
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index ba47c51d9bd..13c472152d8 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -23,21 +23,19 @@
*/
#include
-#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_hash.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_pack_color.h"
-#include "util/format_srgb.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
+#include "glsl/nir/nir.h"
+#include "glsl/nir/nir_builder.h"
#include "nir/tgsi_to_nir.h"
-
#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
@@ -45,51 +43,8 @@
#include "simpenrose/simpenrose.h"
#endif
-struct vc4_key {
- struct vc4_uncompiled_shader *shader_state;
- struct {
- enum pipe_format format;
- unsigned compare_mode:1;
- unsigned compare_func:3;
- unsigned wrap_s:3;
- unsigned wrap_t:3;
- uint8_t swizzle[4];
- } tex[VC4_MAX_TEXTURE_SAMPLERS];
- uint8_t ucp_enables;
-};
-
-struct vc4_fs_key {
- struct vc4_key base;
- enum pipe_format color_format;
- bool depth_enabled;
- bool stencil_enabled;
- bool stencil_twoside;
- bool stencil_full_writemasks;
- bool is_points;
- bool is_lines;
- bool alpha_test;
- bool point_coord_upper_left;
- bool light_twoside;
- uint8_t alpha_test_func;
- uint8_t logicop_func;
- uint32_t point_sprite_mask;
-
- struct pipe_rt_blend_state blend;
-};
-
-struct vc4_vs_key {
- struct vc4_key base;
-
- /**
- * This is a proxy for the array of FS input semantics, which is
- * larger than we would want to put in the key.
- */
- uint64_t compiled_fs_id;
-
- enum pipe_format attr_formats[8];
- bool is_coord;
- bool per_vertex_point_size;
-};
+static struct qreg
+ntq_get_src(struct vc4_compile *c, nir_src src, int i);
static void
resize_qreg_array(struct vc4_compile *c,
@@ -113,10 +68,10 @@ resize_qreg_array(struct vc4_compile *c,
}
static struct qreg
-indirect_uniform_load(struct vc4_compile *c,
- struct qreg indirect_offset,
- unsigned offset)
+indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
{
+ struct qreg indirect_offset = ntq_get_src(c, intr->src[0], 0);
+ uint32_t offset = intr->const_index[0];
struct vc4_compiler_ubo_range *range = NULL;
unsigned i;
for (i = 0; i < c->num_uniform_ranges; i++) {
@@ -138,10 +93,6 @@ indirect_uniform_load(struct vc4_compile *c,
};
offset -= range->src_offset;
- /* Translate the user's TGSI register index from the TGSI register
- * base to a byte offset.
- */
- indirect_offset = qir_SHL(c, indirect_offset, qir_uniform_ui(c, 4));
/* Adjust for where we stored the TGSI register base. */
indirect_offset = qir_ADD(c, indirect_offset,
@@ -155,24 +106,70 @@ indirect_uniform_load(struct vc4_compile *c,
range->size - 4)));
qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
- struct qreg r4 = qir_TEX_RESULT(c);
c->num_texture_samples++;
- return qir_MOV(c, r4);
+ return qir_TEX_RESULT(c);
+}
+
+/** Emits a scalar load_uniform at VC4_NIR_STATE_UNIFORM_OFFSET + contents. */
+nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
+                                       enum quniform_contents contents)
+{
+        nir_intrinsic_instr *load =
+                nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
+        load->num_components = 1;
+        load->const_index[0] = VC4_NIR_STATE_UNIFORM_OFFSET + contents;
+        nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
+        nir_builder_instr_insert(b, &load->instr);
+        return &load->dest.ssa;
+}
+
+nir_ssa_def *
+vc4_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
+{ /* Returns srcs[swiz] for X/Y/Z/W, or a constant 0.0 / 1.0 for the other swizzles. */
+ switch (swiz) {
+ default:
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ fprintf(stderr, "warning: unknown swizzle\n");
+ /* FALLTHROUGH */
+ case UTIL_FORMAT_SWIZZLE_0: /* unknown and NONE swizzles warn above, then yield 0.0 too */
+ return nir_imm_float(b, 0.0);
+ case UTIL_FORMAT_SWIZZLE_1:
+ return nir_imm_float(b, 1.0);
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ return srcs[swiz]; /* the swizzle value is used directly as the array index */
+ }
+}
static struct qreg *
-ntq_get_dest(struct vc4_compile *c, nir_dest dest)
+ntq_init_ssa_def(struct vc4_compile *c, nir_ssa_def *def)
{
- assert(!dest.is_ssa);
- nir_register *reg = dest.reg.reg;
- struct hash_entry *entry = _mesa_hash_table_search(c->def_ht, reg);
- assert(reg->num_array_elems == 0);
- assert(dest.reg.base_offset == 0);
-
- struct qreg *qregs = entry->data;
+ struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
+ def->num_components);
+ _mesa_hash_table_insert(c->def_ht, def, qregs);
return qregs;
}
+/** Returns the qreg array for dest: a new all-undef array for an SSA def,
+ *  or the array already recorded in def_ht for a (non-array) register. */
+static struct qreg *
+ntq_get_dest(struct vc4_compile *c, nir_dest *dest)
+{
+        if (!dest->is_ssa) {
+                nir_register *reg = dest->reg.reg;
+                assert(dest->reg.base_offset == 0);
+                assert(reg->num_array_elems == 0);
+                return _mesa_hash_table_search(c->def_ht, reg)->data;
+        }
+
+        struct qreg *qregs = ntq_init_ssa_def(c, &dest->ssa);
+        for (int i = 0; i < dest->ssa.num_components; i++)
+                qregs[i] = c->undef;
+        return qregs;
+}
+
static struct qreg
ntq_get_src(struct vc4_compile *c, nir_src src, int i)
{
@@ -281,22 +278,6 @@ qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
return qir_SEL_X_Y_NS(c, low, high);
}
-static struct qreg
-qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
-{
- struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
- struct qreg high = qir_FSUB(c,
- qir_FMUL(c,
- qir_uniform_f(c, 1.055),
- qir_POW(c,
- linear,
- qir_uniform_f(c, 0.41666))),
- qir_uniform_f(c, 0.055));
-
- qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
- return qir_SEL_X_Y_NS(c, low, high);
-}
-
static struct qreg
ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1)
{
@@ -410,13 +391,13 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
qir_TEX_S(c, s, texture_u[next_texture_u++]);
c->num_texture_samples++;
- struct qreg r4 = qir_TEX_RESULT(c);
+ struct qreg tex = qir_TEX_RESULT(c);
enum pipe_format format = c->key->tex[unit].format;
struct qreg unpacked[4];
if (util_format_is_depth_or_stencil(format)) {
- struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
+ struct qreg depthf = qir_ITOF(c, qir_SHR(c, tex,
qir_uniform_ui(c, 8)));
struct qreg normalized = qir_FMUL(c, depthf,
qir_uniform_f(c, 1.0f/0xffffff));
@@ -468,7 +449,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
unpacked[i] = depth_output;
} else {
for (int i = 0; i < 4; i++)
- unpacked[i] = qir_R4_UNPACK(c, r4, i);
+ unpacked[i] = qir_UNPACK_8_F(c, tex, i);
}
const uint8_t *format_swiz = vc4_get_format_swizzle(format);
@@ -484,7 +465,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
texture_output[i]);
}
- struct qreg *dest = ntq_get_dest(c, instr->dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest);
for (int i = 0; i < 4; i++) {
dest[i] = get_swizzled_channel(c, texture_output,
c->key->tex[unit].swizzle[i]);
@@ -558,7 +539,7 @@ ntq_fsin(struct vc4_compile *c, struct qreg src)
struct qreg scaled_x =
qir_FMUL(c,
src,
- qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
+ qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
struct qreg x = qir_FADD(c,
ntq_ffract(c, scaled_x),
@@ -756,26 +737,6 @@ emit_fragcoord_input(struct vc4_compile *c, int attr)
c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
}
-static void
-emit_point_coord_input(struct vc4_compile *c, int attr)
-{
- if (c->point_x.file == QFILE_NULL) {
- c->point_x = qir_uniform_f(c, 0.0);
- c->point_y = qir_uniform_f(c, 0.0);
- }
-
- c->inputs[attr * 4 + 0] = c->point_x;
- if (c->fs_key->point_coord_upper_left) {
- c->inputs[attr * 4 + 1] = qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- c->point_y);
- } else {
- c->inputs[attr * 4 + 1] = c->point_y;
- }
- c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
-}
-
static struct qreg
emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
uint8_t index, uint8_t swizzle)
@@ -816,19 +777,6 @@ emit_fragment_input(struct vc4_compile *c, int attr,
}
}
-static void
-emit_face_input(struct vc4_compile *c, int attr)
-{
- c->inputs[attr * 4 + 0] = qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- qir_FMUL(c,
- qir_ITOF(c, qir_FRAG_REV_FLAG(c)),
- qir_uniform_f(c, 2.0)));
- c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0);
- c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0);
-}
-
static void
add_output(struct vc4_compile *c,
uint32_t decl_offset,
@@ -884,12 +832,38 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
srcs[i] = ntq_get_src(c, instr->src[i].src,
instr->src[i].swizzle[0]);
- struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
dest[i] = srcs[i];
return;
}
+ if (instr->op == nir_op_pack_unorm_4x8) {
+ struct qreg result;
+ for (int i = 0; i < 4; i++) {
+ struct qreg src = ntq_get_src(c, instr->src[0].src,
+ instr->src[0].swizzle[i]);
+ if (i == 0)
+ result = qir_PACK_8888_F(c, src);
+ else
+ result = qir_PACK_8_F(c, result, src, i);
+ }
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ *dest = result;
+ return;
+ }
+
+ if (instr->op == nir_op_unpack_unorm_4x8) {
+ struct qreg src = ntq_get_src(c, instr->src[0].src,
+ instr->src[0].swizzle[0]);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+ for (int i = 0; i < 4; i++) {
+ if (instr->dest.write_mask & (1 << i))
+ dest[i] = qir_UNPACK_8_F(c, src, i);
+ }
+ return;
+ }
+
/* General case: We can just grab the one used channel per src. */
struct qreg src[nir_op_infos[instr->op].num_inputs];
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
@@ -898,7 +872,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
/* Pick the channel to store the output in. */
assert(!instr->dest.saturate);
- struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
+ struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
assert(util_is_power_of_two(instr->dest.write_mask));
dest += ffs(instr->dest.write_mask) - 1;
@@ -1092,167 +1066,6 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
}
}
-static struct qreg
-vc4_blend_channel(struct vc4_compile *c,
- struct qreg *dst,
- struct qreg *src,
- struct qreg val,
- unsigned factor,
- int channel)
-{
- switch(factor) {
- case PIPE_BLENDFACTOR_ONE:
- return val;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- return qir_FMUL(c, val, src[channel]);
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- return qir_FMUL(c, val, src[3]);
- case PIPE_BLENDFACTOR_DST_ALPHA:
- return qir_FMUL(c, val, dst[3]);
- case PIPE_BLENDFACTOR_DST_COLOR:
- return qir_FMUL(c, val, dst[channel]);
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- if (channel != 3) {
- return qir_FMUL(c,
- val,
- qir_FMIN(c,
- src[3],
- qir_FSUB(c,
- qir_uniform_f(c, 1.0),
- dst[3])));
- } else {
- return val;
- }
- case PIPE_BLENDFACTOR_CONST_COLOR:
- return qir_FMUL(c, val,
- qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR,
- channel));
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- return qir_FMUL(c, val,
- qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, 3));
- case PIPE_BLENDFACTOR_ZERO:
- return qir_uniform_f(c, 0.0);
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- src[channel]));
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- src[3]));
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- dst[3]));
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
- dst[channel]));
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- return qir_FMUL(c, val,
- qir_FSUB(c, qir_uniform_f(c, 1.0),
- qir_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- channel)));
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- return qir_FMUL(c, val,
- qir_FSUB(c, qir_uniform_f(c, 1.0),
- qir_uniform(c,
- QUNIFORM_BLEND_CONST_COLOR,
- 3)));
-
- default:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- /* Unsupported. */
- fprintf(stderr, "Unknown blend factor %d\n", factor);
- return val;
- }
-}
-
-static struct qreg
-vc4_blend_func(struct vc4_compile *c,
- struct qreg src, struct qreg dst,
- unsigned func)
-{
- switch (func) {
- case PIPE_BLEND_ADD:
- return qir_FADD(c, src, dst);
- case PIPE_BLEND_SUBTRACT:
- return qir_FSUB(c, src, dst);
- case PIPE_BLEND_REVERSE_SUBTRACT:
- return qir_FSUB(c, dst, src);
- case PIPE_BLEND_MIN:
- return qir_FMIN(c, src, dst);
- case PIPE_BLEND_MAX:
- return qir_FMAX(c, src, dst);
-
- default:
- /* Unsupported. */
- fprintf(stderr, "Unknown blend func %d\n", func);
- return src;
-
- }
-}
-
-/**
- * Implements fixed function blending in shader code.
- *
- * VC4 doesn't have any hardware support for blending. Instead, you read the
- * current contents of the destination from the tile buffer after having
- * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
- * math using your output color and that destination value, and update the
- * output color appropriately.
- */
-static void
-vc4_blend(struct vc4_compile *c, struct qreg *result,
- struct qreg *dst_color, struct qreg *src_color)
-{
- struct pipe_rt_blend_state *blend = &c->fs_key->blend;
-
- if (!blend->blend_enable) {
- for (int i = 0; i < 4; i++)
- result[i] = src_color[i];
- return;
- }
-
- struct qreg clamped_src[4];
- struct qreg clamped_dst[4];
- for (int i = 0; i < 4; i++) {
- clamped_src[i] = qir_SAT(c, src_color[i]);
- clamped_dst[i] = qir_SAT(c, dst_color[i]);
- }
- src_color = clamped_src;
- dst_color = clamped_dst;
-
- struct qreg src_blend[4], dst_blend[4];
- for (int i = 0; i < 3; i++) {
- src_blend[i] = vc4_blend_channel(c,
- dst_color, src_color,
- src_color[i],
- blend->rgb_src_factor, i);
- dst_blend[i] = vc4_blend_channel(c,
- dst_color, src_color,
- dst_color[i],
- blend->rgb_dst_factor, i);
- }
- src_blend[3] = vc4_blend_channel(c,
- dst_color, src_color,
- src_color[3],
- blend->alpha_src_factor, 3);
- dst_blend[3] = vc4_blend_channel(c,
- dst_color, src_color,
- dst_color[3],
- blend->alpha_dst_factor, 3);
-
- for (int i = 0; i < 3; i++) {
- result[i] = vc4_blend_func(c,
- src_blend[i], dst_blend[i],
- blend->rgb_func);
- }
- result[3] = vc4_blend_func(c,
- src_blend[3], dst_blend[3],
- blend->alpha_func);
-}
-
static void
clip_distance_discard(struct vc4_compile *c)
{
@@ -1275,168 +1088,16 @@ clip_distance_discard(struct vc4_compile *c)
}
}
-static void
-alpha_test_discard(struct vc4_compile *c)
-{
- struct qreg src_alpha;
- struct qreg alpha_ref = qir_uniform(c, QUNIFORM_ALPHA_REF, 0);
-
- if (!c->fs_key->alpha_test)
- return;
-
- if (c->output_color_index != -1)
- src_alpha = c->outputs[c->output_color_index + 3];
- else
- src_alpha = qir_uniform_f(c, 1.0);
-
- if (c->discard.file == QFILE_NULL)
- c->discard = qir_uniform_ui(c, 0);
-
- switch (c->fs_key->alpha_test_func) {
- case PIPE_FUNC_NEVER:
- c->discard = qir_uniform_ui(c, ~0);
- break;
- case PIPE_FUNC_ALWAYS:
- break;
- case PIPE_FUNC_EQUAL:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_ZS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_NOTEQUAL:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_ZC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_GREATER:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_NC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_GEQUAL:
- qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
- c->discard = qir_SEL_X_Y_NS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_LESS:
- qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref));
- c->discard = qir_SEL_X_Y_NS(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- case PIPE_FUNC_LEQUAL:
- qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha));
- c->discard = qir_SEL_X_Y_NC(c, c->discard,
- qir_uniform_ui(c, ~0));
- break;
- }
-}
-
-static struct qreg
-vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
-{
- switch (c->fs_key->logicop_func) {
- case PIPE_LOGICOP_CLEAR:
- return qir_uniform_f(c, 0.0);
- case PIPE_LOGICOP_NOR:
- return qir_NOT(c, qir_OR(c, src, dst));
- case PIPE_LOGICOP_AND_INVERTED:
- return qir_AND(c, qir_NOT(c, src), dst);
- case PIPE_LOGICOP_COPY_INVERTED:
- return qir_NOT(c, src);
- case PIPE_LOGICOP_AND_REVERSE:
- return qir_AND(c, src, qir_NOT(c, dst));
- case PIPE_LOGICOP_INVERT:
- return qir_NOT(c, dst);
- case PIPE_LOGICOP_XOR:
- return qir_XOR(c, src, dst);
- case PIPE_LOGICOP_NAND:
- return qir_NOT(c, qir_AND(c, src, dst));
- case PIPE_LOGICOP_AND:
- return qir_AND(c, src, dst);
- case PIPE_LOGICOP_EQUIV:
- return qir_NOT(c, qir_XOR(c, src, dst));
- case PIPE_LOGICOP_NOOP:
- return dst;
- case PIPE_LOGICOP_OR_INVERTED:
- return qir_OR(c, qir_NOT(c, src), dst);
- case PIPE_LOGICOP_OR_REVERSE:
- return qir_OR(c, src, qir_NOT(c, dst));
- case PIPE_LOGICOP_OR:
- return qir_OR(c, src, dst);
- case PIPE_LOGICOP_SET:
- return qir_uniform_ui(c, ~0);
- case PIPE_LOGICOP_COPY:
- default:
- return src;
- }
-}
-
static void
emit_frag_end(struct vc4_compile *c)
{
clip_distance_discard(c);
- alpha_test_discard(c);
- enum pipe_format color_format = c->fs_key->color_format;
- const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
- struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
- struct qreg packed_dst_color = c->undef;
-
- if (c->fs_key->blend.blend_enable ||
- c->fs_key->blend.colormask != 0xf ||
- c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
- struct qreg r4 = qir_TLB_COLOR_READ(c);
- for (int i = 0; i < 4; i++)
- tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
- for (int i = 0; i < 4; i++) {
- dst_color[i] = get_swizzled_channel(c,
- tlb_read_color,
- format_swiz[i]);
- if (util_format_is_srgb(color_format) && i != 3) {
- linear_dst_color[i] =
- qir_srgb_decode(c, dst_color[i]);
- } else {
- linear_dst_color[i] = dst_color[i];
- }
- }
-
- /* Save the packed value for logic ops. Can't reuse r4
- * because other things might smash it (like sRGB)
- */
- packed_dst_color = qir_MOV(c, r4);
- }
-
- struct qreg blend_color[4];
- struct qreg undef_array[4] = {
- c->undef, c->undef, c->undef, c->undef
- };
- vc4_blend(c, blend_color, linear_dst_color,
- (c->output_color_index != -1 ?
- c->outputs + c->output_color_index :
- undef_array));
-
- if (util_format_is_srgb(color_format)) {
- for (int i = 0; i < 3; i++)
- blend_color[i] = qir_srgb_encode(c, blend_color[i]);
- }
-
- /* Debug: Sometimes you're getting a black output and just want to see
- * if the FS is getting executed at all. Spam magenta into the color
- * output.
- */
- if (0) {
- blend_color[0] = qir_uniform_f(c, 1.0);
- blend_color[1] = qir_uniform_f(c, 0.0);
- blend_color[2] = qir_uniform_f(c, 1.0);
- blend_color[3] = qir_uniform_f(c, 0.5);
- }
-
- struct qreg swizzled_outputs[4];
- for (int i = 0; i < 4; i++) {
- swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
- format_swiz[i]);
+ struct qreg color;
+ if (c->output_color_index != -1) {
+ color = c->outputs[c->output_color_index];
+ } else {
+ color = qir_uniform_ui(c, 0);
}
if (c->discard.file != QFILE_NULL)
@@ -1463,47 +1124,7 @@ emit_frag_end(struct vc4_compile *c)
qir_TLB_Z_WRITE(c, z);
}
- struct qreg packed_color = c->undef;
- for (int i = 0; i < 4; i++) {
- if (swizzled_outputs[i].file == QFILE_NULL)
- continue;
- if (packed_color.file == QFILE_NULL) {
- packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
- } else {
- packed_color = qir_PACK_8_F(c,
- packed_color,
- swizzled_outputs[i],
- i);
- }
- }
-
- if (packed_color.file == QFILE_NULL)
- packed_color = qir_uniform_ui(c, 0);
-
- if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
- packed_color = vc4_logicop(c, packed_color, packed_dst_color);
- }
-
- /* If the bit isn't set in the color mask, then just return the
- * original dst color, instead.
- */
- uint32_t colormask = 0xffffffff;
- for (int i = 0; i < 4; i++) {
- if (format_swiz[i] < 4 &&
- !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
- colormask &= ~(0xff << (i * 8));
- }
- }
- if (colormask != 0xffffffff) {
- packed_color = qir_OR(c,
- qir_AND(c, packed_color,
- qir_uniform_ui(c, colormask)),
- qir_AND(c, packed_dst_color,
- qir_uniform_ui(c, ~colormask)));
- }
-
- qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
- packed_color, c->undef));
+ qir_TLB_COLOR_WRITE(c, color);
}
static void
@@ -1695,6 +1316,7 @@ vc4_optimize_nir(struct nir_shader *s)
progress = nir_opt_peephole_select(s) || progress;
progress = nir_opt_algebraic(s) || progress;
progress = nir_opt_constant_folding(s) || progress;
+ progress = nir_opt_undef(s) || progress;
} while (progress);
}
@@ -1736,6 +1358,7 @@ ntq_setup_inputs(struct vc4_compile *c)
unsigned loc = var->data.driver_location;
assert(array_len == 1);
+ (void)array_len;
resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
(loc + 1) * 4);
@@ -1743,11 +1366,12 @@ ntq_setup_inputs(struct vc4_compile *c)
if (semantic_name == TGSI_SEMANTIC_POSITION) {
emit_fragcoord_input(c, loc);
} else if (semantic_name == TGSI_SEMANTIC_FACE) {
- emit_face_input(c, loc);
+ c->inputs[loc * 4 + 0] = qir_FRAG_REV_FLAG(c);
} else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
(c->fs_key->point_sprite_mask &
(1 << semantic_index))) {
- emit_point_coord_input(c, loc);
+ c->inputs[loc * 4 + 0] = c->point_x;
+ c->inputs[loc * 4 + 1] = c->point_y;
} else {
emit_fragment_input(c, loc,
semantic_name,
@@ -1770,6 +1394,13 @@ ntq_setup_outputs(struct vc4_compile *c)
unsigned loc = var->data.driver_location * 4;
assert(array_len == 1);
+ (void)array_len;
+
+ /* NIR hack to pass through
+ * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS */
+ if (semantic_name == TGSI_SEMANTIC_COLOR &&
+ semantic_index == -1)
+ semantic_index = 0;
for (int i = 0; i < 4; i++) {
add_output(c,
@@ -1834,14 +1465,25 @@ ntq_setup_registers(struct vc4_compile *c, struct exec_list *list)
static void
ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr)
{
- struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
- instr->def.num_components);
+ struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
for (int i = 0; i < instr->def.num_components; i++)
qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
_mesa_hash_table_insert(c->def_ht, &instr->def, qregs);
}
+static void
+ntq_emit_ssa_undef(struct vc4_compile *c, nir_ssa_undef_instr *instr)
+{
+ struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
+
+ /* QIR needs there to be *some* value, so pick 0 (same as for
+ * ntq_setup_registers()).
+ */
+ for (int i = 0; i < instr->def.num_components; i++)
+ qregs[i] = qir_uniform_ui(c, 0);
+}
+
static void
ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
{
@@ -1849,41 +1491,41 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
struct qreg *dest = NULL;
if (info->has_dest) {
- dest = ntq_get_dest(c, instr->dest);
+ dest = ntq_get_dest(c, &instr->dest);
}
switch (instr->intrinsic) {
case nir_intrinsic_load_uniform:
- for (int i = 0; i < instr->num_components; i++) {
- dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
- instr->const_index[0] * 4 + i);
+ assert(instr->num_components == 1);
+ if (instr->const_index[0] < VC4_NIR_STATE_UNIFORM_OFFSET) {
+ *dest = qir_uniform(c, QUNIFORM_UNIFORM,
+ instr->const_index[0]);
+ } else {
+ *dest = qir_uniform(c, instr->const_index[0] -
+ VC4_NIR_STATE_UNIFORM_OFFSET,
+ 0);
}
break;
case nir_intrinsic_load_uniform_indirect:
- for (int i = 0; i < instr->num_components; i++) {
- dest[i] = indirect_uniform_load(c,
- ntq_get_src(c, instr->src[0], 0),
- (instr->const_index[0] *
- 4 + i) * sizeof(float));
- }
+ *dest = indirect_uniform_load(c, instr);
break;
case nir_intrinsic_load_input:
- for (int i = 0; i < instr->num_components; i++)
- dest[i] = c->inputs[instr->const_index[0] * 4 + i];
-
+ assert(instr->num_components == 1);
+ if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
+ *dest = qir_TLB_COLOR_READ(c);
+ } else {
+ *dest = c->inputs[instr->const_index[0]];
+ }
break;
case nir_intrinsic_store_output:
- for (int i = 0; i < instr->num_components; i++) {
- c->outputs[instr->const_index[0] * 4 + i] =
- qir_MOV(c, ntq_get_src(c, instr->src[0], i));
- }
- c->num_outputs = MAX2(c->num_outputs,
- instr->const_index[0] * 4 +
- instr->num_components + 1);
+ assert(instr->num_components == 1);
+ c->outputs[instr->const_index[0]] =
+ qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
+ c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
break;
case nir_intrinsic_discard:
@@ -1927,6 +1569,10 @@ ntq_emit_instr(struct vc4_compile *c, nir_instr *instr)
ntq_emit_load_const(c, nir_instr_as_load_const(instr));
break;
+ case nir_instr_type_ssa_undef:
+ ntq_emit_ssa_undef(c, nir_instr_as_ssa_undef(instr));
+ break;
+
case nir_instr_type_tex:
ntq_emit_tex(c, nir_instr_as_tex(instr));
break;
@@ -2084,13 +1730,17 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
c->s = tgsi_to_nir(tokens, &nir_options);
nir_opt_global_to_local(c->s);
nir_convert_to_ssa(c->s);
+ if (stage == QSTAGE_FRAG)
+ vc4_nir_lower_blend(c);
+ vc4_nir_lower_io(c);
nir_lower_idiv(c->s);
+ nir_lower_load_const_to_scalar(c->s);
vc4_optimize_nir(c->s);
nir_remove_dead_variables(c->s);
- nir_convert_from_ssa(c->s);
+ nir_convert_from_ssa(c->s, true);
if (vc4_debug & VC4_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",
@@ -2187,6 +1837,8 @@ copy_uniform_state_to_shader(struct vc4_compiled_shader *shader,
memcpy(uinfo->contents, c->uniform_contents,
count * sizeof(*uinfo->contents));
uinfo->num_texture_samples = c->num_texture_samples;
+
+ vc4_set_shader_uniform_dirty_flags(shader);
}
static struct vc4_compiled_shader *
@@ -2259,9 +1911,8 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
}
copy_uniform_state_to_shader(shader, c);
- shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts,
- c->qpu_inst_count * sizeof(uint64_t),
- "code");
+ shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts,
+ c->qpu_inst_count * sizeof(uint64_t));
/* Copy the compiler UBO range state to the compiled shader, dropping
* out arrays that were never referenced by an indirect load.
@@ -2288,10 +1939,12 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
}
}
if (shader->ubo_size) {
- fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
- qir_get_stage_name(c->stage),
- c->program_id, c->variant_id,
- shader->ubo_size / 4);
+ if (vc4_debug & VC4_DEBUG_SHADERDB) {
+ fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n",
+ qir_get_stage_name(c->stage),
+ c->program_id, c->variant_id,
+ shader->ubo_size / 4);
+ }
}
qir_compile_destroy(c);
@@ -2421,9 +2074,20 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
(prim_mode == PIPE_PRIM_POINTS &&
vc4->rasterizer->base.point_size_per_vertex);
- vc4->prog.vs = vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
+ struct vc4_compiled_shader *vs =
+ vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
+ if (vs != vc4->prog.vs) {
+ vc4->prog.vs = vs;
+ vc4->dirty |= VC4_DIRTY_COMPILED_VS;
+ }
+
key->is_coord = true;
- vc4->prog.cs = vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
+ struct vc4_compiled_shader *cs =
+ vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
+ if (cs != vc4->prog.cs) {
+ vc4->prog.cs = cs;
+ vc4->dirty |= VC4_DIRTY_COMPILED_CS;
+ }
}
void
@@ -2490,305 +2154,6 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
free(so);
}
-static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
-{
- switch (p_wrap) {
- case PIPE_TEX_WRAP_REPEAT:
- return 0;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- return 1;
- case PIPE_TEX_WRAP_MIRROR_REPEAT:
- return 2;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
- return 3;
- case PIPE_TEX_WRAP_CLAMP:
- return (using_nearest ? 1 : 3);
- default:
- fprintf(stderr, "Unknown wrap mode %d\n", p_wrap);
- assert(!"not reached");
- return 0;
- }
-}
-
-static void
-write_texture_p0(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
-
- cl_reloc(vc4, &vc4->uniforms, rsc->bo,
- VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
- VC4_SET_FIELD(texture->u.tex.last_level -
- texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
- VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE,
- VC4_TEX_P0_CMMODE) |
- VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE));
-}
-
-static void
-write_texture_p1(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
- struct pipe_sampler_state *sampler = texstate->samplers[unit];
- static const uint8_t minfilter_map[6] = {
- VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
- VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
- VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
- VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
- VC4_TEX_P1_MINFILT_NEAREST,
- VC4_TEX_P1_MINFILT_LINEAR,
- };
- static const uint32_t magfilter_map[] = {
- [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
- [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
- };
-
- bool either_nearest =
- (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
- sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
-
- cl_aligned_u32(&vc4->uniforms,
- VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
- VC4_SET_FIELD(texture->texture->height0 & 2047,
- VC4_TEX_P1_HEIGHT) |
- VC4_SET_FIELD(texture->texture->width0 & 2047,
- VC4_TEX_P1_WIDTH) |
- VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter],
- VC4_TEX_P1_MAGFILT) |
- VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 +
- sampler->min_img_filter],
- VC4_TEX_P1_MINFILT) |
- VC4_SET_FIELD(translate_wrap(sampler->wrap_s, either_nearest),
- VC4_TEX_P1_WRAP_S) |
- VC4_SET_FIELD(translate_wrap(sampler->wrap_t, either_nearest),
- VC4_TEX_P1_WRAP_T));
-}
-
-static void
-write_texture_p2(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t data)
-{
- uint32_t unit = data & 0xffff;
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
-
- cl_aligned_u32(&vc4->uniforms,
- VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
- VC4_TEX_P2_PTYPE) |
- VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
- VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
-}
-
-
-#define SWIZ(x,y,z,w) { \
- UTIL_FORMAT_SWIZZLE_##x, \
- UTIL_FORMAT_SWIZZLE_##y, \
- UTIL_FORMAT_SWIZZLE_##z, \
- UTIL_FORMAT_SWIZZLE_##w \
-}
-
-static void
-write_texture_border_color(struct vc4_context *vc4,
- struct vc4_texture_stateobj *texstate,
- uint32_t unit)
-{
- struct pipe_sampler_state *sampler = texstate->samplers[unit];
- struct pipe_sampler_view *texture = texstate->textures[unit];
- struct vc4_resource *rsc = vc4_resource(texture->texture);
- union util_color uc;
-
- const struct util_format_description *tex_format_desc =
- util_format_description(texture->format);
-
- float border_color[4];
- for (int i = 0; i < 4; i++)
- border_color[i] = sampler->border_color.f[i];
- if (util_format_is_srgb(texture->format)) {
- for (int i = 0; i < 3; i++)
- border_color[i] =
- util_format_linear_to_srgb_float(border_color[i]);
- }
-
- /* Turn the border color into the layout of channels that it would
- * have when stored as texture contents.
- */
- float storage_color[4];
- util_format_unswizzle_4f(storage_color,
- border_color,
- tex_format_desc->swizzle);
-
- /* Now, pack so that when the vc4_format-sampled texture contents are
- * replaced with our border color, the vc4_get_format_swizzle()
- * swizzling will get the right channels.
- */
- if (util_format_is_depth_or_stencil(texture->format)) {
- uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
- sampler->border_color.f[0]) << 8;
- } else {
- switch (rsc->vc4_format) {
- default:
- case VC4_TEXTURE_TYPE_RGBA8888:
- util_pack_color(storage_color,
- PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_RGBA4444:
- util_pack_color(storage_color,
- PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_RGB565:
- util_pack_color(storage_color,
- PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
- break;
- case VC4_TEXTURE_TYPE_ALPHA:
- uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
- break;
- case VC4_TEXTURE_TYPE_LUMALPHA:
- uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
- (float_to_ubyte(storage_color[0]) << 0));
- break;
- }
- }
-
- cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
-}
-
-static uint32_t
-get_texrect_scale(struct vc4_texture_stateobj *texstate,
- enum quniform_contents contents,
- uint32_t data)
-{
- struct pipe_sampler_view *texture = texstate->textures[data];
- uint32_t dim;
-
- if (contents == QUNIFORM_TEXRECT_SCALE_X)
- dim = texture->texture->width0;
- else
- dim = texture->texture->height0;
-
- return fui(1.0f / dim);
-}
-
-static struct vc4_bo *
-vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
- const uint32_t *gallium_uniforms)
-{
- if (!shader->ubo_size)
- return NULL;
-
- struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
- uint32_t *data = vc4_bo_map(ubo);
- for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
- memcpy(data + shader->ubo_ranges[i].dst_offset,
- gallium_uniforms + shader->ubo_ranges[i].src_offset,
- shader->ubo_ranges[i].size);
- }
-
- return ubo;
-}
-
-void
-vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
- struct vc4_constbuf_stateobj *cb,
- struct vc4_texture_stateobj *texstate)
-{
- struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
- const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
- struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
-
- cl_ensure_space(&vc4->uniforms, (uinfo->count +
- uinfo->num_texture_samples) * 4);
-
- cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);
-
- for (int i = 0; i < uinfo->count; i++) {
-
- switch (uinfo->contents[i]) {
- case QUNIFORM_CONSTANT:
- cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
- break;
- case QUNIFORM_UNIFORM:
- cl_aligned_u32(&vc4->uniforms,
- gallium_uniforms[uinfo->data[i]]);
- break;
- case QUNIFORM_VIEWPORT_X_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
- break;
- case QUNIFORM_VIEWPORT_Y_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
- break;
-
- case QUNIFORM_VIEWPORT_Z_OFFSET:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
- break;
- case QUNIFORM_VIEWPORT_Z_SCALE:
- cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
- break;
-
- case QUNIFORM_USER_CLIP_PLANE:
- cl_aligned_f(&vc4->uniforms,
- vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P0:
- write_texture_p0(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P1:
- write_texture_p1(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXTURE_CONFIG_P2:
- write_texture_p2(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_UBO_ADDR:
- cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
- break;
-
- case QUNIFORM_TEXTURE_BORDER_COLOR:
- write_texture_border_color(vc4, texstate, uinfo->data[i]);
- break;
-
- case QUNIFORM_TEXRECT_SCALE_X:
- case QUNIFORM_TEXRECT_SCALE_Y:
- cl_aligned_u32(&vc4->uniforms,
- get_texrect_scale(texstate,
- uinfo->contents[i],
- uinfo->data[i]));
- break;
-
- case QUNIFORM_BLEND_CONST_COLOR:
- cl_aligned_f(&vc4->uniforms,
- CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
- break;
-
- case QUNIFORM_STENCIL:
- cl_aligned_u32(&vc4->uniforms,
- vc4->zsa->stencil_uniforms[uinfo->data[i]] |
- (uinfo->data[i] <= 1 ?
- (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
- 0));
- break;
-
- case QUNIFORM_ALPHA_REF:
- cl_aligned_f(&vc4->uniforms,
- vc4->zsa->base.alpha.ref_value);
- break;
- }
-#if 0
- uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
- fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
- shader, i, written_val, uif(written_val));
-#endif
- }
-}
-
static void
vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 1c96ef4795f..254140a72f5 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -96,10 +96,6 @@ static const struct qir_op_info qir_op_info[] = {
[QOP_TEX_B] = { "tex_b", 0, 2 },
[QOP_TEX_DIRECT] = { "tex_direct", 0, 2 },
[QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
- [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 1 },
- [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 1 },
- [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 1 },
- [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 1 },
[QOP_UNPACK_8A_F] = { "unpack_8a_f", 1, 1 },
[QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 },
[QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 },
@@ -234,20 +230,6 @@ qir_writes_r4(struct qinst *inst)
}
}
-bool
-qir_reads_r4(struct qinst *inst)
-{
- switch (inst->op) {
- case QOP_R4_UNPACK_A:
- case QOP_R4_UNPACK_B:
- case QOP_R4_UNPACK_C:
- case QOP_R4_UNPACK_D:
- return true;
- default:
- return false;
- }
-}
-
static void
qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 732cfd0b306..cade795c12a 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -36,6 +36,11 @@
#include "util/list.h"
#include "util/u_math.h"
+#include "vc4_screen.h"
+#include "pipe/p_state.h"
+
+struct nir_builder;
+
enum qfile {
QFILE_NULL,
QFILE_TEMP,
@@ -155,10 +160,6 @@ enum qop {
* the destination
*/
QOP_TEX_RESULT,
- QOP_R4_UNPACK_A,
- QOP_R4_UNPACK_B,
- QOP_R4_UNPACK_C,
- QOP_R4_UNPACK_D
};
struct queued_qpu_inst {
@@ -243,7 +244,11 @@ enum quniform_contents {
QUNIFORM_TEXTURE_BORDER_COLOR,
- QUNIFORM_BLEND_CONST_COLOR,
+ QUNIFORM_BLEND_CONST_COLOR_X,
+ QUNIFORM_BLEND_CONST_COLOR_Y,
+ QUNIFORM_BLEND_CONST_COLOR_Z,
+ QUNIFORM_BLEND_CONST_COLOR_W,
+
QUNIFORM_STENCIL,
QUNIFORM_ALPHA_REF,
@@ -280,6 +285,52 @@ struct vc4_compiler_ubo_range {
bool used;
};
+struct vc4_key {
+ struct vc4_uncompiled_shader *shader_state;
+ struct {
+ enum pipe_format format;
+ unsigned compare_mode:1;
+ unsigned compare_func:3;
+ unsigned wrap_s:3;
+ unsigned wrap_t:3;
+ uint8_t swizzle[4];
+ } tex[VC4_MAX_TEXTURE_SAMPLERS];
+ uint8_t ucp_enables;
+};
+
+struct vc4_fs_key {
+ struct vc4_key base;
+ enum pipe_format color_format;
+ bool depth_enabled;
+ bool stencil_enabled;
+ bool stencil_twoside;
+ bool stencil_full_writemasks;
+ bool is_points;
+ bool is_lines;
+ bool alpha_test;
+ bool point_coord_upper_left;
+ bool light_twoside;
+ uint8_t alpha_test_func;
+ uint8_t logicop_func;
+ uint32_t point_sprite_mask;
+
+ struct pipe_rt_blend_state blend;
+};
+
+struct vc4_vs_key {
+ struct vc4_key base;
+
+ /**
+ * This is a proxy for the array of FS input semantics, which is
+ * larger than we would want to put in the key.
+ */
+ uint64_t compiled_fs_id;
+
+ enum pipe_format attr_formats[8];
+ bool is_coord;
+ bool per_vertex_point_size;
+};
+
struct vc4_compile {
struct vc4_context *vc4;
nir_shader *s;
@@ -369,6 +420,16 @@ struct vc4_compile {
uint32_t variant_id;
};
+/* Special nir_load_input intrinsic index for loading the current TLB
+ * destination color.
+ */
+#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000
+
+/* Special offset for nir_load_uniform values to get a QUNIFORM_*
+ * state-dependent value.
+ */
+#define VC4_NIR_STATE_UNIFORM_OFFSET 2000000000
+
struct vc4_compile *qir_compile_init(void);
void qir_compile_destroy(struct vc4_compile *c);
struct qinst *qir_inst(enum qop op, struct qreg dst,
@@ -393,7 +454,6 @@ bool qir_is_multi_instruction(struct qinst *inst);
bool qir_is_tex(struct qinst *inst);
bool qir_depends_on_flags(struct qinst *inst);
bool qir_writes_r4(struct qinst *inst);
-bool qir_reads_r4(struct qinst *inst);
bool qir_src_needs_a_file(struct qinst *inst);
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
@@ -409,6 +469,12 @@ bool qir_opt_cse(struct vc4_compile *c);
bool qir_opt_dead_code(struct vc4_compile *c);
bool qir_opt_small_immediates(struct vc4_compile *c);
bool qir_opt_vpm_writes(struct vc4_compile *c);
+void vc4_nir_lower_blend(struct vc4_compile *c);
+void vc4_nir_lower_io(struct vc4_compile *c);
+nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
+ enum quniform_contents contents);
+nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
+ nir_ssa_def **srcs, int swiz);
void qir_lower_uniforms(struct vc4_compile *c);
void qpu_schedule_instructions(struct vc4_compile *c);
@@ -523,26 +589,11 @@ QIR_ALU0(FRAG_W)
QIR_ALU0(FRAG_REV_FLAG)
QIR_ALU0(TEX_RESULT)
QIR_ALU0(TLB_COLOR_READ)
+QIR_NODST_1(TLB_COLOR_WRITE)
QIR_NODST_1(TLB_Z_WRITE)
QIR_NODST_1(TLB_DISCARD_SETUP)
QIR_NODST_1(TLB_STENCIL_SETUP)
-static inline struct qreg
-qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
-{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, r4, c->undef));
- return t;
-}
-
-static inline struct qreg
-qir_SEL_X_0_COND(struct vc4_compile *c, int i)
-{
- struct qreg t = qir_get_temp(c);
- qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
- return t;
-}
-
static inline struct qreg
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
{
diff --git a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
index 910c89dca79..f087c3b81b5 100644
--- a/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
@@ -52,7 +52,7 @@ static void
add_uniform(struct hash_table *ht, struct qreg reg)
{
struct hash_entry *entry;
- void *key = (void *)(uintptr_t)reg.index;
+ void *key = (void *)(uintptr_t)(reg.index + 1);
entry = _mesa_hash_table_search(ht, key);
if (entry) {
@@ -66,7 +66,7 @@ static void
remove_uniform(struct hash_table *ht, struct qreg reg)
{
struct hash_entry *entry;
- void *key = (void *)(uintptr_t)reg.index;
+ void *key = (void *)(uintptr_t)(reg.index + 1);
entry = _mesa_hash_table_search(ht, key);
assert(entry);
@@ -122,7 +122,7 @@ qir_lower_uniforms(struct vc4_compile *c)
struct hash_entry *entry;
hash_table_foreach(ht, entry) {
uint32_t count = (uintptr_t)entry->data;
- uint32_t index = (uintptr_t)entry->key;
+ uint32_t index = (uintptr_t)entry->key - 1;
if (count > max_count) {
max_count = count;
max_index = index;
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h
index c9ab6344589..fbb90ba12a0 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -122,23 +122,23 @@ static inline struct qpu_reg qpu_r3(void) { return qpu_rn(3); }
static inline struct qpu_reg qpu_r4(void) { return qpu_rn(4); }
static inline struct qpu_reg qpu_r5(void) { return qpu_rn(5); }
-uint64_t qpu_NOP(void);
-uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src);
-uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src);
+uint64_t qpu_NOP(void) ATTRIBUTE_CONST;
+uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST;
+uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST;
uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst,
- struct qpu_reg src0, struct qpu_reg src1);
+ struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
- struct qpu_reg src0, struct qpu_reg src1);
-uint64_t qpu_merge_inst(uint64_t a, uint64_t b);
-uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val);
-uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
-uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
-uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond);
-uint32_t qpu_encode_small_immediate(uint32_t i);
+ struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
+uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
+uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
+uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
+uint32_t qpu_encode_small_immediate(uint32_t i) ATTRIBUTE_CONST;
-bool qpu_waddr_is_tlb(uint32_t waddr);
-bool qpu_inst_is_tlb(uint64_t inst);
-int qpu_num_sf_accesses(uint64_t inst);
+bool qpu_waddr_is_tlb(uint32_t waddr) ATTRIBUTE_CONST;
+bool qpu_inst_is_tlb(uint64_t inst) ATTRIBUTE_CONST;
+int qpu_num_sf_accesses(uint64_t inst) ATTRIBUTE_CONST;
void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst);
static inline uint64_t
diff --git a/src/gallium/drivers/vc4/vc4_qpu_disasm.c b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
index 55e0e6139b5..00aeb300a9b 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_disasm.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_disasm.c
@@ -225,7 +225,7 @@ static const char *qpu_condflags[] = {
};
#define DESC(array, index) \
- ((index > ARRAY_SIZE(array) || !(array)[index]) ? \
+ ((index >= ARRAY_SIZE(array) || !(array)[index]) ? \
"???" : (array)[index])
static const char *
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 99afe4b8798..f324056258c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -234,6 +234,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QFILE_VPM:
assert((int)qinst->src[i].index >=
last_vpm_read_index);
+ (void)last_vpm_read_index;
last_vpm_read_index = qinst->src[i].index;
src[i] = qpu_ra(QPU_R_VPM);
break;
@@ -319,7 +320,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
abort();
}
- queue(c, qpu_a_MOV(dst, qpu_r4()));
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
@@ -402,6 +404,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_COLOR_LOAD);
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
case QOP_TLB_COLOR_WRITE:
@@ -451,21 +455,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
queue(c, qpu_NOP());
*last_inst(c) = qpu_set_sig(*last_inst(c),
QPU_SIG_LOAD_TMU0);
-
- break;
-
- case QOP_R4_UNPACK_A:
- case QOP_R4_UNPACK_B:
- case QOP_R4_UNPACK_C:
- case QOP_R4_UNPACK_D:
- assert(src[0].mux == QPU_MUX_R4);
- queue(c, qpu_a_MOV(dst, src[0]));
- *last_inst(c) |= QPU_PM;
- *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
- (qinst->op -
- QOP_R4_UNPACK_A),
- QPU_UNPACK);
-
+ if (dst.mux != QPU_MUX_R4)
+ queue(c, qpu_a_MOV(dst, qpu_r4()));
break;
case QOP_UNPACK_8A_F:
@@ -474,20 +465,30 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
case QOP_UNPACK_8D_F:
case QOP_UNPACK_16A_F:
case QOP_UNPACK_16B_F: {
- assert(src[0].mux == QPU_MUX_A);
+ if (src[0].mux == QPU_MUX_R4) {
+ queue(c, qpu_a_MOV(dst, src[0]));
+ *last_inst(c) |= QPU_PM;
+ *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
+ (qinst->op -
+ QOP_UNPACK_8A_F),
+ QPU_UNPACK);
+ } else {
+ assert(src[0].mux == QPU_MUX_A);
- /* Since we're setting the pack bits, if the
- * destination is in A it would get re-packed.
- */
- queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
- qpu_rb(31) : dst),
- src[0], src[0]));
- *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
- QOP_UNPACK_8A_F],
- QPU_UNPACK);
+ /* Since we're setting the pack bits, if the
+ * destination is in A it would get re-packed.
+ */
+ queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
+ qpu_rb(31) : dst),
+ src[0], src[0]));
+ *last_inst(c) |=
+ QPU_SET_FIELD(unpack_map[qinst->op -
+ QOP_UNPACK_8A_F],
+ QPU_UNPACK);
- if (dst.mux == QPU_MUX_A) {
- queue(c, qpu_a_MOV(dst, qpu_rb(31)));
+ if (dst.mux == QPU_MUX_A) {
+ queue(c, qpu_a_MOV(dst, qpu_rb(31)));
+ }
}
}
break;
diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index 8471edbf62c..9cf6841f41c 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -23,6 +23,13 @@
#include "vc4_qpu.h"
+#ifdef NDEBUG
+/* Since most of our code is used in assert()s, don't warn about dead code. */
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
static bool
writes_reg(uint64_t inst, uint32_t w)
{
diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 3b0b890b66a..a29db1f3abe 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -116,6 +116,8 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs));
vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+ vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
+ vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
/* Reserve ra31/rb31 for spilling fixup_raddr_conflict() in
* vc4_qpu_emit.c
@@ -126,15 +128,18 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
/* R4 can't be written as a general purpose register. (it's
* TMU_NOSWAP as a write address).
*/
- if (vc4_regs[i].mux == QPU_MUX_R4)
+ if (vc4_regs[i].mux == QPU_MUX_R4) {
+ ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
continue;
+ }
ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
}
- vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
- for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2)
+ for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
ra_class_add_reg(vc4->regs, vc4->reg_class_a, i);
+ ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+ }
ra_set_finalize(vc4->regs, NULL);
}
@@ -153,6 +158,10 @@ node_to_temp_priority(const void *in_a, const void *in_b)
return a->priority - b->priority;
}
+#define CLASS_BIT_A (1 << 0)
+#define CLASS_BIT_B_OR_ACC (1 << 1)
+#define CLASS_BIT_R4 (1 << 2)
+
/**
* Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
*
@@ -165,6 +174,7 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
uint32_t temp_to_node[c->num_temps];
uint32_t def[c->num_temps];
uint32_t use[c->num_temps];
+ uint8_t class_bits[c->num_temps];
struct qpu_reg *temp_registers = calloc(c->num_temps,
sizeof(*temp_registers));
memset(def, 0, sizeof(def));
@@ -181,10 +191,6 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
struct ra_graph *g = ra_alloc_interference_graph(vc4->regs,
c->num_temps);
- for (uint32_t i = 0; i < c->num_temps; i++) {
- ra_set_node_class(g, i, vc4->reg_class_any);
- }
-
/* Compute the live ranges so we can figure out interference.
*/
uint32_t ip = 0;
@@ -223,8 +229,33 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
temp_to_node[map[i].temp] = i;
}
- /* Figure out our register classes and preallocated registers*/
+ /* Figure out our register classes and preallocated registers. We
+ * start with any temp being able to be in any file, then instructions
+ * incrementally remove bits that the temp definitely can't be in.
+ */
+ memset(class_bits,
+ CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4,
+ sizeof(class_bits));
+
+ ip = 0;
list_for_each_entry(struct qinst, inst, &c->instructions, link) {
+ if (qir_writes_r4(inst)) {
+ /* This instruction writes r4 (and optionally moves
+ * its result to a temp), so nothing else can be
+ * stored in r4 across it.
+ */
+ for (int i = 0; i < c->num_temps; i++) {
+ if (def[i] < ip && use[i] > ip)
+ class_bits[i] &= ~CLASS_BIT_R4;
+ }
+ } else {
+ /* R4 can't be written as a general purpose
+ * register. (it's TMU_NOSWAP as a write address).
+ */
+ if (inst->dst.file == QFILE_TEMP)
+ class_bits[inst->dst.index] &= ~CLASS_BIT_R4;
+ }
+
switch (inst->op) {
case QOP_FRAG_Z:
ra_set_node_reg(g, temp_to_node[inst->dst.index],
@@ -236,17 +267,9 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
break;
- case QOP_TEX_RESULT:
- case QOP_TLB_COLOR_READ:
- assert(vc4_regs[ACC_INDEX + 4].mux == QPU_MUX_R4);
- ra_set_node_reg(g, temp_to_node[inst->dst.index],
- ACC_INDEX + 4);
- break;
-
case QOP_PACK_SCALED:
/* The pack flags require an A-file dst register. */
- ra_set_node_class(g, temp_to_node[inst->dst.index],
- vc4->reg_class_a);
+ class_bits[inst->dst.index] &= CLASS_BIT_A;
break;
default:
@@ -254,8 +277,30 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
}
if (qir_src_needs_a_file(inst)) {
- ra_set_node_class(g, temp_to_node[inst->src[0].index],
- vc4->reg_class_a);
+ class_bits[inst->src[0].index] &= CLASS_BIT_A;
+ }
+ ip++;
+ }
+
+ for (uint32_t i = 0; i < c->num_temps; i++) {
+ int node = temp_to_node[i];
+
+ switch (class_bits[i]) {
+ case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
+ case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+ ra_set_node_class(g, node, vc4->reg_class_any);
+ break;
+ case CLASS_BIT_A | CLASS_BIT_R4:
+ ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
+ break;
+ case CLASS_BIT_A:
+ ra_set_node_class(g, node, vc4->reg_class_a);
+ break;
+ default:
+ fprintf(stderr, "temp %d: bad class bits: 0x%x\n",
+ i, class_bits[i]);
+ abort();
+ break;
}
}
@@ -270,7 +315,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
}
bool ok = ra_allocate(g);
- assert(ok);
+ if (!ok) {
+ fprintf(stderr, "Failed to register allocate:\n");
+ qir_dump(c);
+ abort();
+ }
for (uint32_t i = 0; i < c->num_temps; i++) {
temp_registers[i] = vc4_regs[ra_get_node_reg(g, temp_to_node[i])];
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index cab76406055..5d5166fd818 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -102,6 +102,12 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
vc4_resource_bo_alloc(rsc);
+
+ /* If it might be bound as one of our vertex buffers, make
+ * sure we re-emit vertex buffer state.
+ */
+ if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+ vc4->dirty |= VC4_DIRTY_VTXBUF;
} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
if (vc4_cl_references_bo(pctx, rsc->bo)) {
if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
@@ -110,6 +116,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
prsc->height0 == box->height &&
prsc->depth0 == box->depth) {
vc4_resource_bo_alloc(rsc);
+ if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+ vc4->dirty |= VC4_DIRTY_VTXBUF;
} else {
vc4_flush(pctx);
}
diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h
index ab8f5d3cd55..87571b75e8b 100644
--- a/src/gallium/drivers/vc4/vc4_resource.h
+++ b/src/gallium/drivers/vc4/vc4_resource.h
@@ -82,19 +82,19 @@ struct vc4_resource {
struct pipe_resource *shadow_parent;
};
-static INLINE struct vc4_resource *
+static inline struct vc4_resource *
vc4_resource(struct pipe_resource *prsc)
{
return (struct vc4_resource *)prsc;
}
-static INLINE struct vc4_surface *
+static inline struct vc4_surface *
vc4_surface(struct pipe_surface *psurf)
{
return (struct vc4_surface *)psurf;
}
-static INLINE struct vc4_transfer *
+static inline struct vc4_transfer *
vc4_transfer(struct pipe_transfer *ptrans)
{
return (struct vc4_transfer *)ptrans;
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index f63bead0fbb..2dee1d40e5f 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -176,6 +176,10 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+ case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+ case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+ case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+ case PIPE_CAP_DEPTH_BOUNDS_TEST:
return 0;
/* Stream output. */
@@ -489,6 +493,12 @@ vc4_screen_bo_get_handle(struct pipe_screen *pscreen,
{
whandle->stride = stride;
+ /* If we're passing some reference to our BO out to some other part of
+ * the system, then we can't do any optimizations about only us being
+ * the ones seeing it (like BO caching or shadow update avoidance).
+ */
+ bo->private = false;
+
switch (whandle->type) {
case DRM_API_HANDLE_TYPE_SHARED:
return vc4_bo_flink(bo, &whandle->handle);
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index b58013dd2ee..7cfd236349d 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -74,11 +74,12 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
struct vc4_bo **bos = vc4->bo_pointers.base;
exec->bo_count = args->bo_handle_count;
- exec->bo = calloc(exec->bo_count, sizeof(struct vc4_bo_exec_state));
+ exec->bo = calloc(exec->bo_count, sizeof(void *));
for (int i = 0; i < exec->bo_count; i++) {
struct vc4_bo *bo = bos[i];
struct drm_gem_cma_object *obj = vc4_wrap_bo_with_cma(dev, bo);
+ struct drm_vc4_bo *drm_bo = to_vc4_bo(&obj->base);
#if 0
fprintf(stderr, "bo hindex %d: %s\n", i, bo->name);
#endif
@@ -86,7 +87,16 @@ vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
vc4_bo_map(bo);
memcpy(obj->vaddr, bo->map, bo->size);
- exec->bo[i].bo = obj;
+ exec->bo[i] = obj;
+
+ /* The kernel does this validation at shader create ioctl
+ * time.
+ */
+ if (strcmp(bo->name, "code") == 0) {
+ drm_bo->validated_shader = vc4_validate_shader(obj);
+ if (!drm_bo->validated_shader)
+ abort();
+ }
}
return 0;
}
@@ -95,7 +105,7 @@ static int
vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
{
for (int i = 0; i < exec->bo_count; i++) {
- struct drm_gem_cma_object *obj = exec->bo[i].bo;
+ struct drm_gem_cma_object *obj = exec->bo[i];
struct vc4_bo *bo = to_vc4_bo(&obj->base)->bo;
memcpy(bo->map, obj->vaddr, bo->size);
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
index 2bb36b253bb..68ace0216aa 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.h
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -78,6 +78,7 @@ struct drm_gem_cma_object {
struct drm_vc4_bo {
struct drm_gem_cma_object base;
struct vc4_bo *bo;
+ struct vc4_validated_shader_info *validated_shader;
struct list_head unref_head;
};
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 4a1d4c3a4d6..8a759c2ca4c 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -107,7 +107,7 @@ vc4_create_rasterizer_state(struct pipe_context *pctx,
/* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
* BCM21553).
*/
- so->point_size = MAX2(cso->point_size, .125);
+ so->point_size = MAX2(cso->point_size, .125f);
if (cso->front_ccw)
so->config_bits[0] |= VC4_CONFIG_BITS_CW_PRIMITIVES;
@@ -461,11 +461,64 @@ vc4_get_stage_tex(struct vc4_context *vc4, unsigned shader)
}
}
+/**
+ * Translates a gallium PIPE_TEX_WRAP_* mode into the small integer code that
+ * the caller places in the wrap fields of texture config parameter 1.
+ *
+ * \param p_wrap         one of the PIPE_TEX_WRAP_* values.
+ * \param using_nearest  selects how PIPE_TEX_WRAP_CLAMP is mapped: the
+ *                       clamp-to-edge code (1) when true, the
+ *                       clamp-to-border code (3) when false.
+ *
+ * \return the wrap code (0-3).  Unknown modes are reported on stderr, trip
+ *         an assert in debug builds, and yield 0 (the repeat code).
+ */
+static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
+{
+        switch (p_wrap) {
+        case PIPE_TEX_WRAP_REPEAT:
+                return 0;
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return 1;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return 2;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return 3;
+        case PIPE_TEX_WRAP_CLAMP:
+                return (using_nearest ? 1 : 3);
+        default:
+                /* p_wrap is unsigned, so print it with %u rather than %d. */
+                fprintf(stderr, "Unknown wrap mode %u\n", p_wrap);
+                assert(!"not reached");
+                return 0;
+        }
+}
+
static void *
vc4_create_sampler_state(struct pipe_context *pctx,
const struct pipe_sampler_state *cso)
{
- return vc4_generic_cso_state_create(cso, sizeof(*cso));
+ static const uint8_t minfilter_map[6] = {
+ VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
+ VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
+ VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
+ VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
+ VC4_TEX_P1_MINFILT_NEAREST,
+ VC4_TEX_P1_MINFILT_LINEAR,
+ };
+ static const uint32_t magfilter_map[] = {
+ [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
+ [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
+ };
+ bool either_nearest =
+ (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
+ cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
+ struct vc4_sampler_state *so = CALLOC_STRUCT(vc4_sampler_state);
+
+ if (!so)
+ return NULL;
+
+ memcpy(so, cso, sizeof(*cso));
+
+ so->texture_p1 =
+ (VC4_SET_FIELD(magfilter_map[cso->mag_img_filter],
+ VC4_TEX_P1_MAGFILT) |
+ VC4_SET_FIELD(minfilter_map[cso->min_mip_filter * 2 +
+ cso->min_img_filter],
+ VC4_TEX_P1_MINFILT) |
+ VC4_SET_FIELD(translate_wrap(cso->wrap_s, either_nearest),
+ VC4_TEX_P1_WRAP_S) |
+ VC4_SET_FIELD(translate_wrap(cso->wrap_t, either_nearest),
+ VC4_TEX_P1_WRAP_T));
+
+ return so;
}
static void
@@ -499,13 +552,13 @@ static struct pipe_sampler_view *
vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
const struct pipe_sampler_view *cso)
{
- struct pipe_sampler_view *so = malloc(sizeof(*so));
+ struct vc4_sampler_view *so = malloc(sizeof(*so));
struct vc4_resource *rsc = vc4_resource(prsc);
if (!so)
return NULL;
- *so = *cso;
+ so->base = *cso;
pipe_reference(NULL, &prsc->reference);
@@ -516,18 +569,19 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
* Also, Raspberry Pi doesn't support sampling from raster textures,
* so we also have to copy to a temporary then.
*/
- if (so->u.tex.first_level ||
+ if (cso->u.tex.first_level ||
rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
struct vc4_resource *shadow_parent = vc4_resource(prsc);
struct pipe_resource tmpl = shadow_parent->base.b;
struct vc4_resource *clone;
tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
- tmpl.width0 = u_minify(tmpl.width0, so->u.tex.first_level);
- tmpl.height0 = u_minify(tmpl.height0, so->u.tex.first_level);
- tmpl.last_level = so->u.tex.last_level - so->u.tex.first_level;
+ tmpl.width0 = u_minify(tmpl.width0, cso->u.tex.first_level);
+ tmpl.height0 = u_minify(tmpl.height0, cso->u.tex.first_level);
+ tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;
prsc = vc4_resource_create(pctx->screen, &tmpl);
+ rsc = vc4_resource(prsc);
clone = vc4_resource(prsc);
clone->shadow_parent = &shadow_parent->base.b;
/* Flag it as needing update of the contents from the parent. */
@@ -535,11 +589,23 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
}
- so->texture = prsc;
- so->reference.count = 1;
- so->context = pctx;
+ so->base.texture = prsc;
+ so->base.reference.count = 1;
+ so->base.context = pctx;
- return so;
+ so->texture_p0 =
+ (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
+ VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
+ VC4_SET_FIELD(cso->u.tex.last_level -
+ cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
+ VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE,
+ VC4_TEX_P0_CMMODE));
+ so->texture_p1 =
+ (VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
+ VC4_SET_FIELD(prsc->height0 & 2047, VC4_TEX_P1_HEIGHT) |
+ VC4_SET_FIELD(prsc->width0 & 2047, VC4_TEX_P1_WIDTH));
+
+ return &so->base;
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_tiling.c b/src/gallium/drivers/vc4/vc4_tiling.c
index f9801c9cefd..cf86eb0fa31 100644
--- a/src/gallium/drivers/vc4/vc4_tiling.c
+++ b/src/gallium/drivers/vc4/vc4_tiling.c
@@ -127,13 +127,10 @@ vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp)
static void
check_box_utile_alignment(const struct pipe_box *box, int cpp)
{
- uint32_t utile_w = vc4_utile_width(cpp);
- uint32_t utile_h = vc4_utile_height(cpp);
-
- assert(!(box->x & (utile_w - 1)));
- assert(!(box->y & (utile_h - 1)));
- assert(!(box->width & (utile_w - 1)));
- assert(!(box->height & (utile_h - 1)));
+ assert(!(box->x & (vc4_utile_width(cpp) - 1)));
+ assert(!(box->y & (vc4_utile_height(cpp) - 1)));
+ assert(!(box->width & (vc4_utile_width(cpp) - 1)));
+ assert(!(box->height & (vc4_utile_height(cpp) - 1)));
}
static void
diff --git a/src/gallium/drivers/vc4/vc4_tiling.h b/src/gallium/drivers/vc4/vc4_tiling.h
index b5d10da3417..b90bba70200 100644
--- a/src/gallium/drivers/vc4/vc4_tiling.h
+++ b/src/gallium/drivers/vc4/vc4_tiling.h
@@ -24,9 +24,9 @@
#ifndef VC4_TILING_H
#define VC4_TILING_H
-uint32_t vc4_utile_width(int cpp);
-uint32_t vc4_utile_height(int cpp);
-bool vc4_size_is_lt(uint32_t width, uint32_t height, int cpp);
+uint32_t vc4_utile_width(int cpp) ATTRIBUTE_CONST;
+uint32_t vc4_utile_height(int cpp) ATTRIBUTE_CONST;
+bool vc4_size_is_lt(uint32_t width, uint32_t height, int cpp) ATTRIBUTE_CONST;
void vc4_load_utile(void *dst, void *src, uint32_t dst_stride, uint32_t cpp);
void vc4_store_utile(void *dst, void *src, uint32_t src_stride, uint32_t cpp);
void vc4_load_tiled_image(void *dst, uint32_t dst_stride,
diff --git a/src/gallium/drivers/vc4/vc4_uniforms.c b/src/gallium/drivers/vc4/vc4_uniforms.c
new file mode 100644
index 00000000000..85d6998205e
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/u_pack_color.h"
+#include "util/format_srgb.h"
+
+#include "vc4_context.h"
+#include "vc4_qir.h"
+
+/**
+ * Emits texture config parameter 0 for sampler slot \p unit: the sampler
+ * view's precomputed texture_p0 word, written through cl_reloc() against the
+ * BO of the view's texture resource.
+ */
+static void
+write_texture_p0(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t unit)
+{
+        struct vc4_sampler_view *view =
+                vc4_sampler_view(texstate->textures[unit]);
+        struct vc4_resource *tex_rsc = vc4_resource(view->base.texture);
+
+        cl_reloc(vc4, &vc4->uniforms, uniforms,
+                 tex_rsc->bo, view->texture_p0);
+}
+
+/**
+ * Emits texture config parameter 1 for sampler slot \p unit: the OR of the
+ * texture_p1 words precomputed in the sampler view and in the sampler state.
+ */
+static void
+write_texture_p1(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t unit)
+{
+        struct vc4_sampler_view *view =
+                vc4_sampler_view(texstate->textures[unit]);
+        struct vc4_sampler_state *state =
+                vc4_sampler_state(texstate->samplers[unit]);
+        uint32_t p1 = view->texture_p1 | state->texture_p1;
+
+        cl_aligned_u32(uniforms, p1);
+}
+
+/**
+ * Emits texture config parameter 2 in its cube-map-stride form.
+ *
+ * \p data packs two values: the sampler slot in bits 0-15 and the value for
+ * the BSLOD field in bit 16.
+ */
+static void
+write_texture_p2(struct vc4_context *vc4,
+                 struct vc4_cl_out **uniforms,
+                 struct vc4_texture_stateobj *texstate,
+                 uint32_t data)
+{
+        uint32_t unit = data & 0xffff;
+        uint32_t bslod = (data >> 16) & 1;
+        struct pipe_sampler_view *view = texstate->textures[unit];
+        struct vc4_resource *tex_rsc = vc4_resource(view->texture);
+
+        cl_aligned_u32(uniforms,
+                       VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
+                                     VC4_TEX_P2_PTYPE) |
+                       VC4_SET_FIELD(tex_rsc->cube_map_stride >> 12,
+                                     VC4_TEX_P2_CMST) |
+                       VC4_SET_FIELD(bslod, VC4_TEX_P2_BSLOD));
+}
+
+
+/*
+ * Expands to a brace-enclosed initializer holding the four
+ * UTIL_FORMAT_SWIZZLE_* values named by x, y, z and w.
+ *
+ * NOTE(review): nothing in the visible contents of this new file uses this
+ * macro -- confirm whether it can be dropped.
+ */
+#define SWIZ(x,y,z,w) { \
+ UTIL_FORMAT_SWIZZLE_##x, \
+ UTIL_FORMAT_SWIZZLE_##y, \
+ UTIL_FORMAT_SWIZZLE_##z, \
+ UTIL_FORMAT_SWIZZLE_##w \
+}
+
+/**
+ * Emits a single 32-bit uniform holding the sampler's border color for
+ * sampler slot \p unit, packed according to the texture's format.
+ *
+ * Only uc.ui[0] is written to the uniform stream.
+ */
+static void
+write_texture_border_color(struct vc4_context *vc4,
+ struct vc4_cl_out **uniforms,
+ struct vc4_texture_stateobj *texstate,
+ uint32_t unit)
+{
+ struct pipe_sampler_state *sampler = texstate->samplers[unit];
+ struct pipe_sampler_view *texture = texstate->textures[unit];
+ struct vc4_resource *rsc = vc4_resource(texture->texture);
+ union util_color uc;
+
+ const struct util_format_description *tex_format_desc =
+ util_format_description(texture->format);
+
+ /* Work on a local copy; for sRGB formats the three color channels
+ * (not alpha) are converted from linear to sRGB.
+ */
+ float border_color[4];
+ for (int i = 0; i < 4; i++)
+ border_color[i] = sampler->border_color.f[i];
+ if (util_format_is_srgb(texture->format)) {
+ for (int i = 0; i < 3; i++)
+ border_color[i] =
+ util_format_linear_to_srgb_float(border_color[i]);
+ }
+
+ /* Turn the border color into the layout of channels that it would
+ * have when stored as texture contents.
+ */
+ float storage_color[4];
+ util_format_unswizzle_4f(storage_color,
+ border_color,
+ tex_format_desc->swizzle);
+
+ /* Now, pack so that when the vc4_format-sampled texture contents are
+ * replaced with our border color, the vc4_get_format_swizzle()
+ * swizzling will get the right channels.
+ */
+ if (util_format_is_depth_or_stencil(texture->format)) {
+ /* Depth/stencil reads the sampler's first channel directly,
+ * not the unswizzled storage_color.
+ */
+ uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
+ sampler->border_color.f[0]) << 8;
+ } else {
+ switch (rsc->vc4_format) {
+ /* Any format not listed below is packed like RGBA8888. */
+ default:
+ case VC4_TEXTURE_TYPE_RGBA8888:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_RGBA4444:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_RGB565:
+ util_pack_color(storage_color,
+ PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ break;
+ case VC4_TEXTURE_TYPE_ALPHA:
+ uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
+ break;
+ case VC4_TEXTURE_TYPE_LUMALPHA:
+ uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
+ (float_to_ubyte(storage_color[0]) << 0));
+ break;
+ }
+ }
+
+ cl_aligned_u32(uniforms, uc.ui[0]);
+}
+
+/**
+ * Returns, as the raw bits produced by fui(), the reciprocal of the width
+ * (for QUNIFORM_TEXRECT_SCALE_X) or of the height (for any other
+ * \p contents value) of the texture bound to sampler slot \p data.
+ */
+static uint32_t
+get_texrect_scale(struct vc4_texture_stateobj *texstate,
+                  enum quniform_contents contents,
+                  uint32_t data)
+{
+        struct pipe_resource *prsc = texstate->textures[data]->texture;
+        uint32_t size = (contents == QUNIFORM_TEXRECT_SCALE_X ?
+                         prsc->width0 : prsc->height0);
+
+        return fui(1.0f / size);
+}
+
+/**
+ * Allocates a BO of shader->ubo_size bytes and copies each of the shader's
+ * UBO ranges from \p gallium_uniforms into it.
+ *
+ * \return the new BO, or NULL when the shader has no UBO data
+ *         (shader->ubo_size == 0).
+ *
+ * NOTE(review): the results of vc4_bo_alloc() and vc4_bo_map() are used
+ * without a NULL check.
+ */
+static struct vc4_bo *
+vc4_upload_ubo(struct vc4_context *vc4,
+ struct vc4_compiled_shader *shader,
+ const uint32_t *gallium_uniforms)
+{
+ if (!shader->ubo_size)
+ return NULL;
+
+ struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo");
+ uint32_t *data = vc4_bo_map(ubo);
+ for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) {
+ /* NOTE(review): dst_offset and src_offset are added to uint32_t
+ * pointers, so they are scaled by 4 bytes, while size is used
+ * as a byte count by memcpy(). Confirm the offsets are meant to
+ * be in 32-bit words; if they are byte offsets this copies
+ * from/to the wrong addresses.
+ */
+ memcpy(data + shader->ubo_ranges[i].dst_offset,
+ gallium_uniforms + shader->ubo_ranges[i].src_offset,
+ shader->ubo_ranges[i].size);
+ }
+
+ return ubo;
+}
+
+/**
+ * Writes the uniform stream for \p shader into vc4->uniforms.
+ *
+ * One 32-bit value (or relocation) is emitted per entry of shader->uniforms,
+ * with the value source selected by the entry's contents tag and, where
+ * needed, its data word.
+ *
+ * \param cb        constant buffer state; buffer 0's user_buffer supplies
+ *                  the gallium uniform values.
+ * \param texstate  texture/sampler state used by the QUNIFORM_TEXTURE_* and
+ *                  QUNIFORM_TEXRECT_* entries.
+ */
+void
+vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
+ struct vc4_constbuf_stateobj *cb,
+ struct vc4_texture_stateobj *texstate)
+{
+ struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
+ const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
+ /* NULL when the shader has no UBO ranges to upload. */
+ struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);
+
+ cl_ensure_space(&vc4->uniforms, (uinfo->count +
+ uinfo->num_texture_samples) * 4);
+
+ struct vc4_cl_out *uniforms =
+ cl_start_shader_reloc(&vc4->uniforms,
+ uinfo->num_texture_samples);
+
+ for (int i = 0; i < uinfo->count; i++) {
+
+ switch (uinfo->contents[i]) {
+ case QUNIFORM_CONSTANT:
+ cl_aligned_u32(&uniforms, uinfo->data[i]);
+ break;
+ case QUNIFORM_UNIFORM:
+ cl_aligned_u32(&uniforms,
+ gallium_uniforms[uinfo->data[i]]);
+ break;
+ case QUNIFORM_VIEWPORT_X_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);
+ break;
+ case QUNIFORM_VIEWPORT_Y_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);
+ break;
+
+ case QUNIFORM_VIEWPORT_Z_OFFSET:
+ cl_aligned_f(&uniforms, vc4->viewport.translate[2]);
+ break;
+ case QUNIFORM_VIEWPORT_Z_SCALE:
+ cl_aligned_f(&uniforms, vc4->viewport.scale[2]);
+ break;
+
+ case QUNIFORM_USER_CLIP_PLANE:
+ /* data encodes plane index * 4 + component. */
+ cl_aligned_f(&uniforms,
+ vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P0:
+ write_texture_p0(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P1:
+ write_texture_p1(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXTURE_CONFIG_P2:
+ write_texture_p2(vc4, &uniforms, texstate,
+ uinfo->data[i]);
+ break;
+
+ case QUNIFORM_UBO_ADDR:
+ cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
+ break;
+
+ case QUNIFORM_TEXTURE_BORDER_COLOR:
+ write_texture_border_color(vc4, &uniforms,
+ texstate, uinfo->data[i]);
+ break;
+
+ case QUNIFORM_TEXRECT_SCALE_X:
+ case QUNIFORM_TEXRECT_SCALE_Y:
+ cl_aligned_u32(&uniforms,
+ get_texrect_scale(texstate,
+ uinfo->contents[i],
+ uinfo->data[i]));
+ break;
+
+ case QUNIFORM_BLEND_CONST_COLOR_X:
+ case QUNIFORM_BLEND_CONST_COLOR_Y:
+ case QUNIFORM_BLEND_CONST_COLOR_Z:
+ case QUNIFORM_BLEND_CONST_COLOR_W:
+ /* The channel index is the distance from the _X
+ * enumerant; the value is clamped to [0, 1].
+ */
+ cl_aligned_f(&uniforms,
+ CLAMP(vc4->blend_color.color[uinfo->contents[i] -
+ QUNIFORM_BLEND_CONST_COLOR_X],
+ 0, 1));
+ break;
+
+ case QUNIFORM_STENCIL:
+ /* Only stencil uniforms 0 and 1 get a stencil
+ * reference value ORed in (shifted up by 8 bits).
+ */
+ cl_aligned_u32(&uniforms,
+ vc4->zsa->stencil_uniforms[uinfo->data[i]] |
+ (uinfo->data[i] <= 1 ?
+ (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+ 0));
+ break;
+
+ case QUNIFORM_ALPHA_REF:
+ cl_aligned_f(&uniforms,
+ vc4->zsa->base.alpha.ref_value);
+ break;
+ }
+#if 0
+ uint32_t written_val = *((uint32_t *)uniforms - 1);
+ fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
+ shader, i, written_val, uif(written_val));
+#endif
+ }
+
+ cl_end(&vc4->uniforms, uniforms);
+
+ /* Drop this function's reference to the temporary UBO. */
+ vc4_bo_unreference(&ubo);
+}
+
+/**
+ * Walks the shader's uniform list and records in shader->uniform_dirty_bits
+ * the union of the VC4_DIRTY_* bits associated with each uniform's contents
+ * type.  QUNIFORM_CONSTANT entries contribute no bits.
+ */
+void
+vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
+{
+        struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
+        uint32_t flags = 0;
+
+        for (int i = 0; i < uinfo->count; i++) {
+                switch (uinfo->contents[i]) {
+                case QUNIFORM_CONSTANT:
+                        break;
+
+                case QUNIFORM_UNIFORM:
+                case QUNIFORM_UBO_ADDR:
+                        flags |= VC4_DIRTY_CONSTBUF;
+                        break;
+
+                case QUNIFORM_VIEWPORT_X_SCALE:
+                case QUNIFORM_VIEWPORT_Y_SCALE:
+                case QUNIFORM_VIEWPORT_Z_OFFSET:
+                case QUNIFORM_VIEWPORT_Z_SCALE:
+                        flags |= VC4_DIRTY_VIEWPORT;
+                        break;
+
+                case QUNIFORM_USER_CLIP_PLANE:
+                        flags |= VC4_DIRTY_CLIP;
+                        break;
+
+                case QUNIFORM_TEXTURE_CONFIG_P0:
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                case QUNIFORM_TEXTURE_CONFIG_P2:
+                case QUNIFORM_TEXTURE_BORDER_COLOR:
+                case QUNIFORM_TEXRECT_SCALE_X:
+                case QUNIFORM_TEXRECT_SCALE_Y:
+                        flags |= VC4_DIRTY_TEXSTATE;
+                        break;
+
+                case QUNIFORM_BLEND_CONST_COLOR_X:
+                case QUNIFORM_BLEND_CONST_COLOR_Y:
+                case QUNIFORM_BLEND_CONST_COLOR_Z:
+                case QUNIFORM_BLEND_CONST_COLOR_W:
+                        flags |= VC4_DIRTY_BLEND_COLOR;
+                        break;
+
+                case QUNIFORM_STENCIL:
+                case QUNIFORM_ALPHA_REF:
+                        flags |= VC4_DIRTY_ZSA;
+                        break;
+                }
+        }
+
+        shader->uniform_dirty_bits = flags;
+}
diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h
index 0e953695b52..7eed57018b7 100644
--- a/src/gallium/include/pipe/p_compiler.h
+++ b/src/gallium/include/pipe/p_compiler.h
@@ -94,11 +94,6 @@ typedef unsigned char boolean;
#endif
#endif
-/* XXX: Use standard `inline` keyword instead */
-#ifndef INLINE
-# define INLINE inline
-#endif
-
/* Forced function inlining */
#ifndef ALWAYS_INLINE
# ifdef __GNUC__
@@ -106,7 +101,7 @@ typedef unsigned char boolean;
# elif defined(_MSC_VER)
# define ALWAYS_INLINE __forceinline
# else
-# define ALWAYS_INLINE INLINE
+# define ALWAYS_INLINE inline
# endif
#endif
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index 794aabe85f2..ac14f86fdc4 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -100,8 +100,8 @@
#else
#define PIPE_ARCH_SSE
#endif
-#if defined(PIPE_CC_GCC) && !defined(__SSSE3__)
-/* #warning SSE3 support requires -msse3 compiler options */
+#if defined(PIPE_CC_GCC) && (__GNUC__ * 100 + __GNUC_MINOR__) < 409 && !defined(__SSSE3__)
+/* #warning SSSE3 support requires -mssse3 compiler options before GCC 4.9 */
#else
#define PIPE_ARCH_SSSE3
#endif
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index c2eedf8e7c7..f89dae98a2f 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -48,6 +48,7 @@ struct pipe_depth_stencil_alpha_state;
struct pipe_draw_info;
struct pipe_fence_handle;
struct pipe_framebuffer_state;
+struct pipe_image_view;
struct pipe_index_buffer;
struct pipe_query;
struct pipe_poly_stipple;
@@ -57,6 +58,7 @@ struct pipe_resource;
struct pipe_sampler_state;
struct pipe_sampler_view;
struct pipe_scissor_state;
+struct pipe_shader_buffer;
struct pipe_shader_state;
struct pipe_stencil_ref;
struct pipe_stream_output_target;
@@ -236,20 +238,38 @@ struct pipe_context {
const float default_inner_level[2]);
/**
- * Bind an array of shader resources that will be used by the
- * graphics pipeline. Any resources that were previously bound to
- * the specified range will be unbound after this call.
+ * Bind an array of shader buffers that will be used by a shader.
+ * Any buffers that were previously bound to the specified range
+ * will be unbound.
*
- * \param start first resource to bind.
- * \param count number of consecutive resources to bind.
- * \param resources array of pointers to the resources to bind, it
+ * \param shader selects shader stage
+ * \param start_slot first buffer slot to bind.
+ * \param count number of consecutive buffers to bind.
+ * \param buffers array of the buffers to bind, it
* should contain at least \a count elements
- * unless it's NULL, in which case no new
- * resources will be bound.
+ * unless it's NULL, in which case no buffers will
+ * be bound.
*/
- void (*set_shader_resources)(struct pipe_context *,
- unsigned start, unsigned count,
- struct pipe_surface **resources);
+ void (*set_shader_buffers)(struct pipe_context *, unsigned shader,
+ unsigned start_slot, unsigned count,
+ struct pipe_shader_buffer *buffers);
+
+ /**
+ * Bind an array of images that will be used by a shader.
+ * Any images that were previously bound to the specified range
+ * will be unbound.
+ *
+ * \param shader selects shader stage
+ * \param start_slot first image slot to bind.
+ * \param count number of consecutive images to bind.
+ * \param images array of pointers to the images to bind, it
+ * should contain at least \a count elements
+ * unless it's NULL, in which case no images will
+ * be bound.
+ */
+ void (*set_shader_images)(struct pipe_context *, unsigned shader,
+ unsigned start_slot, unsigned count,
+ struct pipe_image_view **images);
void (*set_vertex_buffers)( struct pipe_context *,
unsigned start_slot,
@@ -361,8 +381,14 @@ struct pipe_context {
const void *clear_value,
int clear_value_size);
- /** Flush draw commands
+ /**
+ * Flush draw commands
*
+ * NOTE: use screen->fence_reference() (or equivalent) to transfer
+ * new fence ref to **fence, to ensure that previous fence is unref'd
+ *
+ * \param fence if not NULL, an old fence to unref and transfer a
+ * new fence reference to
* \param flags bitfield of enum pipe_flush_flags values.
*/
void (*flush)(struct pipe_context *pipe,
@@ -391,6 +417,17 @@ struct pipe_context {
void (*surface_destroy)(struct pipe_context *ctx,
struct pipe_surface *);
+ /**
+ * Create an image view into a buffer or texture to be used with load,
+ * store, and atomic instructions by a shader stage.
+ */
+ struct pipe_image_view * (*create_image_view)(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ const struct pipe_image_view *templat);
+
+ void (*image_view_destroy)(struct pipe_context *ctx,
+ struct pipe_image_view *view);
+
/**
* Map a resource.
*
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 88b7b7699c1..2ba56eac793 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -351,9 +351,10 @@ enum pipe_flush_flags
#define PIPE_BIND_CURSOR (1 << 11) /* mouse cursor */
#define PIPE_BIND_CUSTOM (1 << 12) /* state-tracker/winsys usages */
#define PIPE_BIND_GLOBAL (1 << 13) /* set_global_binding */
-#define PIPE_BIND_SHADER_RESOURCE (1 << 14) /* set_shader_resources */
-#define PIPE_BIND_COMPUTE_RESOURCE (1 << 15) /* set_compute_resources */
-#define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 16) /* pipe_draw_info.indirect */
+#define PIPE_BIND_SHADER_BUFFER (1 << 14) /* set_shader_buffers */
+#define PIPE_BIND_SHADER_IMAGE (1 << 15) /* set_shader_images */
+#define PIPE_BIND_COMPUTE_RESOURCE (1 << 16) /* set_compute_resources */
+#define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 17) /* pipe_draw_info.indirect */
/**
* The first two flags above were previously part of the amorphous
@@ -374,9 +375,9 @@ enum pipe_flush_flags
* The third flag has been added to be able to force textures to be created
* in linear mode (no tiling).
*/
-#define PIPE_BIND_SCANOUT (1 << 17) /* */
-#define PIPE_BIND_SHARED (1 << 18) /* get_texture_handle ??? */
-#define PIPE_BIND_LINEAR (1 << 19)
+#define PIPE_BIND_SCANOUT (1 << 18) /* */
+#define PIPE_BIND_SHARED (1 << 19) /* get_texture_handle ??? */
+#define PIPE_BIND_LINEAR (1 << 20)
/**
@@ -605,6 +606,10 @@ enum pipe_cap
PIPE_CAP_MULTISAMPLE_Z_RESOLVE,
PIPE_CAP_RESOURCE_FROM_USER_MEMORY,
PIPE_CAP_DEVICE_RESET_STATUS_QUERY,
+ PIPE_CAP_MAX_SHADER_PATCH_VARYINGS,
+ PIPE_CAP_TEXTURE_FLOAT_LINEAR,
+ PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR,
+ PIPE_CAP_DEPTH_BOUNDS_TEST,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
@@ -700,7 +705,8 @@ enum pipe_compute_cap
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY,
PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS,
- PIPE_COMPUTE_CAP_IMAGES_SUPPORTED
+ PIPE_COMPUTE_CAP_IMAGES_SUPPORTED,
+ PIPE_COMPUTE_CAP_SUBGROUP_SIZE
};
/**
@@ -759,6 +765,7 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+ /* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
/* PIPE_DRIVER_QUERY_TYPE_UINT */
@@ -787,11 +794,13 @@ union pipe_color_union
enum pipe_driver_query_type
{
- PIPE_DRIVER_QUERY_TYPE_UINT64 = 0,
- PIPE_DRIVER_QUERY_TYPE_UINT = 1,
- PIPE_DRIVER_QUERY_TYPE_FLOAT = 2,
- PIPE_DRIVER_QUERY_TYPE_PERCENTAGE = 3,
- PIPE_DRIVER_QUERY_TYPE_BYTES = 4,
+ PIPE_DRIVER_QUERY_TYPE_UINT64 = 0,
+ PIPE_DRIVER_QUERY_TYPE_UINT = 1,
+ PIPE_DRIVER_QUERY_TYPE_FLOAT = 2,
+ PIPE_DRIVER_QUERY_TYPE_PERCENTAGE = 3,
+ PIPE_DRIVER_QUERY_TYPE_BYTES = 4,
+ PIPE_DRIVER_QUERY_TYPE_MICROSECONDS = 5,
+ PIPE_DRIVER_QUERY_TYPE_HZ = 6,
};
enum pipe_driver_query_group_type
@@ -800,6 +809,15 @@ enum pipe_driver_query_group_type
PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
};
+/* Whether an average value per frame or a cumulative value should be
+ * displayed.
+ */
+enum pipe_driver_query_result_type
+{
+ PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE = 0,
+ PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
+};
+
union pipe_numeric_type_union
{
uint64_t u64;
@@ -813,6 +831,7 @@ struct pipe_driver_query_info
unsigned query_type; /* PIPE_QUERY_DRIVER_SPECIFIC + i */
union pipe_numeric_type_union max_value; /* max value that can be returned */
enum pipe_driver_query_type type;
+ enum pipe_driver_query_result_type result_type;
unsigned group_id;
};
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 98b2159defe..0d2658313e5 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -211,12 +211,6 @@ struct pipe_screen {
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence );
- /**
- * Checks whether the fence has been signalled.
- */
- boolean (*fence_signalled)( struct pipe_screen *screen,
- struct pipe_fence_handle *fence );
-
/**
* Wait for the fence to finish.
* \param timeout in nanoseconds (may be PIPE_TIMEOUT_INFINITE).
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index bb57e805c29..6e07b2c5c7c 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -685,7 +685,7 @@ struct tgsi_src_register
*
* File, Index and Swizzle are handled the same as in tgsi_src_register.
*
- * If ArrayID is zero the whole register file might be is indirectly addressed,
+ * If ArrayID is zero the whole register file might be indirectly addressed,
* if not only the Declaration with this ArrayID is accessed by this operand.
*
*/
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index a18f12e8a87..1e493f47ccf 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -61,7 +61,8 @@ extern "C" {
#define PIPE_MAX_SHADER_INPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */
#define PIPE_MAX_SHADER_OUTPUTS 80 /* 32 GENERIC + 32 PATCH + 16 others */
#define PIPE_MAX_SHADER_SAMPLER_VIEWS 32
-#define PIPE_MAX_SHADER_RESOURCES 32
+#define PIPE_MAX_SHADER_BUFFERS 32
+#define PIPE_MAX_SHADER_IMAGES 32
#define PIPE_MAX_TEXTURE_LEVELS 16
#define PIPE_MAX_SO_BUFFERS 4
#define PIPE_MAX_SO_OUTPUTS 64
@@ -222,6 +223,9 @@ struct pipe_depth_state
unsigned enabled:1; /**< depth test enabled? */
unsigned writemask:1; /**< allow depth buffer writes? */
unsigned func:3; /**< depth test func (PIPE_FUNC_x) */
+ unsigned bounds_test:1; /**< depth bounds test enabled? */
+ float bounds_min; /**< minimum depth bound */
+ float bounds_max; /**< maximum depth bound */
};
@@ -387,6 +391,31 @@ struct pipe_sampler_view
};
+/**
+ * A view into a writable buffer or texture that can be bound to a shader
+ * stage.
+ */
+struct pipe_image_view
+{
+ struct pipe_reference reference;
+ struct pipe_resource *resource; /**< resource into which this is a view */
+ struct pipe_context *context; /**< context this view belongs to */
+ enum pipe_format format; /**< typed PIPE_FORMAT_x */
+
+ union {
+ struct {
+ unsigned first_layer:16; /**< first layer to use for array textures */
+ unsigned last_layer:16; /**< last layer to use for array textures */
+ unsigned level:8; /**< mipmap level to use */
+ } tex;
+ struct {
+ unsigned first_element;
+ unsigned last_element;
+ } buf;
+ } u;
+};
+
+
/**
* Subregion of 1D/2D/3D image resource.
*/
@@ -467,6 +496,16 @@ struct pipe_constant_buffer
};
+/**
+ * An untyped shader buffer supporting loads, stores, and atomics.
+ */
+struct pipe_shader_buffer {
+ struct pipe_resource *buffer; /**< the actual buffer */
+ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */
+ unsigned buffer_size; /**< how much data can be read in shader */
+};
+
+
/**
* A stream output target. The structure specifies the range vertices can
* be written to.
diff --git a/src/gallium/include/pipe/p_video_enums.h b/src/gallium/include/pipe/p_video_enums.h
index e28d57dd3b0..9a20146f43e 100644
--- a/src/gallium/include/pipe/p_video_enums.h
+++ b/src/gallium/include/pipe/p_video_enums.h
@@ -34,7 +34,8 @@ enum pipe_video_format
PIPE_VIDEO_FORMAT_MPEG12, /**< MPEG1, MPEG2 */
PIPE_VIDEO_FORMAT_MPEG4, /**< DIVX, XVID */
PIPE_VIDEO_FORMAT_VC1, /**< WMV */
- PIPE_VIDEO_FORMAT_MPEG4_AVC /**< H.264 */
+ PIPE_VIDEO_FORMAT_MPEG4_AVC,/**< H.264 */
+ PIPE_VIDEO_FORMAT_HEVC /**< H.265 */
};
enum pipe_video_profile
@@ -54,7 +55,12 @@ enum pipe_video_profile
PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10,
PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH422,
- PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444
+ PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444,
+ PIPE_VIDEO_PROFILE_HEVC_MAIN,
+ PIPE_VIDEO_PROFILE_HEVC_MAIN_10,
+ PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL,
+ PIPE_VIDEO_PROFILE_HEVC_MAIN_12,
+ PIPE_VIDEO_PROFILE_HEVC_MAIN_444
};
/* Video caps, can be different for each codec/profile */
@@ -68,7 +74,8 @@ enum pipe_video_cap
PIPE_VIDEO_CAP_PREFERS_INTERLACED = 5,
PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE = 6,
PIPE_VIDEO_CAP_SUPPORTS_INTERLACED = 7,
- PIPE_VIDEO_CAP_MAX_LEVEL = 8
+ PIPE_VIDEO_CAP_MAX_LEVEL = 8,
+ PIPE_VIDEO_CAP_STACKED_FRAMES = 9
};
enum pipe_video_entrypoint
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 3713cd91b09..7d13151e643 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -376,6 +376,111 @@ struct pipe_h264_enc_picture_desc
bool not_referenced;
};
+struct pipe_h265_sps
+{
+ uint8_t chroma_format_idc;
+ uint8_t separate_colour_plane_flag;
+ uint32_t pic_width_in_luma_samples;
+ uint32_t pic_height_in_luma_samples;
+ uint8_t bit_depth_luma_minus8;
+ uint8_t bit_depth_chroma_minus8;
+ uint8_t log2_max_pic_order_cnt_lsb_minus4;
+ uint8_t sps_max_dec_pic_buffering_minus1;
+ uint8_t log2_min_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_luma_coding_block_size;
+ uint8_t log2_min_transform_block_size_minus2;
+ uint8_t log2_diff_max_min_transform_block_size;
+ uint8_t max_transform_hierarchy_depth_inter;
+ uint8_t max_transform_hierarchy_depth_intra;
+ uint8_t scaling_list_enabled_flag;
+ uint8_t ScalingList4x4[6][16];
+ uint8_t ScalingList8x8[6][64];
+ uint8_t ScalingList16x16[6][64];
+ uint8_t ScalingList32x32[2][64];
+ uint8_t ScalingListDCCoeff16x16[6];
+ uint8_t ScalingListDCCoeff32x32[2];
+ uint8_t amp_enabled_flag;
+ uint8_t sample_adaptive_offset_enabled_flag;
+ uint8_t pcm_enabled_flag;
+ uint8_t pcm_sample_bit_depth_luma_minus1;
+ uint8_t pcm_sample_bit_depth_chroma_minus1;
+ uint8_t log2_min_pcm_luma_coding_block_size_minus3;
+ uint8_t log2_diff_max_min_pcm_luma_coding_block_size;
+ uint8_t pcm_loop_filter_disabled_flag;
+ uint8_t num_short_term_ref_pic_sets;
+ uint8_t long_term_ref_pics_present_flag;
+ uint8_t num_long_term_ref_pics_sps;
+ uint8_t sps_temporal_mvp_enabled_flag;
+ uint8_t strong_intra_smoothing_enabled_flag;
+};
+
+struct pipe_h265_pps
+{
+ struct pipe_h265_sps *sps;
+
+ uint8_t dependent_slice_segments_enabled_flag;
+ uint8_t output_flag_present_flag;
+ uint8_t num_extra_slice_header_bits;
+ uint8_t sign_data_hiding_enabled_flag;
+ uint8_t cabac_init_present_flag;
+ uint8_t num_ref_idx_l0_default_active_minus1;
+ uint8_t num_ref_idx_l1_default_active_minus1;
+ int8_t init_qp_minus26;
+ uint8_t constrained_intra_pred_flag;
+ uint8_t transform_skip_enabled_flag;
+ uint8_t cu_qp_delta_enabled_flag;
+ uint8_t diff_cu_qp_delta_depth;
+ int8_t pps_cb_qp_offset;
+ int8_t pps_cr_qp_offset;
+ uint8_t pps_slice_chroma_qp_offsets_present_flag;
+ uint8_t weighted_pred_flag;
+ uint8_t weighted_bipred_flag;
+ uint8_t transquant_bypass_enabled_flag;
+ uint8_t tiles_enabled_flag;
+ uint8_t entropy_coding_sync_enabled_flag;
+ uint8_t num_tile_columns_minus1;
+ uint8_t num_tile_rows_minus1;
+ uint8_t uniform_spacing_flag;
+ uint16_t column_width_minus1[20];
+ uint16_t row_height_minus1[22];
+ uint8_t loop_filter_across_tiles_enabled_flag;
+ uint8_t pps_loop_filter_across_slices_enabled_flag;
+ uint8_t deblocking_filter_control_present_flag;
+ uint8_t deblocking_filter_override_enabled_flag;
+ uint8_t pps_deblocking_filter_disabled_flag;
+ int8_t pps_beta_offset_div2;
+ int8_t pps_tc_offset_div2;
+ uint8_t lists_modification_present_flag;
+ uint8_t log2_parallel_merge_level_minus2;
+ uint8_t slice_segment_header_extension_present_flag;
+};
+
+struct pipe_h265_picture_desc
+{
+ struct pipe_picture_desc base;
+
+ struct pipe_h265_pps *pps;
+
+ uint8_t IDRPicFlag;
+ uint8_t RAPPicFlag;
+ uint8_t CurrRpsIdx;
+ uint32_t NumPocTotalCurr;
+ uint32_t NumDeltaPocsOfRefRpsIdx;
+ uint32_t NumShortTermPictureSliceHeaderBits;
+ uint32_t NumLongTermPictureSliceHeaderBits;
+
+ int32_t CurrPicOrderCntVal;
+ struct pipe_video_buffer *ref[16];
+ int32_t PicOrderCntVal[16];
+ uint8_t IsLongTerm[16];
+ uint8_t NumPocStCurrBefore;
+ uint8_t NumPocStCurrAfter;
+ uint8_t NumPocLtCurr;
+ uint8_t RefPicSetStCurrBefore[8];
+ uint8_t RefPicSetStCurrAfter[8];
+ uint8_t RefPicSetLtCurr[8];
+};
+
#ifdef __cplusplus
}
#endif
diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h
index ecf1c07fb98..356863d8531 100644
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -533,7 +533,7 @@ struct st_api
/**
* Return true if the visual has the specified buffers.
*/
-static INLINE boolean
+static inline boolean
st_visual_have_buffers(const struct st_visual *visual, unsigned mask)
{
return ((visual->buffer_mask & mask) == mask);
diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am
index f46d9ef457d..fd0ccf88cc5 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include Makefile.sources
AM_CPPFLAGS = \
diff --git a/src/gallium/state_trackers/clover/api/dispatch.cpp b/src/gallium/state_trackers/clover/api/dispatch.cpp
index b5a4094cf2f..f10babe31a0 100644
--- a/src/gallium/state_trackers/clover/api/dispatch.cpp
+++ b/src/gallium/state_trackers/clover/api/dispatch.cpp
@@ -123,12 +123,12 @@ namespace clover {
clCreateImage,
clCreateProgramWithBuiltInKernels,
clCompileProgram,
- NULL, // clLinkProgram
+ clLinkProgram,
clUnloadPlatformCompiler,
- NULL, // clGetKernelArgInfo
- NULL, // clEnqueueFillBuffer
- NULL, // clEnqueueFillImage
- NULL, // clEnqueueMigrateMemObjects
+ clGetKernelArgInfo,
+ clEnqueueFillBuffer,
+ clEnqueueFillImage,
+ clEnqueueMigrateMemObjects,
clEnqueueMarkerWithWaitList,
clEnqueueBarrierWithWaitList,
NULL, // clGetExtensionFunctionAddressForPlatform
diff --git a/src/gallium/state_trackers/clover/api/dispatch.hpp b/src/gallium/state_trackers/clover/api/dispatch.hpp
index ffae1ae6e12..7f622822ef9 100644
--- a/src/gallium/state_trackers/clover/api/dispatch.hpp
+++ b/src/gallium/state_trackers/clover/api/dispatch.hpp
@@ -693,7 +693,13 @@ struct _cl_icd_dispatch {
CL_API_ENTRY cl_int (CL_API_CALL *clUnloadPlatformCompiler)(
cl_platform_id platform);
- void *clGetKernelArgInfo;
+ CL_API_ENTRY cl_int (CL_API_CALL *clGetKernelArgInfo)(
+ cl_kernel kernel,
+ cl_uint arg_indx,
+ cl_kernel_arg_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret);
CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueFillBuffer)(
cl_command_queue command_queue,
@@ -701,7 +707,7 @@ struct _cl_icd_dispatch {
const void *pattern,
size_t pattern_size,
size_t offset,
- size_t cb,
+ size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event);
@@ -710,13 +716,20 @@ struct _cl_icd_dispatch {
cl_command_queue command_queue,
cl_mem image,
const void *fill_color,
- const size_t origin[3],
- const size_t region[3],
+ const size_t *origin,
+ const size_t *region,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event);
- void *clEnqueueMigrateMemObjects;
+ CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueMigrateMemObjects)(
+ cl_command_queue command_queue,
+ cl_uint num_mem_objects,
+ const cl_mem *mem_objects,
+ cl_mem_migration_flags flags,
+ cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list,
+ cl_event *event);
CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueMarkerWithWaitList)(
cl_command_queue command_queue,
diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp
index 05cc392a914..73ba34abe8e 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -169,7 +169,7 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
break;
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
- buf.as_scalar() = 1;
+ buf.as_scalar() = dev.subgroup_size();
break;
case CL_KERNEL_PRIVATE_MEM_SIZE:
@@ -189,6 +189,14 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
return CL_INVALID_DEVICE;
}
+CLOVER_API cl_int
+clGetKernelArgInfo(cl_kernel d_kern,
+ cl_uint idx, cl_kernel_arg_info param,
+ size_t size, void *r_buf, size_t *r_size) {
+ CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+ return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
+}
+
namespace {
///
/// Common argument checking shared by kernel invocation commands.
diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp
index 3ff6ba0e1c5..1efb95b5ce7 100644
--- a/src/gallium/state_trackers/clover/api/memory.cpp
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -357,9 +357,29 @@ clCreateImage(cl_context d_ctx, cl_mem_flags flags,
const cl_image_format *format,
const cl_image_desc *image_desc,
void *host_ptr, cl_int *r_errcode) {
- // This function was added in OpenCL 1.2
- std::cerr << "CL user error: clCreateImage() not supported by OpenCL 1.1." <<
- std::endl;
+ CLOVER_NOT_SUPPORTED_UNTIL("1.2");
ret_error(r_errcode, CL_INVALID_OPERATION);
return NULL;
}
+
+CLOVER_API cl_int
+clEnqueueFillBuffer(cl_command_queue command_queue, cl_mem buffer,
+ const void *pattern, size_t pattern_size,
+ size_t offset, size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list,
+ cl_event *event) {
+ CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+ return CL_INVALID_VALUE;
+}
+
+CLOVER_API cl_int
+clEnqueueFillImage(cl_command_queue command_queue, cl_mem image,
+ const void *fill_color,
+ const size_t *origin, const size_t *region,
+ cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list,
+ cl_event *event) {
+ CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+ return CL_INVALID_VALUE;
+}
diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp
index e9b1f384344..27ca2efd0bc 100644
--- a/src/gallium/state_trackers/clover/api/program.cpp
+++ b/src/gallium/state_trackers/clover/api/program.cpp
@@ -231,6 +231,16 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs,
return e.get();
}
+CLOVER_API cl_program
+clLinkProgram(cl_context d_ctx, cl_uint num_devs, const cl_device_id *d_devs,
+ const char *p_opts, cl_uint num_progs, const cl_program *d_progs,
+ void (*pfn_notify)(cl_program, void *), void *user_data,
+ cl_int *r_errcode) {
+ CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+ ret_error(r_errcode, CL_LINKER_NOT_AVAILABLE);
+ return NULL;
+}
+
CLOVER_API cl_int
clUnloadCompiler() {
return CL_SUCCESS;
diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp b/src/gallium/state_trackers/clover/api/transfer.cpp
index fdb9405c918..f7046253be8 100644
--- a/src/gallium/state_trackers/clover/api/transfer.cpp
+++ b/src/gallium/state_trackers/clover/api/transfer.cpp
@@ -726,3 +726,15 @@ clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
} catch (error &e) {
return e.get();
}
+
+CLOVER_API cl_int
+clEnqueueMigrateMemObjects(cl_command_queue command_queue,
+ cl_uint num_mem_objects,
+ const cl_mem *mem_objects,
+ cl_mem_migration_flags flags,
+ cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list,
+ cl_event *event) {
+ CLOVER_NOT_SUPPORTED_UNTIL("1.2");
+ return CL_INVALID_VALUE;
+}
diff --git a/src/gallium/state_trackers/clover/api/util.hpp b/src/gallium/state_trackers/clover/api/util.hpp
index 918df6125a4..31e20e424b9 100644
--- a/src/gallium/state_trackers/clover/api/util.hpp
+++ b/src/gallium/state_trackers/clover/api/util.hpp
@@ -38,6 +38,13 @@
#define CLOVER_ICD_API PUBLIC
#endif
+#define CLOVER_NOT_SUPPORTED_UNTIL(version) \
+ do { \
+ std::cerr << "CL user error: " << __func__ \
+ << "() requires OpenCL version " << (version) \
+ << " or greater." << std::endl; \
+ } while (0)
+
namespace clover {
///
/// Return an error code in \a p if non-zero.
diff --git a/src/gallium/state_trackers/clover/core/compiler.hpp b/src/gallium/state_trackers/clover/core/compiler.hpp
index c68aa39db85..207641785ca 100644
--- a/src/gallium/state_trackers/clover/core/compiler.hpp
+++ b/src/gallium/state_trackers/clover/core/compiler.hpp
@@ -37,7 +37,8 @@ namespace clover {
const std::string &opts,
std::string &r_log);
- module compile_program_tgsi(const std::string &source);
+ module compile_program_tgsi(const std::string &source,
+ std::string &r_log);
}
#endif
diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp
index 42b45b7f2b8..6efff79c7f4 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -89,12 +89,12 @@ device::vendor_id() const {
size_t
device::max_images_read() const {
- return PIPE_MAX_SHADER_RESOURCES;
+ return PIPE_MAX_SHADER_IMAGES;
}
size_t
device::max_images_write() const {
- return PIPE_MAX_SHADER_RESOURCES;
+ return PIPE_MAX_SHADER_IMAGES;
}
cl_uint
@@ -185,6 +185,11 @@ device::max_block_size() const {
return { v.begin(), v.end() };
}
+cl_uint
+device::subgroup_size() const {
+ return get_compute_param(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
+}
+
std::string
device::device_name() const {
return pipe->get_name(pipe);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp
index de5fc6bb9c4..285784744f3 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -67,6 +67,7 @@ namespace clover {
bool has_doubles() const;
std::vector max_block_size() const;
+ cl_uint subgroup_size() const;
std::string device_name() const;
std::string vendor_name() const;
enum pipe_shader_ir ir_format() const;
diff --git a/src/gallium/state_trackers/clover/core/error.hpp b/src/gallium/state_trackers/clover/core/error.hpp
index 780b973383a..59a5af4c799 100644
--- a/src/gallium/state_trackers/clover/core/error.hpp
+++ b/src/gallium/state_trackers/clover/core/error.hpp
@@ -65,9 +65,9 @@ namespace clover {
cl_int code;
};
- class build_error : public error {
+ class compile_error : public error {
public:
- build_error(const std::string &what = "") :
+ compile_error(const std::string &what = "") :
error(CL_COMPILE_PROGRAM_FAILURE, what) {
}
};
diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp
index e1f9de07f83..d75b8397794 100644
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -141,7 +141,7 @@ hard_event::status() const {
else if (!_fence)
return CL_QUEUED;
- else if (!screen->fence_signalled(screen, _fence))
+ else if (!screen->fence_finish(screen, _fence, 0))
return CL_SUBMITTED;
else
diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp
index 0756f068553..a226ec1a752 100644
--- a/src/gallium/state_trackers/clover/core/kernel.cpp
+++ b/src/gallium/state_trackers/clover/core/kernel.cpp
@@ -182,6 +182,34 @@ kernel::exec_context::bind(intrusive_ptr _q,
}
break;
}
+ case module::argument::image_size: {
+ auto img = dynamic_cast(**(explicit_arg - 1)).get();
+ std::vector image_size{
+ static_cast(img->width()),
+ static_cast(img->height()),
+ static_cast(img->depth())};
+ for (auto x : image_size) {
+ auto arg = argument::create(marg);
+
+ arg->set(sizeof(x), &x);
+ arg->bind(*this, marg);
+ }
+ break;
+ }
+ case module::argument::image_format: {
+ auto img = dynamic_cast(**(explicit_arg - 1)).get();
+ cl_image_format fmt = img->format();
+ std::vector image_format{
+ static_cast(fmt.image_channel_data_type),
+ static_cast(fmt.image_channel_order)};
+ for (auto x : image_format) {
+ auto arg = argument::create(marg);
+
+ arg->set(sizeof(x), &x);
+ arg->bind(*this, marg);
+ }
+ break;
+ }
}
}
@@ -339,6 +367,9 @@ kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
void
kernel::scalar_argument::set(size_t size, const void *value) {
+ if (!value)
+ throw error(CL_INVALID_ARG_VALUE);
+
if (size != this->size)
throw error(CL_INVALID_ARG_SIZE);
@@ -407,6 +438,9 @@ kernel::local_argument::set(size_t size, const void *value) {
if (value)
throw error(CL_INVALID_ARG_VALUE);
+ if (!size)
+ throw error(CL_INVALID_ARG_SIZE);
+
_storage = size;
_set = true;
}
@@ -466,6 +500,9 @@ kernel::constant_argument::unbind(exec_context &ctx) {
void
kernel::image_rd_argument::set(size_t size, const void *value) {
+ if (!value)
+ throw error(CL_INVALID_ARG_VALUE);
+
if (size != sizeof(cl_mem))
throw error(CL_INVALID_ARG_SIZE);
@@ -494,6 +531,9 @@ kernel::image_rd_argument::unbind(exec_context &ctx) {
void
kernel::image_wr_argument::set(size_t size, const void *value) {
+ if (!value)
+ throw error(CL_INVALID_ARG_VALUE);
+
if (size != sizeof(cl_mem))
throw error(CL_INVALID_ARG_SIZE);
@@ -522,6 +562,9 @@ kernel::image_wr_argument::unbind(exec_context &ctx) {
void
kernel::sampler_argument::set(size_t size, const void *value) {
+ if (!value)
+ throw error(CL_INVALID_SAMPLER);
+
if (size != sizeof(cl_sampler))
throw error(CL_INVALID_ARG_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/kernel.hpp b/src/gallium/state_trackers/clover/core/kernel.hpp
index d6432a4df8d..4ba6ff467b7 100644
--- a/src/gallium/state_trackers/clover/core/kernel.hpp
+++ b/src/gallium/state_trackers/clover/core/kernel.hpp
@@ -190,7 +190,16 @@ namespace clover {
pipe_surface *st;
};
- class image_rd_argument : public argument {
+ class image_argument : public argument {
+ public:
+ const image *get() const {
+ return img;
+ }
+ protected:
+ image *img;
+ };
+
+ class image_rd_argument : public image_argument {
public:
virtual void set(size_t size, const void *value);
virtual void bind(exec_context &ctx,
@@ -198,11 +207,10 @@ namespace clover {
virtual void unbind(exec_context &ctx);
private:
- image *img;
pipe_sampler_view *st;
};
- class image_wr_argument : public argument {
+ class image_wr_argument : public image_argument {
public:
virtual void set(size_t size, const void *value);
virtual void bind(exec_context &ctx,
@@ -210,7 +218,6 @@ namespace clover {
virtual void unbind(exec_context &ctx);
private:
- image *img;
pipe_surface *st;
};
diff --git a/src/gallium/state_trackers/clover/core/memory.cpp b/src/gallium/state_trackers/clover/core/memory.cpp
index 055336a3325..b852e6896fe 100644
--- a/src/gallium/state_trackers/clover/core/memory.cpp
+++ b/src/gallium/state_trackers/clover/core/memory.cpp
@@ -189,7 +189,7 @@ image2d::image2d(clover::context &ctx, cl_mem_flags flags,
const cl_image_format *format, size_t width,
size_t height, size_t row_pitch,
void *host_ptr) :
- image(ctx, flags, format, width, height, 0,
+ image(ctx, flags, format, width, height, 1,
row_pitch, 0, height * row_pitch, host_ptr) {
}
diff --git a/src/gallium/state_trackers/clover/core/module.hpp b/src/gallium/state_trackers/clover/core/module.hpp
index 9d656885945..5db0548872c 100644
--- a/src/gallium/state_trackers/clover/core/module.hpp
+++ b/src/gallium/state_trackers/clover/core/module.hpp
@@ -72,7 +72,9 @@ namespace clover {
enum semantic {
general,
grid_dimension,
- grid_offset
+ grid_offset,
+ image_size,
+ image_format
};
argument(enum type type, size_t size,
diff --git a/src/gallium/state_trackers/clover/core/program.cpp b/src/gallium/state_trackers/clover/core/program.cpp
index 0d6cc402db7..6eebd9c5cda 100644
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -56,14 +56,14 @@ program::build(const ref_vector &devs, const char *opts,
try {
auto module = (dev.ir_format() == PIPE_SHADER_IR_TGSI ?
- compile_program_tgsi(_source) :
+ compile_program_tgsi(_source, log) :
compile_program_llvm(_source, headers,
dev.ir_format(),
dev.ir_target(), build_opts(dev),
log));
_binaries.insert({ &dev, module });
_logs.insert({ &dev, log });
- } catch (const build_error &) {
+ } catch (const error &) {
_logs.insert({ &dev, log });
throw;
}
diff --git a/src/gallium/state_trackers/clover/core/resource.cpp b/src/gallium/state_trackers/clover/core/resource.cpp
index 78ebafb644f..10a29a94eac 100644
--- a/src/gallium/state_trackers/clover/core/resource.cpp
+++ b/src/gallium/state_trackers/clover/core/resource.cpp
@@ -132,6 +132,7 @@ root_resource::root_resource(clover::device &dev, memory_obj &obj,
info.depth0 = 1;
}
+ info.array_size = 1;
info.target = translate_target(obj.type());
info.bind = (PIPE_BIND_SAMPLER_VIEW |
PIPE_BIND_COMPUTE_RESOURCE |
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 9b91fee9032..63c3f8ee49b 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -108,7 +108,7 @@ namespace {
name, llvm::MemoryBuffer::getMemBuffer(source));
if (!c.ExecuteAction(act))
- throw build_error(log);
+ throw compile_error(log);
}
module
@@ -256,7 +256,7 @@ namespace {
r_log = log;
if (!ExecSuccess)
- throw build_error();
+ throw compile_error();
// Get address spaces map to be able to find kernel argument address space
memcpy(address_spaces, c.getTarget().getAddressSpaceMap(),
@@ -269,17 +269,19 @@ namespace {
#endif
}
- void
- find_kernels(llvm::Module *mod, std::vector &kernels) {
+ std::vector
+ find_kernels(const llvm::Module *mod) {
const llvm::NamedMDNode *kernel_node =
mod->getNamedMetadata("opencl.kernels");
// This means there are no kernels in the program. The spec does not
// require that we return an error here, but there will be an error if
// the user tries to pass this program to a clCreateKernel() call.
if (!kernel_node) {
- return;
+ return std::vector();
}
+ std::vector kernels;
+ kernels.reserve(kernel_node->getNumOperands());
for (unsigned i = 0; i < kernel_node->getNumOperands(); ++i) {
#if HAVE_LLVM >= 0x0306
kernels.push_back(llvm::mdconst::dyn_extract(
@@ -288,11 +290,11 @@ namespace {
#endif
kernel_node->getOperand(i)->getOperand(0)));
}
+ return kernels;
}
void
- optimize(llvm::Module *mod, unsigned optimization_level,
- const std::vector &kernels) {
+ optimize(llvm::Module *mod, unsigned optimization_level) {
#if HAVE_LLVM >= 0x0307
llvm::legacy::PassManager PM;
@@ -300,6 +302,8 @@ namespace {
llvm::PassManager PM;
#endif
+ const std::vector kernels = find_kernels(mod);
+
// Add a function internalizer pass.
//
// By default, the function internalizer pass will look for a function
@@ -340,18 +344,91 @@ namespace {
PM.run(*mod);
}
+ // Kernel metadata
+
+ // Find the entry of the module's "opencl.kernels" named metadata node
+ // that describes \p kernel_func.
+ //
+ // Each operand of "opencl.kernels" is a node whose operand 0 refers to
+ // a kernel function; the first operand referring to \p kernel_func is
+ // returned.
+ //
+ // Returns nullptr if the module has no "opencl.kernels" node or if no
+ // entry refers to \p kernel_func.
+ const llvm::MDNode *
+ get_kernel_metadata(const llvm::Function *kernel_func) {
+    auto mod = kernel_func->getParent();
+    auto kernels_node = mod->getNamedMetadata("opencl.kernels");
+    if (!kernels_node) {
+       return nullptr;
+    }
+
+    for (unsigned i = 0; i < kernels_node->getNumOperands(); ++i) {
+#if HAVE_LLVM >= 0x0306
+       auto func = llvm::mdconst::dyn_extract<llvm::Function>(
+#else
+       auto func = llvm::dyn_cast<llvm::Function>(
+#endif
+          kernels_node->getOperand(i)->getOperand(0));
+       if (func == kernel_func) {
+          return kernels_node->getOperand(i);
+       }
+    }
+
+    return nullptr;
+ }
+
+ // Convert \p md_operand to a metadata node and, in assert-enabled
+ // builds, check that it has the expected layout:
+ //  - exactly \p expect_num_args operands, and
+ //  - operand 0 is a metadata string equal to \p expect_name.
+ //
+ // Returns the node; the checks compile away when NDEBUG is defined.
+ llvm::MDNode*
+ node_from_op_checked(const llvm::MDOperand &md_operand,
+                      llvm::StringRef expect_name,
+                      unsigned expect_num_args)
+ {
+    auto node = llvm::cast<llvm::MDNode>(md_operand);
+    assert(node->getNumOperands() == expect_num_args &&
+           "Wrong number of operands.");
+
+    auto str_node = llvm::cast<llvm::MDString>(node->getOperand(0));
+    assert(str_node->getString() == expect_name &&
+           "Wrong metadata node name.");
+    // str_node is only read by the assert above; keep NDEBUG builds
+    // free of unused-variable warnings.
+    (void)str_node;
+
+    return node;
+ }
+
+ // Metadata strings describing one kernel argument.
+ struct kernel_arg_md {
+ // Type name of the argument; compared against names such as
+ // "image2d_t" when classifying arguments.
+ llvm::StringRef type_name;
+ // Access qualifier of the argument; compared against "read_only" and
+ // "write_only" for image arguments.
+ llvm::StringRef access_qual;
+ // Store both strings as given.
+ kernel_arg_md(llvm::StringRef type_name_, llvm::StringRef access_qual_):
+ type_name(type_name_), access_qual(access_qual_) {}
+ };
+
+ // Collect the type name and access qualifier of every argument of
+ // \p kernel_func from its "opencl.kernels" metadata entry.
+ //
+ // Operand 2 of the entry is expected to be the
+ // "kernel_arg_access_qual" node and operand 3 the "kernel_arg_type"
+ // node.  In both, operand 0 holds the node name and operand i + 1
+ // describes argument i, hence the expected operand count of
+ // num_args + 1.
+ //
+ // Returns one kernel_arg_md per argument, in argument order.
+ std::vector<kernel_arg_md>
+ get_kernel_arg_md(const llvm::Function *kernel_func) {
+    auto num_args = kernel_func->getArgumentList().size();
+
+    auto kernel_node = get_kernel_metadata(kernel_func);
+    // get_kernel_metadata() yields nullptr when no entry matches; make
+    // that invariant violation visible instead of dereferencing it.
+    assert(kernel_node && "Kernel metadata not found in module.");
+    assert(kernel_node->getNumOperands() > 3 &&
+           "Kernel metadata is missing argument information.");
+
+    auto aq = node_from_op_checked(kernel_node->getOperand(2),
+                                   "kernel_arg_access_qual", num_args + 1);
+    auto ty = node_from_op_checked(kernel_node->getOperand(3),
+                                   "kernel_arg_type", num_args + 1);
+
+    std::vector<kernel_arg_md> res;
+    res.reserve(num_args);
+    for (unsigned i = 0; i < num_args; ++i) {
+       res.push_back(kernel_arg_md(
+          llvm::cast<llvm::MDString>(ty->getOperand(i+1))->getString(),
+          llvm::cast<llvm::MDString>(aq->getOperand(i+1))->getString()));
+    }
+
+    return res;
+ }
+
std::vector
get_kernel_args(const llvm::Module *mod, const std::string &kernel_name,
const clang::LangAS::Map &address_spaces) {
std::vector args;
llvm::Function *kernel_func = mod->getFunction(kernel_name);
+ assert(kernel_func && "Kernel name not found in module.");
+ auto arg_md = get_kernel_arg_md(kernel_func);
llvm::DataLayout TD(mod);
+ llvm::Type *size_type =
+ TD.getSmallestLegalIntType(mod->getContext(), sizeof(cl_uint) * 8);
- for (llvm::Function::const_arg_iterator I = kernel_func->arg_begin(),
- E = kernel_func->arg_end(); I != E; ++I) {
- const llvm::Argument &arg = *I;
+ for (const auto &arg: kernel_func->args()) {
llvm::Type *arg_type = arg.getType();
const unsigned arg_store_size = TD.getTypeStoreSize(arg_type);
@@ -369,6 +446,59 @@ namespace {
unsigned target_size = TD.getTypeStoreSize(target_type);
unsigned target_align = TD.getABITypeAlignment(target_type);
+ llvm::StringRef type_name = arg_md[arg.getArgNo()].type_name;
+ llvm::StringRef access_qual = arg_md[arg.getArgNo()].access_qual;
+
+ // Image
+ const bool is_image2d = type_name == "image2d_t";
+ const bool is_image3d = type_name == "image3d_t";
+ if (is_image2d || is_image3d) {
+ const bool is_write_only = access_qual == "write_only";
+ const bool is_read_only = access_qual == "read_only";
+
+ typename module::argument::type marg_type;
+ if (is_image2d && is_read_only) {
+ marg_type = module::argument::image2d_rd;
+ } else if (is_image2d && is_write_only) {
+ marg_type = module::argument::image2d_wr;
+ } else if (is_image3d && is_read_only) {
+ marg_type = module::argument::image3d_rd;
+ } else if (is_image3d && is_write_only) {
+ marg_type = module::argument::image3d_wr;
+ } else {
+ assert(0 && "Wrong image access qualifier");
+ }
+
+ args.push_back(module::argument(marg_type,
+ arg_store_size, target_size,
+ target_align,
+ module::argument::zero_ext));
+ continue;
+ }
+
+ // Image size implicit argument
+ if (type_name == "__llvm_image_size") {
+ args.push_back(module::argument(module::argument::scalar,
+ sizeof(cl_uint),
+ TD.getTypeStoreSize(size_type),
+ TD.getABITypeAlignment(size_type),
+ module::argument::zero_ext,
+ module::argument::image_size));
+ continue;
+ }
+
+ // Image format implicit argument
+ if (type_name == "__llvm_image_format") {
+ args.push_back(module::argument(module::argument::scalar,
+ sizeof(cl_uint),
+ TD.getTypeStoreSize(size_type),
+ TD.getABITypeAlignment(size_type),
+ module::argument::zero_ext,
+ module::argument::image_format));
+ continue;
+ }
+
+ // Other types
if (llvm::isa(arg_type) && arg.hasByValAttr()) {
arg_type =
llvm::dyn_cast(arg_type)->getElementType();
@@ -413,9 +543,6 @@ namespace {
// Append implicit arguments. XXX - The types, ordering and
// vector size of the implicit arguments should depend on the
// target according to the selected calling convention.
- llvm::Type *size_type =
- TD.getSmallestLegalIntType(mod->getContext(), sizeof(cl_uint) * 8);
-
args.push_back(
module::argument(module::argument::scalar, sizeof(cl_uint),
TD.getTypeStoreSize(size_type),
@@ -435,7 +562,6 @@ namespace {
module
build_module_llvm(llvm::Module *mod,
- const std::vector &kernels,
clang::LangAS::Map& address_spaces) {
module m;
@@ -445,8 +571,11 @@ namespace {
llvm::raw_svector_ostream bitcode_ostream(llvm_bitcode);
llvm::BitstreamWriter writer(llvm_bitcode);
llvm::WriteBitcodeToFile(mod, bitcode_ostream);
+#if HAVE_LLVM < 0x0308
bitcode_ostream.flush();
+#endif
+ const std::vector kernels = find_kernels(mod);
for (unsigned i = 0; i < kernels.size(); ++i) {
std::string kernel_name = kernels[i]->getName();
std::vector args =
@@ -485,7 +614,7 @@ namespace {
LLVMDisposeMessage(err_message);
if (err) {
- throw build_error();
+ throw compile_error();
}
}
@@ -505,7 +634,7 @@ namespace {
if (LLVMGetTargetFromTriple(triple.c_str(), &target, &error_message)) {
r_log = std::string(error_message);
LLVMDisposeMessage(error_message);
- throw build_error();
+ throw compile_error();
}
LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
@@ -514,7 +643,7 @@ namespace {
if (!tm) {
r_log = "Could not create TargetMachine: " + triple;
- throw build_error();
+ throw compile_error();
}
if (dump_asm) {
@@ -567,7 +696,7 @@ namespace {
const char *name;
if (gelf_getshdr(section, &symtab_header) != &symtab_header) {
r_log = "Failed to read ELF section header.";
- throw build_error();
+ throw compile_error();
}
name = elf_strptr(elf, section_str_index, symtab_header.sh_name);
if (!strcmp(name, ".symtab")) {
@@ -577,9 +706,9 @@ namespace {
}
if (!symtab) {
r_log = "Unable to find symbol table.";
- throw build_error();
+ throw compile_error();
}
- } catch (build_error &e) {
+ } catch (compile_error &e) {
elf_end(elf);
throw e;
}
@@ -610,10 +739,11 @@ namespace {
module
build_module_native(std::vector &code,
const llvm::Module *mod,
- const std::vector &kernels,
const clang::LangAS::Map &address_spaces,
std::string &r_log) {
+ const std::vector kernels = find_kernels(mod);
+
std::map kernel_offsets =
get_kernel_offsets(code, kernels, r_log);
@@ -650,7 +780,7 @@ namespace {
stream.flush();
*(std::string*)data = message;
- throw build_error();
+ throw compile_error();
}
}
@@ -697,7 +827,6 @@ clover::compile_program_llvm(const std::string &source,
init_targets();
- std::vector kernels;
size_t processor_str_len = std::string(target).find_first_of("-");
std::string processor(target, 0, processor_str_len);
std::string triple(target, processor_str_len + 1,
@@ -717,9 +846,7 @@ clover::compile_program_llvm(const std::string &source,
triple, processor, opts, address_spaces,
optimization_level, r_log);
- find_kernels(mod, kernels);
-
- optimize(mod, optimization_level, kernels);
+ optimize(mod, optimization_level);
if (get_debug_flags() & DBG_LLVM) {
std::string log;
@@ -738,13 +865,13 @@ clover::compile_program_llvm(const std::string &source,
m = module();
break;
case PIPE_SHADER_IR_LLVM:
- m = build_module_llvm(mod, kernels, address_spaces);
+ m = build_module_llvm(mod, address_spaces);
break;
case PIPE_SHADER_IR_NATIVE: {
std::vector code = compile_native(mod, triple, processor,
get_debug_flags() & DBG_ASM,
r_log);
- m = build_module_native(code, mod, kernels, address_spaces, r_log);
+ m = build_module_native(code, mod, address_spaces, r_log);
break;
}
}
diff --git a/src/gallium/state_trackers/clover/tgsi/compiler.cpp b/src/gallium/state_trackers/clover/tgsi/compiler.cpp
index b70104e7604..54cb747e6fb 100644
--- a/src/gallium/state_trackers/clover/tgsi/compiler.cpp
+++ b/src/gallium/state_trackers/clover/tgsi/compiler.cpp
@@ -32,7 +32,7 @@ using namespace clover;
namespace {
void
- read_header(const std::string &header, module &m) {
+ read_header(const std::string &header, module &m, std::string &r_log) {
std::istringstream ls(header);
std::string line;
@@ -45,8 +45,10 @@ namespace {
if (!(ts >> name))
continue;
- if (!(ts >> offset))
- throw build_error("invalid kernel start address");
+ if (!(ts >> offset)) {
+ r_log = "invalid kernel start address";
+ throw compile_error();
+ }
while (ts >> tok) {
if (tok == "scalar")
@@ -67,8 +69,10 @@ namespace {
args.push_back({ module::argument::image3d_wr, 4 });
else if (tok == "sampler")
args.push_back({ module::argument::sampler, 0 });
- else
- throw build_error("invalid kernel argument");
+ else {
+ r_log = "invalid kernel argument";
+ throw compile_error();
+ }
}
m.syms.push_back({ name, 0, offset, args });
@@ -76,11 +80,13 @@ namespace {
}
void
- read_body(const char *source, module &m) {
+ read_body(const char *source, module &m, std::string &r_log) {
tgsi_token prog[1024];
- if (!tgsi_text_translate(source, prog, Elements(prog)))
- throw build_error("translate failed");
+ if (!tgsi_text_translate(source, prog, Elements(prog))) {
+ r_log = "translate failed";
+ throw compile_error();
+ }
unsigned sz = tgsi_num_tokens(prog) * sizeof(tgsi_token);
std::vector data( (char *)prog, (char *)prog + sz );
@@ -89,13 +95,13 @@ namespace {
}
module
-clover::compile_program_tgsi(const std::string &source) {
+clover::compile_program_tgsi(const std::string &source, std::string &r_log) {
const size_t body_pos = source.find("COMP\n");
const char *body = &source[body_pos];
module m;
- read_header({ source.begin(), source.begin() + body_pos }, m);
- read_body(body, m);
+ read_header({ source.begin(), source.begin() + body_pos }, m, r_log);
+ read_body(body, m, r_log);
return m;
}
diff --git a/src/gallium/state_trackers/dri/Android.mk b/src/gallium/state_trackers/dri/Android.mk
index 188e4a1404d..43f0de9b464 100644
--- a/src/gallium/state_trackers/dri/Android.mk
+++ b/src/gallium/state_trackers/dri/Android.mk
@@ -44,14 +44,10 @@ LOCAL_STATIC_LIBRARIES := \
libmesa_dri_common \
ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),)
-LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE
LOCAL_SRC_FILES += $(drisw_SOURCES)
endif
-# swrast only?
-ifeq ($(MESA_GPU_DRIVERS),swrast)
-LOCAL_CFLAGS += -D__NOT_HAVE_DRM_H
-else
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
LOCAL_SRC_FILES += $(dri2_SOURCES)
LOCAL_SHARED_LIBRARIES := libdrm
endif
diff --git a/src/gallium/state_trackers/dri/Makefile.am b/src/gallium/state_trackers/dri/Makefile.am
index d2c7a82d720..9f4deba0c1e 100644
--- a/src/gallium/state_trackers/dri/Makefile.am
+++ b/src/gallium/state_trackers/dri/Makefile.am
@@ -50,10 +50,6 @@ noinst_LTLIBRARIES = libdri.la
libdri_la_SOURCES = $(common_SOURCES)
if HAVE_DRISW
-if !HAVE_DRI2
-AM_CPPFLAGS += \
- -D__NOT_HAVE_DRM_H
-endif
libdri_la_SOURCES += $(drisw_SOURCES)
endif
diff --git a/src/gallium/state_trackers/dri/SConscript b/src/gallium/state_trackers/dri/SConscript
index 89b5e611c2e..657300baf13 100644
--- a/src/gallium/state_trackers/dri/SConscript
+++ b/src/gallium/state_trackers/dri/SConscript
@@ -5,10 +5,7 @@ Import('*')
env = env.Clone()
-# XXX: If HAVE_DRI2
env.PkgUseModules(['DRM'])
-# else
-#env.Append(CPPDEFINES = [('__NOT_HAVE_DRM_H', '1')])
env.Append(CPPPATH = [
'#/src',
@@ -20,7 +17,6 @@ env.Append(CPPPATH = [
env.Append(CPPDEFINES = [
('GALLIUM_STATIC_TARGETS', '1'),
- 'GALLIUM_SOFTPIPE',
])
sources = env.ParseSourceList('Makefile.sources', 'common_SOURCES')
diff --git a/src/gallium/state_trackers/dri/dri2.c b/src/gallium/state_trackers/dri/dri2.c
index 8d93f786433..91b443147d6 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -554,7 +554,7 @@ dri2_allocate_textures(struct dri_context *ctx,
if (drawable->textures[statt]) {
templ.format = drawable->textures[statt]->format;
- templ.bind = drawable->textures[statt]->bind;
+ templ.bind = drawable->textures[statt]->bind & ~PIPE_BIND_SCANOUT;
templ.nr_samples = drawable->stvis.samples;
/* Try to reuse the resource.
@@ -1460,7 +1460,7 @@ dri2_init_screen(__DRIscreen * sPriv)
throttle_ret = dd_configuration(DRM_CONF_THROTTLE);
dmabuf_ret = dd_configuration(DRM_CONF_SHARE_FD);
#else
- if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd, false)) {
+ if (pipe_loader_drm_probe_fd(&screen->dev, screen->fd)) {
pscreen = pipe_loader_create_screen(screen->dev, PIPE_SEARCH_DIR);
throttle_ret = pipe_loader_configuration(screen->dev, DRM_CONF_THROTTLE);
diff --git a/src/gallium/state_trackers/dri/dri2_buffer.h b/src/gallium/state_trackers/dri/dri2_buffer.h
index e8e474ddb76..0cee4e906e6 100644
--- a/src/gallium/state_trackers/dri/dri2_buffer.h
+++ b/src/gallium/state_trackers/dri/dri2_buffer.h
@@ -11,7 +11,7 @@ struct dri2_buffer
struct pipe_resource *resource;
};
-static INLINE struct dri2_buffer *
+static inline struct dri2_buffer *
dri2_buffer(__DRIbuffer * driBufferPriv)
{
return (struct dri2_buffer *) driBufferPriv;
diff --git a/src/gallium/state_trackers/dri/dri_context.h b/src/gallium/state_trackers/dri/dri_context.h
index 56dfa2ccc70..96f06442fa0 100644
--- a/src/gallium/state_trackers/dri/dri_context.h
+++ b/src/gallium/state_trackers/dri/dri_context.h
@@ -59,7 +59,7 @@ struct dri_context
struct hud_context *hud;
};
-static INLINE struct dri_context *
+static inline struct dri_context *
dri_context(__DRIcontext * driContextPriv)
{
if (!driContextPriv)
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c
index b8afe6c4d23..0d2929aaaa1 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -279,7 +279,12 @@ dri_drawable_get_format(struct dri_drawable *drawable,
case ST_ATTACHMENT_BACK_LEFT:
case ST_ATTACHMENT_FRONT_RIGHT:
case ST_ATTACHMENT_BACK_RIGHT:
- *format = drawable->stvis.color_format;
+ /* Other pieces of the driver stack get confused and behave incorrectly
+ * when they get an sRGB drawable. st/mesa receives "drawable->stvis"
+ * through other means and handles it correctly, so we don't really need
+ * to use an sRGB format here.
+ */
+ *format = util_format_linear(drawable->stvis.color_format);
*bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
break;
case ST_ATTACHMENT_DEPTH_STENCIL:
diff --git a/src/gallium/state_trackers/dri/dri_drawable.h b/src/gallium/state_trackers/dri/dri_drawable.h
index c5142181e89..1f9842ea541 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.h
+++ b/src/gallium/state_trackers/dri/dri_drawable.h
@@ -87,7 +87,7 @@ struct dri_drawable
struct pipe_resource *res);
};
-static INLINE struct dri_drawable *
+static inline struct dri_drawable *
dri_drawable(__DRIdrawable * driDrawPriv)
{
return (struct dri_drawable *) (driDrawPriv)
diff --git a/src/gallium/state_trackers/dri/dri_query_renderer.c b/src/gallium/state_trackers/dri/dri_query_renderer.c
index 4a28ac37b70..ea31b6c1e10 100644
--- a/src/gallium/state_trackers/dri/dri_query_renderer.c
+++ b/src/gallium/state_trackers/dri/dri_query_renderer.c
@@ -42,6 +42,20 @@ dri2_query_renderer_integer(__DRIscreen *_screen, int param,
PIPE_CAP_UMA);
return 0;
+ case __DRI2_RENDERER_HAS_TEXTURE_3D:
+ value[0] =
+ screen->base.screen->get_param(screen->base.screen,
+ PIPE_CAP_MAX_TEXTURE_3D_LEVELS) != 0;
+ return 0;
+
+ case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB:
+ value[0] =
+ screen->base.screen->is_format_supported(screen->base.screen,
+ PIPE_FORMAT_B8G8R8A8_SRGB,
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_RENDER_TARGET);
+ return 0;
+
default:
return driQueryRendererIntegerCommon(_screen, param, value);
}
diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c
index 85393d867e4..c4c2d9c8fb1 100644
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -103,14 +103,18 @@ dri_fill_st_options(struct st_config_options *options,
static const __DRIconfig **
dri_fill_in_modes(struct dri_screen *screen)
{
- static const mesa_format mesa_formats[3] = {
+ static const mesa_format mesa_formats[] = {
MESA_FORMAT_B8G8R8A8_UNORM,
MESA_FORMAT_B8G8R8X8_UNORM,
+ MESA_FORMAT_B8G8R8A8_SRGB,
+ MESA_FORMAT_B8G8R8X8_SRGB,
MESA_FORMAT_B5G6R5_UNORM,
};
- static const enum pipe_format pipe_formats[3] = {
+ static const enum pipe_format pipe_formats[] = {
PIPE_FORMAT_BGRA8888_UNORM,
PIPE_FORMAT_BGRX8888_UNORM,
+ PIPE_FORMAT_BGRA8888_SRGB,
+ PIPE_FORMAT_BGRX8888_SRGB,
PIPE_FORMAT_B5G6R5_UNORM,
};
mesa_format format;
@@ -186,6 +190,11 @@ dri_fill_in_modes(struct dri_screen *screen)
unsigned num_msaa_modes = 0; /* includes a single-sample mode */
uint8_t msaa_modes[MSAA_VISUAL_MAX_SAMPLES];
+ if (!p_screen->is_format_supported(p_screen, pipe_formats[format],
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_RENDER_TARGET))
+ continue;
+
for (i = 1; i <= msaa_samples_max; i++) {
int samples = i > 1 ? i : 0;
@@ -241,9 +250,15 @@ dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen,
if (mode->redBits == 8) {
if (mode->alphaBits == 8)
- stvis->color_format = PIPE_FORMAT_BGRA8888_UNORM;
+ if (mode->sRGBCapable)
+ stvis->color_format = PIPE_FORMAT_BGRA8888_SRGB;
+ else
+ stvis->color_format = PIPE_FORMAT_BGRA8888_UNORM;
else
- stvis->color_format = PIPE_FORMAT_BGRX8888_UNORM;
+ if (mode->sRGBCapable)
+ stvis->color_format = PIPE_FORMAT_BGRX8888_SRGB;
+ else
+ stvis->color_format = PIPE_FORMAT_BGRX8888_UNORM;
} else {
stvis->color_format = PIPE_FORMAT_B5G6R5_UNORM;
}
diff --git a/src/gallium/state_trackers/dri/dri_screen.h b/src/gallium/state_trackers/dri/dri_screen.h
index 173f4038cdb..4bcb0291d86 100644
--- a/src/gallium/state_trackers/dri/dri_screen.h
+++ b/src/gallium/state_trackers/dri/dri_screen.h
@@ -97,7 +97,7 @@ struct dri_screen
};
/** cast wrapper */
-static INLINE struct dri_screen *
+static inline struct dri_screen *
dri_screen(__DRIscreen * sPriv)
{
return (struct dri_screen *)sPriv->driverPrivate;
@@ -122,9 +122,7 @@ struct __DRIimageRec {
};
-#ifndef __NOT_HAVE_DRM_H
-
-static INLINE boolean
+static inline boolean
dri_with_format(__DRIscreen * sPriv)
{
const __DRIdri2LoaderExtension *loader = sPriv->dri2.loader;
@@ -134,16 +132,6 @@ dri_with_format(__DRIscreen * sPriv)
&& (loader->getBuffersWithFormat != NULL);
}
-#else
-
-static INLINE boolean
-dri_with_format(__DRIscreen * sPriv)
-{
- return TRUE;
-}
-
-#endif
-
void
dri_fill_st_visual(struct st_visual *stvis, struct dri_screen *screen,
const struct gl_config *mode);
diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c
index 4a2c1bbc2ee..4ec6992643a 100644
--- a/src/gallium/state_trackers/dri/drisw.c
+++ b/src/gallium/state_trackers/dri/drisw.c
@@ -50,7 +50,7 @@
DEBUG_GET_ONCE_BOOL_OPTION(swrast_no_present, "SWRAST_NO_PRESENT", FALSE);
static boolean swrast_no_present = FALSE;
-static INLINE void
+static inline void
get_drawable_info(__DRIdrawable *dPriv, int *x, int *y, int *w, int *h)
{
__DRIscreen *sPriv = dPriv->driScreenPriv;
@@ -61,7 +61,7 @@ get_drawable_info(__DRIdrawable *dPriv, int *x, int *y, int *w, int *h)
dPriv->loaderPrivate);
}
-static INLINE void
+static inline void
put_image(__DRIdrawable *dPriv, void *data, unsigned width, unsigned height)
{
__DRIscreen *sPriv = dPriv->driScreenPriv;
@@ -72,7 +72,7 @@ put_image(__DRIdrawable *dPriv, void *data, unsigned width, unsigned height)
data, dPriv->loaderPrivate);
}
-static INLINE void
+static inline void
put_image2(__DRIdrawable *dPriv, void *data, int x, int y,
unsigned width, unsigned height, unsigned stride)
{
@@ -84,7 +84,7 @@ put_image2(__DRIdrawable *dPriv, void *data, int x, int y,
data, dPriv->loaderPrivate);
}
-static INLINE void
+static inline void
get_image(__DRIdrawable *dPriv, int x, int y, int width, int height, void *data)
{
__DRIscreen *sPriv = dPriv->driScreenPriv;
@@ -123,7 +123,7 @@ drisw_put_image2(struct dri_drawable *drawable,
put_image2(dPriv, data, x, y, width, height, stride);
}
-static INLINE void
+static inline void
drisw_present_texture(__DRIdrawable *dPriv,
struct pipe_resource *ptex, struct pipe_box *sub_box)
{
@@ -136,7 +136,7 @@ drisw_present_texture(__DRIdrawable *dPriv,
screen->base.screen->flush_frontbuffer(screen->base.screen, ptex, 0, 0, drawable, sub_box);
}
-static INLINE void
+static inline void
drisw_invalidate_drawable(__DRIdrawable *dPriv)
{
struct dri_drawable *drawable = dri_drawable(dPriv);
@@ -146,7 +146,7 @@ drisw_invalidate_drawable(__DRIdrawable *dPriv)
p_atomic_inc(&drawable->base.stamp);
}
-static INLINE void
+static inline void
drisw_copy_to_front(__DRIdrawable * dPriv,
struct pipe_resource *ptex)
{
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h
index 6d0bc3f4d81..ffdffc0940f 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.h
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.h
@@ -378,13 +378,13 @@ xmesa_check_buffer_size(XMesaBuffer b);
extern void
xmesa_destroy_buffers_on_display(Display *dpy);
-static INLINE GLuint
+static inline GLuint
xmesa_buffer_width(XMesaBuffer b)
{
return b->width;
}
-static INLINE GLuint
+static inline GLuint
xmesa_buffer_height(XMesaBuffer b)
{
return b->height;
diff --git a/src/gallium/state_trackers/glx/xlib/xm_st.c b/src/gallium/state_trackers/glx/xlib/xm_st.c
index 7f73a3a44fe..9d0f2d25025 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_st.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_st.c
@@ -46,7 +46,7 @@ struct xmesa_st_framebuffer {
};
-static INLINE struct xmesa_st_framebuffer *
+static inline struct xmesa_st_framebuffer *
xmesa_st_framebuffer(struct st_framebuffer_iface *stfbi)
{
return (struct xmesa_st_framebuffer *) stfbi->st_manager_private;
diff --git a/src/gallium/state_trackers/hgl/hgl.c b/src/gallium/state_trackers/hgl/hgl.c
index 1e804c07e6b..0e122fe86ae 100644
--- a/src/gallium/state_trackers/hgl/hgl.c
+++ b/src/gallium/state_trackers/hgl/hgl.c
@@ -32,7 +32,7 @@
// Perform a safe void to hgl_context cast
-static INLINE struct hgl_context*
+static inline struct hgl_context*
hgl_st_context(struct st_context_iface *stctxi)
{
struct hgl_context* context;
@@ -44,7 +44,7 @@ hgl_st_context(struct st_context_iface *stctxi)
// Perform a safe void to hgl_buffer cast
-static INLINE struct hgl_buffer*
+static inline struct hgl_buffer*
hgl_st_framebuffer(struct st_framebuffer_iface *stfbi)
{
struct hgl_buffer* buffer;
diff --git a/src/gallium/state_trackers/nine/adapter9.c b/src/gallium/state_trackers/nine/adapter9.c
index 9d6d6590e00..c5ffcb15a18 100644
--- a/src/gallium/state_trackers/nine/adapter9.c
+++ b/src/gallium/state_trackers/nine/adapter9.c
@@ -163,7 +163,7 @@ NineAdapter9_GetAdapterIdentifier( struct NineAdapter9 *This,
return D3D_OK;
}
-static INLINE boolean
+static inline boolean
backbuffer_format( D3DFORMAT dfmt,
D3DFORMAT bfmt,
boolean win )
@@ -220,7 +220,7 @@ NineAdapter9_CheckDeviceType( struct NineAdapter9 *This,
return D3D_OK;
}
-static INLINE boolean
+static inline boolean
display_format( D3DFORMAT fmt,
boolean win )
{
diff --git a/src/gallium/state_trackers/nine/adapter9.h b/src/gallium/state_trackers/nine/adapter9.h
index df85b2dcc28..2129ec8edc0 100644
--- a/src/gallium/state_trackers/nine/adapter9.h
+++ b/src/gallium/state_trackers/nine/adapter9.h
@@ -49,7 +49,7 @@ struct NineAdapter9
struct d3dadapter9_context *ctx;
};
-static INLINE struct NineAdapter9 *
+static inline struct NineAdapter9 *
NineAdapter9( void *data )
{
return (struct NineAdapter9 *)data;
diff --git a/src/gallium/state_trackers/nine/authenticatedchannel9.h b/src/gallium/state_trackers/nine/authenticatedchannel9.h
index 7d374f67fca..63cb2269db4 100644
--- a/src/gallium/state_trackers/nine/authenticatedchannel9.h
+++ b/src/gallium/state_trackers/nine/authenticatedchannel9.h
@@ -29,7 +29,7 @@ struct NineAuthenticatedChannel9
{
struct NineUnknown base;
};
-static INLINE struct NineAuthenticatedChannel9 *
+static inline struct NineAuthenticatedChannel9 *
NineAuthenticatedChannel9( void *data )
{
return (struct NineAuthenticatedChannel9 *)data;
diff --git a/src/gallium/state_trackers/nine/basetexture9.h b/src/gallium/state_trackers/nine/basetexture9.h
index c803280decd..9d6fb0c002a 100644
--- a/src/gallium/state_trackers/nine/basetexture9.h
+++ b/src/gallium/state_trackers/nine/basetexture9.h
@@ -53,7 +53,7 @@ struct NineBaseTexture9
DWORD lod_resident;
} managed;
};
-static INLINE struct NineBaseTexture9 *
+static inline struct NineBaseTexture9 *
NineBaseTexture9( void *data )
{
return (struct NineBaseTexture9 *)data;
@@ -107,7 +107,7 @@ HRESULT
NineBaseTexture9_UpdateSamplerView( struct NineBaseTexture9 *This,
const int sRGB );
-static INLINE void
+static inline void
NineBaseTexture9_Validate( struct NineBaseTexture9 *This )
{
DBG_FLAG(DBG_BASETEXTURE, "This=%p dirty=%i dirty_mip=%i lod=%u/%u\n",
@@ -119,7 +119,7 @@ NineBaseTexture9_Validate( struct NineBaseTexture9 *This )
NineBaseTexture9_GenerateMipSubLevels(This);
}
-static INLINE struct pipe_sampler_view *
+static inline struct pipe_sampler_view *
NineBaseTexture9_GetSamplerView( struct NineBaseTexture9 *This, const int sRGB )
{
if (!This->view[sRGB])
@@ -131,7 +131,7 @@ NineBaseTexture9_GetSamplerView( struct NineBaseTexture9 *This, const int sRGB )
void
NineBaseTexture9_Dump( struct NineBaseTexture9 *This );
#else
-static INLINE void
+static inline void
NineBaseTexture9_Dump( struct NineBaseTexture9 *This ) { }
#endif
diff --git a/src/gallium/state_trackers/nine/cryptosession9.h b/src/gallium/state_trackers/nine/cryptosession9.h
index 660d246bfa0..d1eab72eb37 100644
--- a/src/gallium/state_trackers/nine/cryptosession9.h
+++ b/src/gallium/state_trackers/nine/cryptosession9.h
@@ -29,7 +29,7 @@ struct NineCryptoSession9
{
struct NineUnknown base;
};
-static INLINE struct NineCryptoSession9 *
+static inline struct NineCryptoSession9 *
NineCryptoSession9( void *data )
{
return (struct NineCryptoSession9 *)data;
diff --git a/src/gallium/state_trackers/nine/cubetexture9.h b/src/gallium/state_trackers/nine/cubetexture9.h
index ee7e275e4d8..999715c0a74 100644
--- a/src/gallium/state_trackers/nine/cubetexture9.h
+++ b/src/gallium/state_trackers/nine/cubetexture9.h
@@ -33,7 +33,7 @@ struct NineCubeTexture9
struct pipe_box dirty_rect[6]; /* covers all mip levels */
uint8_t *managed_buffer;
};
-static INLINE struct NineCubeTexture9 *
+static inline struct NineCubeTexture9 *
NineCubeTexture9( void *data )
{
return (struct NineCubeTexture9 *)data;
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 466b9376ce5..55948cbb67f 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -510,7 +510,7 @@ NineDevice9_GetCaps( struct NineDevice9 *This )
return &This->caps;
}
-static INLINE void
+static inline void
NineDevice9_PauseRecording( struct NineDevice9 *This )
{
if (This->record) {
@@ -519,7 +519,7 @@ NineDevice9_PauseRecording( struct NineDevice9 *This )
}
}
-static INLINE void
+static inline void
NineDevice9_ResumeRecording( struct NineDevice9 *This )
{
if (This->record) {
@@ -2697,7 +2697,7 @@ NineDevice9_GetNPatchMode( struct NineDevice9 *This )
STUB(0);
}
-static INLINE void
+static inline void
init_draw_info(struct pipe_draw_info *info,
struct NineDevice9 *dev, D3DPRIMITIVETYPE type, UINT count)
{
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index c66a273bf2e..74607451c5f 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -132,7 +132,7 @@ struct NineDevice9
* is not bound to anything by the vertex declaration */
struct pipe_resource *dummy_vbo;
};
-static INLINE struct NineDevice9 *
+static inline struct NineDevice9 *
NineDevice9( void *data )
{
return (struct NineDevice9 *)data;
diff --git a/src/gallium/state_trackers/nine/device9ex.h b/src/gallium/state_trackers/nine/device9ex.h
index a31c720553a..8375622d8a1 100644
--- a/src/gallium/state_trackers/nine/device9ex.h
+++ b/src/gallium/state_trackers/nine/device9ex.h
@@ -29,7 +29,7 @@ struct NineDevice9Ex
{
struct NineDevice9 base;
};
-static INLINE struct NineDevice9Ex *
+static inline struct NineDevice9Ex *
NineDevice9Ex( void *data )
{
return (struct NineDevice9Ex *)data;
diff --git a/src/gallium/state_trackers/nine/device9video.h b/src/gallium/state_trackers/nine/device9video.h
index ca041e55fbc..fc2faeb624a 100644
--- a/src/gallium/state_trackers/nine/device9video.h
+++ b/src/gallium/state_trackers/nine/device9video.h
@@ -29,7 +29,7 @@ struct NineDevice9Video
{
struct NineUnknown base;
};
-static INLINE struct NineDevice9Video *
+static inline struct NineDevice9Video *
NineDevice9Video( void *data )
{
return (struct NineDevice9Video *)data;
diff --git a/src/gallium/state_trackers/nine/indexbuffer9.h b/src/gallium/state_trackers/nine/indexbuffer9.h
index 0982a93fbb1..f10578f47ba 100644
--- a/src/gallium/state_trackers/nine/indexbuffer9.h
+++ b/src/gallium/state_trackers/nine/indexbuffer9.h
@@ -45,7 +45,7 @@ struct NineIndexBuffer9
D3DINDEXBUFFER_DESC desc;
};
-static INLINE struct NineIndexBuffer9 *
+static inline struct NineIndexBuffer9 *
NineIndexBuffer9( void *data )
{
return (struct NineIndexBuffer9 *)data;
diff --git a/src/gallium/state_trackers/nine/iunknown.h b/src/gallium/state_trackers/nine/iunknown.h
index 4c83ddd8e4e..628d984553e 100644
--- a/src/gallium/state_trackers/nine/iunknown.h
+++ b/src/gallium/state_trackers/nine/iunknown.h
@@ -52,7 +52,7 @@ struct NineUnknown
void (*dtor)(void *data); /* top-level dtor */
};
-static INLINE struct NineUnknown *
+static inline struct NineUnknown *
NineUnknown( void *data )
{
return (struct NineUnknown *)data;
@@ -94,14 +94,14 @@ NineUnknown_GetDevice( struct NineUnknown *This,
/*** Nine private methods ***/
-static INLINE void
+static inline void
NineUnknown_Destroy( struct NineUnknown *This )
{
assert(!(This->refs | This->bind));
This->dtor(This);
}
-static INLINE UINT
+static inline UINT
NineUnknown_Bind( struct NineUnknown *This )
{
UINT b = ++This->bind;
@@ -113,7 +113,7 @@ NineUnknown_Bind( struct NineUnknown *This )
return b;
}
-static INLINE UINT
+static inline UINT
NineUnknown_Unbind( struct NineUnknown *This )
{
UINT b = --This->bind;
@@ -129,7 +129,7 @@ NineUnknown_Unbind( struct NineUnknown *This )
return b;
}
-static INLINE void
+static inline void
NineUnknown_ConvertRefToBind( struct NineUnknown *This )
{
NineUnknown_Bind(This);
@@ -137,7 +137,7 @@ NineUnknown_ConvertRefToBind( struct NineUnknown *This )
}
/* Detach from container. */
-static INLINE void
+static inline void
NineUnknown_Detach( struct NineUnknown *This )
{
assert(This->container && !This->forward);
diff --git a/src/gallium/state_trackers/nine/nine_dump.h b/src/gallium/state_trackers/nine/nine_dump.h
index d0d4a9eb3aa..a0ffe7bf6ab 100644
--- a/src/gallium/state_trackers/nine/nine_dump.h
+++ b/src/gallium/state_trackers/nine/nine_dump.h
@@ -31,19 +31,19 @@ nine_dump_D3DTSS_value(unsigned, D3DTEXTURESTAGESTATETYPE, DWORD);
#else /* !DEBUG */
-static INLINE void
+static inline void
nine_dump_D3DADAPTER_IDENTIFIER9(unsigned ch, const D3DADAPTER_IDENTIFIER9 *id)
{ }
-static INLINE void
+static inline void
nine_dump_D3DCAPS9(unsigned ch, const D3DCAPS9 *caps)
{ }
-static INLINE void
+static inline void
nine_dump_D3DLIGHT9(unsigned ch, const D3DLIGHT9 *light)
{ }
-static INLINE void
+static inline void
nine_dump_D3DMATERIAL9(unsigned ch, const D3DMATERIAL9 *mat)
{ }
-static INLINE void
+static inline void
nine_dump_D3DTSS_value(unsigned ch, D3DTEXTURESTAGESTATETYPE tss, DWORD value)
{ }
diff --git a/src/gallium/state_trackers/nine/nine_ff.c b/src/gallium/state_trackers/nine/nine_ff.c
index c2213e6bf11..8a53f0d9038 100644
--- a/src/gallium/state_trackers/nine/nine_ff.c
+++ b/src/gallium/state_trackers/nine/nine_ff.c
@@ -295,7 +295,7 @@ struct vs_build_ctx
struct ureg_src mtlE;
};
-static INLINE unsigned
+static inline unsigned
get_texcoord_sn(struct pipe_screen *screen)
{
if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
@@ -303,7 +303,7 @@ get_texcoord_sn(struct pipe_screen *screen)
return TGSI_SEMANTIC_GENERIC;
}
-static INLINE struct ureg_src
+static inline struct ureg_src
build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
{
const unsigned i = vs->num_inputs++;
@@ -313,7 +313,7 @@ build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
}
/* NOTE: dst may alias src */
-static INLINE void
+static inline void
ureg_normalize3(struct ureg_program *ureg,
struct ureg_dst dst, struct ureg_src src,
struct ureg_dst tmp)
@@ -1033,7 +1033,7 @@ static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
}
}
-static INLINE boolean
+static inline boolean
is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
{
return !dst.WriteMask ||
@@ -1973,7 +1973,7 @@ nine_D3DMATRIX_print(const D3DMATRIX *M)
}
*/
-static INLINE float
+static inline float
nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
{
return A->m[r][0] * B->m[0][c] +
@@ -1982,7 +1982,7 @@ nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
A->m[r][3] * B->m[3][c];
}
-static INLINE float
+static inline float
nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
{
return v->x * M->m[0][c] +
@@ -1991,7 +1991,7 @@ nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
1.0f * M->m[3][c];
}
-static INLINE float
+static inline float
nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
{
return v->x * M->m[0][c] +
diff --git a/src/gallium/state_trackers/nine/nine_helpers.c b/src/gallium/state_trackers/nine/nine_helpers.c
index ed179f9aedc..98c2ae30eba 100644
--- a/src/gallium/state_trackers/nine/nine_helpers.c
+++ b/src/gallium/state_trackers/nine/nine_helpers.c
@@ -49,7 +49,7 @@ nine_range_pool_more(struct nine_range_pool *pool)
return pool->free;
}
-static INLINE struct nine_range *
+static inline struct nine_range *
nine_range_pool_get(struct nine_range_pool *pool, int16_t bgn, int16_t end)
{
struct nine_range *r = pool->free;
@@ -62,7 +62,7 @@ nine_range_pool_get(struct nine_range_pool *pool, int16_t bgn, int16_t end)
return r;
}
-static INLINE void
+static inline void
nine_ranges_coalesce(struct nine_range *r, struct nine_range_pool *pool)
{
struct nine_range *n;
diff --git a/src/gallium/state_trackers/nine/nine_helpers.h b/src/gallium/state_trackers/nine/nine_helpers.h
index 6751a822ec2..b382c5b72b3 100644
--- a/src/gallium/state_trackers/nine/nine_helpers.h
+++ b/src/gallium/state_trackers/nine/nine_helpers.h
@@ -123,7 +123,7 @@ static inline void _nine_bind(void **dst, void *obj)
} \
return D3D_OK
-static INLINE float asfloat(DWORD value)
+static inline float asfloat(DWORD value)
{
union {
float f;
@@ -149,14 +149,14 @@ struct nine_range_pool
unsigned num_slabs_max;
};
-static INLINE void
+static inline void
nine_range_pool_put(struct nine_range_pool *pool, struct nine_range *r)
{
r->next = pool->free;
pool->free = r;
}
-static INLINE void
+static inline void
nine_range_pool_put_chain(struct nine_range_pool *pool,
struct nine_range *head,
struct nine_range *tail)
diff --git a/src/gallium/state_trackers/nine/nine_pipe.c b/src/gallium/state_trackers/nine/nine_pipe.c
index 0da0b20263d..4cf37b9f59c 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.c
+++ b/src/gallium/state_trackers/nine/nine_pipe.c
@@ -118,7 +118,7 @@ nine_convert_rasterizer_state(struct cso_context *ctx, const DWORD *rs)
cso_set_rasterizer(ctx, &rast);
}
-static INLINE void
+static inline void
nine_convert_blend_state_fixup(struct pipe_blend_state *blend, const DWORD *rs)
{
if (unlikely(rs[D3DRS_SRCBLEND] == D3DBLEND_BOTHSRCALPHA ||
diff --git a/src/gallium/state_trackers/nine/nine_pipe.h b/src/gallium/state_trackers/nine/nine_pipe.h
index 91da5630122..43a7737cdf9 100644
--- a/src/gallium/state_trackers/nine/nine_pipe.h
+++ b/src/gallium/state_trackers/nine/nine_pipe.h
@@ -43,7 +43,7 @@ void nine_convert_sampler_state(struct cso_context *, int idx, const DWORD *);
void nine_pipe_context_clear(struct NineDevice9 *);
-static INLINE unsigned d3dlock_buffer_to_pipe_transfer_usage(DWORD Flags)
+static inline unsigned d3dlock_buffer_to_pipe_transfer_usage(DWORD Flags)
{
unsigned usage;
@@ -70,7 +70,7 @@ static INLINE unsigned d3dlock_buffer_to_pipe_transfer_usage(DWORD Flags)
return usage;
}
-static INLINE void
+static inline void
rect_to_pipe_box(struct pipe_box *dst, const RECT *src)
{
dst->x = src->left;
@@ -81,7 +81,7 @@ rect_to_pipe_box(struct pipe_box *dst, const RECT *src)
dst->depth = 1;
}
-static INLINE boolean
+static inline boolean
rect_to_pipe_box_clamp(struct pipe_box *dst, const RECT *src)
{
rect_to_pipe_box(dst, src);
@@ -95,7 +95,7 @@ rect_to_pipe_box_clamp(struct pipe_box *dst, const RECT *src)
return FALSE;
}
-static INLINE boolean
+static inline boolean
rect_to_pipe_box_flip(struct pipe_box *dst, const RECT *src)
{
rect_to_pipe_box(dst, src);
@@ -107,7 +107,7 @@ rect_to_pipe_box_flip(struct pipe_box *dst, const RECT *src)
return TRUE;
}
-static INLINE void
+static inline void
rect_to_pipe_box_xy_only(struct pipe_box *dst, const RECT *src)
{
user_warn(src->left > src->right || src->top > src->bottom);
@@ -118,7 +118,7 @@ rect_to_pipe_box_xy_only(struct pipe_box *dst, const RECT *src)
dst->height = src->bottom - src->top;
}
-static INLINE boolean
+static inline boolean
rect_to_pipe_box_xy_only_clamp(struct pipe_box *dst, const RECT *src)
{
rect_to_pipe_box_xy_only(dst, src);
@@ -132,7 +132,7 @@ rect_to_pipe_box_xy_only_clamp(struct pipe_box *dst, const RECT *src)
return FALSE;
}
-static INLINE void
+static inline void
rect_to_g3d_u_rect(struct u_rect *dst, const RECT *src)
{
user_warn(src->left > src->right || src->top > src->bottom);
@@ -143,7 +143,7 @@ rect_to_g3d_u_rect(struct u_rect *dst, const RECT *src)
dst->y1 = src->bottom;
}
-static INLINE void
+static inline void
d3dbox_to_pipe_box(struct pipe_box *dst, const D3DBOX *src)
{
user_warn(src->Left > src->Right);
@@ -158,13 +158,13 @@ d3dbox_to_pipe_box(struct pipe_box *dst, const D3DBOX *src)
dst->depth = src->Back - src->Front;
}
-static INLINE D3DFORMAT
+static inline D3DFORMAT
pipe_to_d3d9_format(enum pipe_format format)
{
return nine_pipe_to_d3d9_format_map[format];
}
-static INLINE boolean
+static inline boolean
depth_stencil_format( D3DFORMAT fmt )
{
static D3DFORMAT allowed[] = {
@@ -190,7 +190,7 @@ depth_stencil_format( D3DFORMAT fmt )
return FALSE;
}
-static INLINE unsigned
+static inline unsigned
d3d9_get_pipe_depth_format_bindings(D3DFORMAT format)
{
switch (format) {
@@ -215,7 +215,7 @@ d3d9_get_pipe_depth_format_bindings(D3DFORMAT format)
}
}
-static INLINE enum pipe_format
+static inline enum pipe_format
d3d9_to_pipe_format_internal(D3DFORMAT format)
{
if (format <= D3DFMT_A2B10G10R10_XR_BIAS)
@@ -257,7 +257,7 @@ d3d9_to_pipe_format_internal(D3DFORMAT format)
screen->is_format_supported(screen, pipe_format, target, \
sample_count, bindings)
-static INLINE enum pipe_format
+static inline enum pipe_format
d3d9_to_pipe_format_checked(struct pipe_screen *screen,
D3DFORMAT format,
enum pipe_texture_target target,
@@ -298,7 +298,7 @@ d3d9_to_pipe_format_checked(struct pipe_screen *screen,
return PIPE_FORMAT_NONE;
}
-static INLINE const char *
+static inline const char *
d3dformat_to_string(D3DFORMAT fmt)
{
switch (fmt) {
@@ -381,7 +381,7 @@ d3dformat_to_string(D3DFORMAT fmt)
return "Unknown";
}
-static INLINE unsigned
+static inline unsigned
nine_fvf_stride( DWORD fvf )
{
unsigned texcount, i, size = 0;
@@ -428,7 +428,7 @@ nine_fvf_stride( DWORD fvf )
return size;
}
-static INLINE void
+static inline void
d3dcolor_to_rgba(float *rgba, D3DCOLOR color)
{
rgba[0] = (float)((color >> 16) & 0xFF) / 0xFF;
@@ -437,13 +437,13 @@ d3dcolor_to_rgba(float *rgba, D3DCOLOR color)
rgba[3] = (float)((color >> 24) & 0xFF) / 0xFF;
}
-static INLINE void
+static inline void
d3dcolor_to_pipe_color_union(union pipe_color_union *rgba, D3DCOLOR color)
{
d3dcolor_to_rgba(&rgba->f[0], color);
}
-static INLINE unsigned
+static inline unsigned
d3dprimitivetype_to_pipe_prim(D3DPRIMITIVETYPE prim)
{
switch (prim) {
@@ -459,7 +459,7 @@ d3dprimitivetype_to_pipe_prim(D3DPRIMITIVETYPE prim)
}
}
-static INLINE unsigned
+static inline unsigned
prim_count_to_vertex_count(D3DPRIMITIVETYPE prim, UINT count)
{
switch (prim) {
@@ -475,7 +475,7 @@ prim_count_to_vertex_count(D3DPRIMITIVETYPE prim, UINT count)
}
}
-static INLINE unsigned
+static inline unsigned
d3dcmpfunc_to_pipe_func(D3DCMPFUNC func)
{
switch (func) {
@@ -494,7 +494,7 @@ d3dcmpfunc_to_pipe_func(D3DCMPFUNC func)
}
}
-static INLINE unsigned
+static inline unsigned
d3dstencilop_to_pipe_stencil_op(D3DSTENCILOP op)
{
switch (op) {
@@ -511,7 +511,7 @@ d3dstencilop_to_pipe_stencil_op(D3DSTENCILOP op)
}
}
-static INLINE unsigned
+static inline unsigned
d3dcull_to_pipe_face(D3DCULL cull)
{
switch (cull) {
@@ -524,7 +524,7 @@ d3dcull_to_pipe_face(D3DCULL cull)
}
}
-static INLINE unsigned
+static inline unsigned
d3dfillmode_to_pipe_polygon_mode(D3DFILLMODE mode)
{
switch (mode) {
@@ -538,7 +538,7 @@ d3dfillmode_to_pipe_polygon_mode(D3DFILLMODE mode)
}
}
-static INLINE unsigned
+static inline unsigned
d3dblendop_to_pipe_blend(D3DBLENDOP op)
{
switch (op) {
@@ -557,7 +557,7 @@ d3dblendop_to_pipe_blend(D3DBLENDOP op)
* Drivers may check RGB and ALPHA factors for equality so we should not
* simply substitute the ALPHA variants.
*/
-static INLINE unsigned
+static inline unsigned
d3dblend_alpha_to_pipe_blendfactor(D3DBLEND b)
{
switch (b) {
@@ -584,7 +584,7 @@ d3dblend_alpha_to_pipe_blendfactor(D3DBLEND b)
}
}
-static INLINE unsigned
+static inline unsigned
d3dblend_color_to_pipe_blendfactor(D3DBLEND b)
{
switch (b) {
@@ -611,7 +611,7 @@ d3dblend_color_to_pipe_blendfactor(D3DBLEND b)
}
}
-static INLINE unsigned
+static inline unsigned
d3dtextureaddress_to_pipe_tex_wrap(D3DTEXTUREADDRESS addr)
{
switch (addr) {
@@ -626,7 +626,7 @@ d3dtextureaddress_to_pipe_tex_wrap(D3DTEXTUREADDRESS addr)
}
}
-static INLINE unsigned
+static inline unsigned
d3dtexturefiltertype_to_pipe_tex_filter(D3DTEXTUREFILTERTYPE filter)
{
switch (filter) {
@@ -644,7 +644,7 @@ d3dtexturefiltertype_to_pipe_tex_filter(D3DTEXTUREFILTERTYPE filter)
}
}
-static INLINE unsigned
+static inline unsigned
d3dtexturefiltertype_to_pipe_tex_mipfilter(D3DTEXTUREFILTERTYPE filter)
{
switch (filter) {
@@ -662,7 +662,7 @@ d3dtexturefiltertype_to_pipe_tex_mipfilter(D3DTEXTUREFILTERTYPE filter)
}
}
-static INLINE unsigned nine_format_get_stride(enum pipe_format format,
+static inline unsigned nine_format_get_stride(enum pipe_format format,
unsigned width)
{
unsigned stride = util_format_get_stride(format, width);
@@ -670,7 +670,7 @@ static INLINE unsigned nine_format_get_stride(enum pipe_format format,
return align(stride, 4);
}
-static INLINE unsigned nine_format_get_level_alloc_size(enum pipe_format format,
+static inline unsigned nine_format_get_level_alloc_size(enum pipe_format format,
unsigned width,
unsigned height,
unsigned level)
@@ -684,7 +684,7 @@ static INLINE unsigned nine_format_get_level_alloc_size(enum pipe_format format,
return size;
}
-static INLINE unsigned nine_format_get_size_and_offsets(enum pipe_format format,
+static inline unsigned nine_format_get_size_and_offsets(enum pipe_format format,
unsigned *offsets,
unsigned width,
unsigned height,
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 22a58825f78..754f5af6b8e 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -43,7 +43,7 @@ struct shader_translator;
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
-static INLINE const char *d3dsio_to_string(unsigned opcode);
+static inline const char *d3dsio_to_string(unsigned opcode);
#define NINED3D_SM1_VS 0xfffe
@@ -239,7 +239,7 @@ struct sm1_dst_param
BYTE type;
};
-static INLINE void
+static inline void
assert_replicate_swizzle(const struct ureg_src *reg)
{
assert(reg->SwizzleY == reg->SwizzleX &&
@@ -608,7 +608,7 @@ tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
tx_scratch(struct shader_translator *tx)
{
if (tx->num_scratch >= Elements(tx->regs.t)) {
@@ -620,13 +620,13 @@ tx_scratch(struct shader_translator *tx)
return tx->regs.t[tx->num_scratch++];
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator *tx)
{
return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
}
-static INLINE struct ureg_src
+static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)
{
struct ureg_src src = ureg_src(dst);
@@ -636,7 +636,7 @@ tx_src_scalar(struct ureg_dst dst)
return src;
}
-static INLINE void
+static inline void
tx_temp_alloc(struct shader_translator *tx, INT idx)
{
assert(idx >= 0);
@@ -654,7 +654,7 @@ tx_temp_alloc(struct shader_translator *tx, INT idx)
tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
}
-static INLINE void
+static inline void
tx_addr_alloc(struct shader_translator *tx, INT idx)
{
assert(idx == 0);
@@ -664,7 +664,7 @@ tx_addr_alloc(struct shader_translator *tx, INT idx)
tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
}
-static INLINE void
+static inline void
tx_pred_alloc(struct shader_translator *tx, INT idx)
{
assert(idx == 0);
@@ -672,7 +672,7 @@ tx_pred_alloc(struct shader_translator *tx, INT idx)
tx->regs.p = ureg_DECL_predicate(tx->ureg);
}
-static INLINE void
+static inline void
tx_texcoord_alloc(struct shader_translator *tx, INT idx)
{
assert(IS_PS);
@@ -682,7 +682,7 @@ tx_texcoord_alloc(struct shader_translator *tx, INT idx)
TGSI_INTERPOLATE_PERSPECTIVE);
}
-static INLINE unsigned *
+static inline unsigned *
tx_bgnloop(struct shader_translator *tx)
{
tx->loop_depth++;
@@ -692,7 +692,7 @@ tx_bgnloop(struct shader_translator *tx)
return &tx->loop_labels[tx->loop_depth - 1];
}
-static INLINE unsigned *
+static inline unsigned *
tx_endloop(struct shader_translator *tx)
{
assert(tx->loop_depth);
@@ -741,7 +741,7 @@ tx_get_loopal(struct shader_translator *tx)
return ureg_src_undef();
}
-static INLINE unsigned *
+static inline unsigned *
tx_cond(struct shader_translator *tx)
{
assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
@@ -749,14 +749,14 @@ tx_cond(struct shader_translator *tx)
return &tx->cond_labels[tx->cond_depth - 1];
}
-static INLINE unsigned *
+static inline unsigned *
tx_elsecond(struct shader_translator *tx)
{
assert(tx->cond_depth);
return &tx->cond_labels[tx->cond_depth - 1];
}
-static INLINE void
+static inline void
tx_endcond(struct shader_translator *tx)
{
assert(tx->cond_depth);
@@ -765,7 +765,7 @@ tx_endcond(struct shader_translator *tx)
ureg_get_instruction_number(tx->ureg));
}
-static INLINE struct ureg_dst
+static inline struct ureg_dst
nine_ureg_dst_register(unsigned file, int index)
{
return ureg_dst(ureg_src_register(file, index));
@@ -1240,7 +1240,7 @@ NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, co
#define VNOTSUPPORTED 0, 0
#define V(maj, min) (((maj) << 8) | (min))
-static INLINE const char *
+static inline const char *
d3dsio_to_string( unsigned opcode )
{
static const char *names[] = {
@@ -1657,7 +1657,7 @@ DECL_SPECIAL(IF)
return D3D_OK;
}
-static INLINE unsigned
+static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)
{
switch (flags) {
@@ -1724,7 +1724,7 @@ static const char *sm1_declusage_names[] =
[D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
-static INLINE unsigned
+static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic *dcl)
{
return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
@@ -1833,7 +1833,7 @@ sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
-static INLINE unsigned
+static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)
{
switch (sampler_type) {
@@ -1846,7 +1846,7 @@ d3dstt_to_tgsi_tex(BYTE sampler_type)
return TGSI_TEXTURE_UNKNOWN;
}
}
-static INLINE unsigned
+static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
{
switch (sampler_type) {
@@ -1859,7 +1859,7 @@ d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
return TGSI_TEXTURE_UNKNOWN;
}
}
-static INLINE unsigned
+static inline unsigned
ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
{
switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
@@ -1884,7 +1884,7 @@ sm1_sampler_type_name(BYTE sampler_type)
}
}
-static INLINE unsigned
+static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
{
switch (sem->Name) {
@@ -2685,7 +2685,7 @@ create_op_info_map(struct shader_translator *tx)
}
}
-static INLINE HRESULT
+static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator *tx)
{
struct ureg_dst dst[1];
@@ -2703,19 +2703,19 @@ NineTranslateInstruction_Generic(struct shader_translator *tx)
return D3D_OK;
}
-static INLINE DWORD
+static inline DWORD
TOKEN_PEEK(struct shader_translator *tx)
{
return *(tx->parse);
}
-static INLINE DWORD
+static inline DWORD
TOKEN_NEXT(struct shader_translator *tx)
{
return *(tx->parse)++;
}
-static INLINE void
+static inline void
TOKEN_JUMP(struct shader_translator *tx)
{
if (tx->parse_next && tx->parse != tx->parse_next) {
@@ -2724,7 +2724,7 @@ TOKEN_JUMP(struct shader_translator *tx)
}
}
-static INLINE boolean
+static inline boolean
sm1_parse_eof(struct shader_translator *tx)
{
return TOKEN_PEEK(tx) == NINED3DSP_END;
@@ -3063,7 +3063,7 @@ tx_dtor(struct shader_translator *tx)
FREE(tx);
}
-static INLINE unsigned
+static inline unsigned
tgsi_processor_from_type(unsigned shader_type)
{
switch (shader_type) {
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index 56c5d99b4d2..ec256c153a9 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -70,19 +70,19 @@ struct nine_shader_info
struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
};
-static INLINE void
+static inline void
nine_info_mark_const_f_used(struct nine_shader_info *info, int idx)
{
if (info->const_float_slots < (idx + 1))
info->const_float_slots = idx + 1;
}
-static INLINE void
+static inline void
nine_info_mark_const_i_used(struct nine_shader_info *info, int idx)
{
if (info->const_int_slots < (idx + 1))
info->const_int_slots = idx + 1;
}
-static INLINE void
+static inline void
nine_info_mark_const_b_used(struct nine_shader_info *info, int idx)
{
if (info->const_bool_slots < (idx + 1))
@@ -100,7 +100,7 @@ struct nine_shader_variant
uint32_t key;
};
-static INLINE void *
+static inline void *
nine_shader_variant_get(struct nine_shader_variant *list, uint32_t key)
{
while (list->key != key && list->next)
@@ -110,7 +110,7 @@ nine_shader_variant_get(struct nine_shader_variant *list, uint32_t key)
return NULL;
}
-static INLINE boolean
+static inline boolean
nine_shader_variant_add(struct nine_shader_variant *list,
uint32_t key, void *cso)
{
@@ -127,7 +127,7 @@ nine_shader_variant_add(struct nine_shader_variant *list,
return TRUE;
}
-static INLINE void
+static inline void
nine_shader_variants_free(struct nine_shader_variant *list)
{
while (list->next) {
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 435118bc93f..6c835858d18 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -176,7 +176,7 @@ update_viewport(struct NineDevice9 *device)
pipe->set_viewport_states(pipe, 0, 1, &pvport);
}
-static INLINE void
+static inline void
update_scissor(struct NineDevice9 *device)
{
struct pipe_context *pipe = device->pipe;
@@ -184,19 +184,19 @@ update_scissor(struct NineDevice9 *device)
pipe->set_scissor_states(pipe, 0, 1, &device->state.scissor);
}
-static INLINE void
+static inline void
update_blend(struct NineDevice9 *device)
{
nine_convert_blend_state(device->cso, device->state.rs);
}
-static INLINE void
+static inline void
update_dsa(struct NineDevice9 *device)
{
nine_convert_dsa_state(device->cso, device->state.rs);
}
-static INLINE void
+static inline void
update_rasterizer(struct NineDevice9 *device)
{
nine_convert_rasterizer_state(device->cso, device->state.rs);
@@ -294,7 +294,7 @@ update_vertex_elements(struct NineDevice9 *device)
state->changed.stream_freq = 0;
}
-static INLINE uint32_t
+static inline uint32_t
update_shader_variant_keys(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
@@ -332,7 +332,7 @@ update_shader_variant_keys(struct NineDevice9 *device)
return mask;
}
-static INLINE uint32_t
+static inline uint32_t
update_vs(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
@@ -359,7 +359,7 @@ update_vs(struct NineDevice9 *device)
return changed_group;
}
-static INLINE uint32_t
+static inline uint32_t
update_ps(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
@@ -656,7 +656,7 @@ update_vertex_buffers(struct NineDevice9 *device)
state->changed.vtxbuf = 0;
}
-static INLINE void
+static inline void
update_index_buffer(struct NineDevice9 *device)
{
struct pipe_context *pipe = device->pipe;
@@ -677,7 +677,7 @@ validate_textures(struct NineDevice9 *device)
}
}
-static INLINE boolean
+static inline boolean
update_sampler_derived(struct nine_state *state, unsigned s)
{
boolean changed = FALSE;
diff --git a/src/gallium/state_trackers/nine/nineexoverlayextension.h b/src/gallium/state_trackers/nine/nineexoverlayextension.h
index a16d690dc8c..1616ed0532c 100644
--- a/src/gallium/state_trackers/nine/nineexoverlayextension.h
+++ b/src/gallium/state_trackers/nine/nineexoverlayextension.h
@@ -29,7 +29,7 @@ struct Nine9ExOverlayExtension
{
struct NineUnknown base;
};
-static INLINE struct Nine9ExOverlayExtension *
+static inline struct Nine9ExOverlayExtension *
Nine9ExOverlayExtension( void *data )
{
return (struct Nine9ExOverlayExtension *)data;
diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h
index 5e2219c946a..6dad1d1ee76 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.h
+++ b/src/gallium/state_trackers/nine/pixelshader9.h
@@ -47,7 +47,7 @@ struct NinePixelShader9
uint64_t ff_key[6];
};
-static INLINE struct NinePixelShader9 *
+static inline struct NinePixelShader9 *
NinePixelShader9( void *data )
{
return (struct NinePixelShader9 *)data;
diff --git a/src/gallium/state_trackers/nine/query9.c b/src/gallium/state_trackers/nine/query9.c
index 04f4aadabba..3afa9007f61 100644
--- a/src/gallium/state_trackers/nine/query9.c
+++ b/src/gallium/state_trackers/nine/query9.c
@@ -57,7 +57,7 @@ d3dquerytype_to_pipe_query(struct pipe_screen *screen, D3DQUERYTYPE type)
#define GET_DATA_SIZE_CASE2(a, b) case D3DQUERYTYPE_##a: return sizeof(D3DDEVINFO_##b)
#define GET_DATA_SIZE_CASET(a, b) case D3DQUERYTYPE_##a: return sizeof(b)
-static INLINE DWORD
+static inline DWORD
nine_query_result_size(D3DQUERYTYPE type)
{
switch (type) {
diff --git a/src/gallium/state_trackers/nine/query9.h b/src/gallium/state_trackers/nine/query9.h
index ad1ca50f26d..9cc1e317055 100644
--- a/src/gallium/state_trackers/nine/query9.h
+++ b/src/gallium/state_trackers/nine/query9.h
@@ -41,7 +41,7 @@ struct NineQuery9
enum nine_query_state state;
boolean instant; /* true if D3DISSUE_BEGIN is not needed / invalid */
};
-static INLINE struct NineQuery9 *
+static inline struct NineQuery9 *
NineQuery9( void *data )
{
return (struct NineQuery9 *)data;
diff --git a/src/gallium/state_trackers/nine/resource9.h b/src/gallium/state_trackers/nine/resource9.h
index da1dd6320e0..906f90806ce 100644
--- a/src/gallium/state_trackers/nine/resource9.h
+++ b/src/gallium/state_trackers/nine/resource9.h
@@ -46,7 +46,7 @@ struct NineResource9
/* for [GS]etPrivateData/FreePrivateData */
struct util_hash_table *pdata;
};
-static INLINE struct NineResource9 *
+static inline struct NineResource9 *
NineResource9( void *data )
{
return (struct NineResource9 *)data;
diff --git a/src/gallium/state_trackers/nine/stateblock9.h b/src/gallium/state_trackers/nine/stateblock9.h
index bcaf634d933..a580c6a2302 100644
--- a/src/gallium/state_trackers/nine/stateblock9.h
+++ b/src/gallium/state_trackers/nine/stateblock9.h
@@ -43,7 +43,7 @@ struct NineStateBlock9
enum nine_stateblock_type type;
};
-static INLINE struct NineStateBlock9 *
+static inline struct NineStateBlock9 *
NineStateBlock9( void *data )
{
return (struct NineStateBlock9 *)data;
diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c
index e46afd91157..7533cb3a454 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -261,7 +261,7 @@ NineSurface9_GetDesc( struct NineSurface9 *This,
}
/* Add the dirty rects to the source texture */
-INLINE void
+inline void
NineSurface9_AddDirtyRect( struct NineSurface9 *This,
const struct pipe_box *box )
{
@@ -295,7 +295,7 @@ NineSurface9_AddDirtyRect( struct NineSurface9 *This,
}
}
-static INLINE uint8_t *
+static inline uint8_t *
NineSurface9_GetSystemMemPointer(struct NineSurface9 *This, int x, int y)
{
unsigned x_offset = util_format_get_stride(This->base.info.format, x);
diff --git a/src/gallium/state_trackers/nine/surface9.h b/src/gallium/state_trackers/nine/surface9.h
index 2e409558609..73092ab8cf5 100644
--- a/src/gallium/state_trackers/nine/surface9.h
+++ b/src/gallium/state_trackers/nine/surface9.h
@@ -50,7 +50,7 @@ struct NineSurface9
uint8_t *data; /* system memory backing */
unsigned stride; /* for system memory backing */
};
-static INLINE struct NineSurface9 *
+static inline struct NineSurface9 *
NineSurface9( void *data )
{
return (struct NineSurface9 *)data;
@@ -89,7 +89,7 @@ NineSurface9_MarkContainerDirty( struct NineSurface9 *This );
struct pipe_surface *
NineSurface9_CreatePipeSurface( struct NineSurface9 *This, const int sRGB );
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
NineSurface9_GetSurface( struct NineSurface9 *This, int sRGB )
{
if (This->surface[sRGB])
@@ -97,13 +97,13 @@ NineSurface9_GetSurface( struct NineSurface9 *This, int sRGB )
return NineSurface9_CreatePipeSurface(This, sRGB);
}
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
NineSurface9_GetResource( struct NineSurface9 *This )
{
return This->base.resource;
}
-static INLINE void
+static inline void
NineSurface9_SetResource( struct NineSurface9 *This,
struct pipe_resource *resource, unsigned level )
{
@@ -131,7 +131,7 @@ NineSurface9_CopySurface( struct NineSurface9 *This,
const POINT *pDestPoint,
const RECT *pSourceRect );
-static INLINE boolean
+static inline boolean
NineSurface9_IsOffscreenPlain (struct NineSurface9 *This )
{
return This->base.usage == 0 && !This->texture;
@@ -141,7 +141,7 @@ NineSurface9_IsOffscreenPlain (struct NineSurface9 *This )
void
NineSurface9_Dump( struct NineSurface9 *This );
#else
-static INLINE void
+static inline void
NineSurface9_Dump( struct NineSurface9 *This ) { }
#endif
diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c
index c40bc602460..a62e6ad99d8 100644
--- a/src/gallium/state_trackers/nine/swapchain9.c
+++ b/src/gallium/state_trackers/nine/swapchain9.c
@@ -631,7 +631,7 @@ static void pend_present(struct NineSwapChain9 *This,
return;
}
-static INLINE HRESULT
+static inline HRESULT
present( struct NineSwapChain9 *This,
const RECT *pSourceRect,
const RECT *pDestRect,
@@ -726,7 +726,7 @@ bypass_rendering:
BOOL still_draw = FALSE;
fence = swap_fences_see_front(This);
if (fence) {
- still_draw = !This->screen->fence_signalled(This->screen, fence);
+ still_draw = !This->screen->fence_finish(This->screen, fence, 0);
This->screen->fence_reference(This->screen, &fence, NULL);
}
if (still_draw)
diff --git a/src/gallium/state_trackers/nine/swapchain9.h b/src/gallium/state_trackers/nine/swapchain9.h
index 2afd6ab2954..5e48dde5004 100644
--- a/src/gallium/state_trackers/nine/swapchain9.h
+++ b/src/gallium/state_trackers/nine/swapchain9.h
@@ -76,7 +76,7 @@ struct NineSwapChain9
BOOL enable_threadpool;
};
-static INLINE struct NineSwapChain9 *
+static inline struct NineSwapChain9 *
NineSwapChain9( void *data )
{
return (struct NineSwapChain9 *)data;
diff --git a/src/gallium/state_trackers/nine/swapchain9ex.h b/src/gallium/state_trackers/nine/swapchain9ex.h
index bf407836099..075f8835222 100644
--- a/src/gallium/state_trackers/nine/swapchain9ex.h
+++ b/src/gallium/state_trackers/nine/swapchain9ex.h
@@ -29,7 +29,7 @@ struct NineSwapChain9Ex
{
struct NineSwapChain9 base;
};
-static INLINE struct NineSwapChain9Ex *
+static inline struct NineSwapChain9Ex *
NineSwapChain9Ex( void *data )
{
return (struct NineSwapChain9Ex *)data;
diff --git a/src/gallium/state_trackers/nine/texture9.h b/src/gallium/state_trackers/nine/texture9.h
index 65db874b2a3..6f80be9ccde 100644
--- a/src/gallium/state_trackers/nine/texture9.h
+++ b/src/gallium/state_trackers/nine/texture9.h
@@ -33,7 +33,7 @@ struct NineTexture9
struct pipe_box dirty_rect; /* covers all mip levels */
uint8_t *managed_buffer;
};
-static INLINE struct NineTexture9 *
+static inline struct NineTexture9 *
NineTexture9( void *data )
{
return (struct NineTexture9 *)data;
diff --git a/src/gallium/state_trackers/nine/vertexbuffer9.h b/src/gallium/state_trackers/nine/vertexbuffer9.h
index 0d88b839cad..6174de4df08 100644
--- a/src/gallium/state_trackers/nine/vertexbuffer9.h
+++ b/src/gallium/state_trackers/nine/vertexbuffer9.h
@@ -40,7 +40,7 @@ struct NineVertexBuffer9
D3DVERTEXBUFFER_DESC desc;
};
-static INLINE struct NineVertexBuffer9 *
+static inline struct NineVertexBuffer9 *
NineVertexBuffer9( void *data )
{
return (struct NineVertexBuffer9 *)data;
diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.c b/src/gallium/state_trackers/nine/vertexdeclaration9.c
index 9e4cb55bc67..2047b91abc4 100644
--- a/src/gallium/state_trackers/nine/vertexdeclaration9.c
+++ b/src/gallium/state_trackers/nine/vertexdeclaration9.c
@@ -34,7 +34,7 @@
#define DBG_CHANNEL DBG_VERTEXDECLARATION
-static INLINE enum pipe_format decltype_format(BYTE type)
+static inline enum pipe_format decltype_format(BYTE type)
{
switch (type) {
case D3DDECLTYPE_FLOAT1: return PIPE_FORMAT_R32_FLOAT;
@@ -60,7 +60,7 @@ static INLINE enum pipe_format decltype_format(BYTE type)
return PIPE_FORMAT_NONE;
}
-static INLINE unsigned decltype_size(BYTE type)
+static inline unsigned decltype_size(BYTE type)
{
switch (type) {
case D3DDECLTYPE_FLOAT1: return 1 * sizeof(float);
@@ -90,7 +90,7 @@ static INLINE unsigned decltype_size(BYTE type)
* simple lookup table won't work in that case. Let's just wait
* with making this more generic until we need it.
*/
-static INLINE boolean
+static inline boolean
nine_d3ddeclusage_check(unsigned usage, unsigned usage_idx)
{
switch (usage) {
@@ -162,7 +162,7 @@ static const char *nine_declusage_names[] =
[NINE_DECLUSAGE_FOG] = "FOG",
[NINE_DECLUSAGE_NONE] = "(NONE)",
};
-static INLINE const char *
+static inline const char *
nine_declusage_name(unsigned ndcl)
{
return nine_declusage_names[ndcl % NINE_DECLUSAGE_COUNT];
diff --git a/src/gallium/state_trackers/nine/vertexdeclaration9.h b/src/gallium/state_trackers/nine/vertexdeclaration9.h
index a4d4a0445d5..655bcfbf165 100644
--- a/src/gallium/state_trackers/nine/vertexdeclaration9.h
+++ b/src/gallium/state_trackers/nine/vertexdeclaration9.h
@@ -47,7 +47,7 @@ struct NineVertexDeclaration9
D3DVERTEXELEMENT9 *decls;
DWORD fvf;
};
-static INLINE struct NineVertexDeclaration9 *
+static inline struct NineVertexDeclaration9 *
NineVertexDeclaration9( void *data )
{
return (struct NineVertexDeclaration9 *)data;
diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h
index 3495c9f9c55..66c602c7b3c 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.h
+++ b/src/gallium/state_trackers/nine/vertexshader9.h
@@ -56,7 +56,7 @@ struct NineVertexShader9
uint64_t ff_key[2];
};
-static INLINE struct NineVertexShader9 *
+static inline struct NineVertexShader9 *
NineVertexShader9( void *data )
{
return (struct NineVertexShader9 *)data;
diff --git a/src/gallium/state_trackers/nine/volume9.c b/src/gallium/state_trackers/nine/volume9.c
index b34ee07dce9..4dfc5599a8e 100644
--- a/src/gallium/state_trackers/nine/volume9.c
+++ b/src/gallium/state_trackers/nine/volume9.c
@@ -152,7 +152,7 @@ NineVolume9_GetContainer( struct NineVolume9 *This,
return NineUnknown_QueryInterface(NineUnknown(This)->container, riid, ppContainer);
}
-static INLINE void
+static inline void
NineVolume9_MarkContainerDirty( struct NineVolume9 *This )
{
struct NineBaseTexture9 *tex;
@@ -182,13 +182,13 @@ NineVolume9_GetDesc( struct NineVolume9 *This,
return D3D_OK;
}
-static INLINE boolean
+static inline boolean
NineVolume9_IsDirty(struct NineVolume9 *This)
{
return This->dirty_box[0].width != 0;
}
-INLINE void
+inline void
NineVolume9_AddDirtyRegion( struct NineVolume9 *This,
const struct pipe_box *box )
{
@@ -226,7 +226,7 @@ NineVolume9_AddDirtyRegion( struct NineVolume9 *This,
}
}
-static INLINE uint8_t *
+static inline uint8_t *
NineVolume9_GetSystemMemPointer(struct NineVolume9 *This, int x, int y, int z)
{
unsigned x_offset = util_format_get_stride(This->info.format, x);
diff --git a/src/gallium/state_trackers/nine/volume9.h b/src/gallium/state_trackers/nine/volume9.h
index 802836659c2..fae24310a50 100644
--- a/src/gallium/state_trackers/nine/volume9.h
+++ b/src/gallium/state_trackers/nine/volume9.h
@@ -57,7 +57,7 @@ struct NineVolume9
/* for [GS]etPrivateData/FreePrivateData */
struct util_hash_table *pdata;
};
-static INLINE struct NineVolume9 *
+static inline struct NineVolume9 *
NineVolume9( void *data )
{
return (struct NineVolume9 *)data;
@@ -73,7 +73,7 @@ NineVolume9_new( struct NineDevice9 *pDevice,
/*** Nine private ***/
-static INLINE void
+static inline void
NineVolume9_SetResource( struct NineVolume9 *This,
struct pipe_resource *resource, unsigned level )
{
@@ -85,7 +85,7 @@ void
NineVolume9_AddDirtyRegion( struct NineVolume9 *This,
const struct pipe_box *box );
-static INLINE void
+static inline void
NineVolume9_ClearDirtyRegion( struct NineVolume9 *This )
{
memset(&This->dirty_box, 0, sizeof(This->dirty_box));
diff --git a/src/gallium/state_trackers/nine/volumetexture9.h b/src/gallium/state_trackers/nine/volumetexture9.h
index 313fa1a91fb..b8f250ad72e 100644
--- a/src/gallium/state_trackers/nine/volumetexture9.h
+++ b/src/gallium/state_trackers/nine/volumetexture9.h
@@ -32,7 +32,7 @@ struct NineVolumeTexture9
struct NineVolume9 **volumes;
struct pipe_box dirty_box;
};
-static INLINE struct NineVolumeTexture9 *
+static inline struct NineVolumeTexture9 *
NineVolumeTexture9( void *data )
{
return (struct NineVolumeTexture9 *)data;
diff --git a/src/gallium/state_trackers/omx/vid_enc.c b/src/gallium/state_trackers/omx/vid_enc.c
index ae1a98f5be3..2bd0194189f 100644
--- a/src/gallium/state_trackers/omx/vid_enc.c
+++ b/src/gallium/state_trackers/omx/vid_enc.c
@@ -180,6 +180,11 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED))
return OMX_ErrorBadParameter;
+ priv->stacked_frames_num = screen->get_video_param(screen,
+ PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
+ PIPE_VIDEO_ENTRYPOINT_ENCODE,
+ PIPE_VIDEO_CAP_STACKED_FRAMES);
+
priv->s_pipe = screen->context_create(screen, priv->screen);
if (!priv->s_pipe)
return OMX_ErrorInsufficientResources;
@@ -259,6 +264,7 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
LIST_INITHEAD(&priv->free_tasks);
LIST_INITHEAD(&priv->used_tasks);
LIST_INITHEAD(&priv->b_frames);
+ LIST_INITHEAD(&priv->stacked_tasks);
return OMX_ErrorNone;
}
@@ -271,6 +277,7 @@ static OMX_ERRORTYPE vid_enc_Destructor(OMX_COMPONENTTYPE *comp)
enc_ReleaseTasks(&priv->free_tasks);
enc_ReleaseTasks(&priv->used_tasks);
enc_ReleaseTasks(&priv->b_frames);
+ enc_ReleaseTasks(&priv->stacked_tasks);
if (priv->ports) {
for (i = 0; i < priv->sPortTypesParam[OMX_PortDomainVideo].nPorts; ++i) {
@@ -1116,6 +1123,7 @@ static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEAD
struct input_buf_private *inp = buf->pInputPortPrivate;
enum pipe_h264_enc_picture_type picture_type;
struct encode_task *task;
+ unsigned stacked_num = 0;
OMX_ERRORTYPE err;
enc_MoveTasks(&inp->tasks, &priv->free_tasks);
@@ -1127,6 +1135,8 @@ static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEAD
if (buf->nFlags & OMX_BUFFERFLAG_EOS) {
buf->nFilledLen = buf->nAllocLen;
enc_ClearBframes(port, inp);
+ enc_MoveTasks(&priv->stacked_tasks, &inp->tasks);
+ priv->codec->flush(priv->codec);
}
return base_port_SendBufferFunction(port, buf);
}
@@ -1166,7 +1176,16 @@ static OMX_ERRORTYPE vid_enc_EncodeFrame(omx_base_PortType *port, OMX_BUFFERHEAD
/* handle I or P frame */
priv->ref_idx_l0 = priv->ref_idx_l1;
enc_HandleTask(port, task, picture_type);
- LIST_ADDTAIL(&task->list, &inp->tasks);
+ LIST_ADDTAIL(&task->list, &priv->stacked_tasks);
+ LIST_FOR_EACH_ENTRY(task, &priv->stacked_tasks, list) {
+ ++stacked_num;
+ }
+ if (stacked_num == priv->stacked_frames_num) {
+ struct encode_task *t;
+ t = LIST_ENTRY(struct encode_task, priv->stacked_tasks.next, list);
+ LIST_DEL(&t->list);
+ LIST_ADDTAIL(&t->list, &inp->tasks);
+ }
priv->ref_idx_l1 = priv->frame_num++;
/* handle B frames */
diff --git a/src/gallium/state_trackers/omx/vid_enc.h b/src/gallium/state_trackers/omx/vid_enc.h
index c8d192b9c60..a83374450b5 100644
--- a/src/gallium/state_trackers/omx/vid_enc.h
+++ b/src/gallium/state_trackers/omx/vid_enc.h
@@ -73,6 +73,7 @@ DERIVEDCLASS(vid_enc_PrivateType, omx_base_filter_PrivateType)
struct list_head free_tasks; \
struct list_head used_tasks; \
struct list_head b_frames; \
+ struct list_head stacked_tasks; \
OMX_U32 frame_rate; \
OMX_U32 frame_num; \
OMX_U32 pic_order_cnt; \
@@ -86,7 +87,8 @@ DERIVEDCLASS(vid_enc_PrivateType, omx_base_filter_PrivateType)
struct vl_compositor_state cstate; \
struct pipe_video_buffer *scale_buffer[OMX_VID_ENC_NUM_SCALING_BUFFERS]; \
OMX_CONFIG_SCALEFACTORTYPE scale; \
- OMX_U32 current_scale_buffer;
+ OMX_U32 current_scale_buffer; \
+ OMX_U32 stacked_frames_num;
ENDCLASS(vid_enc_PrivateType)
OMX_ERRORTYPE vid_enc_LoaderComponent(stLoaderComponentType *comp);
diff --git a/src/gallium/state_trackers/osmesa/osmesa.c b/src/gallium/state_trackers/osmesa/osmesa.c
index 2d5d096d8ed..0285cb0dac2 100644
--- a/src/gallium/state_trackers/osmesa/osmesa.c
+++ b/src/gallium/state_trackers/osmesa/osmesa.c
@@ -168,7 +168,7 @@ get_st_manager(void)
}
-static INLINE boolean
+static inline boolean
little_endian(void)
{
const unsigned ui = 1;
@@ -292,7 +292,7 @@ osmesa_init_st_visual(struct st_visual *vis,
/**
* Return the osmesa_buffer that corresponds to an st_framebuffer_iface.
*/
-static INLINE struct osmesa_buffer *
+static inline struct osmesa_buffer *
stfbi_to_osbuffer(struct st_framebuffer_iface *stfbi)
{
return (struct osmesa_buffer *) stfbi->st_manager_private;
@@ -886,7 +886,7 @@ static struct name_function functions[] = {
{ "OSMesaDestroyContext", (OSMESAproc) OSMesaDestroyContext },
{ "OSMesaMakeCurrent", (OSMESAproc) OSMesaMakeCurrent },
{ "OSMesaGetCurrentContext", (OSMESAproc) OSMesaGetCurrentContext },
- { "OSMesaPixelsStore", (OSMESAproc) OSMesaPixelStore },
+ { "OSMesaPixelStore", (OSMESAproc) OSMesaPixelStore },
{ "OSMesaGetIntegerv", (OSMESAproc) OSMesaGetIntegerv },
{ "OSMesaGetDepthBuffer", (OSMESAproc) OSMesaGetDepthBuffer },
{ "OSMesaGetColorBuffer", (OSMESAproc) OSMesaGetColorBuffer },
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 0634ba72fda..3233799d650 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -413,6 +413,134 @@ vlVdpDecoderRenderH264(struct pipe_h264_picture_desc *picture,
return VDP_STATUS_OK;
}
+/**
+ * Fill a gallium H.265 picture description from VDPAU's HEVC picture info.
+ *
+ * Copies the sequence-parameter-set fields into picture->pps->sps, the
+ * picture-parameter-set fields into picture->pps and the per-picture fields
+ * into picture, and hands each of the 16 RefPics[] entries to
+ * vlVdpGetReferenceFrame() together with the matching picture->ref[] slot.
+ *
+ * \param picture       destination; picture->pps and picture->pps->sps are
+ *                      dereferenced and written, so the caller must have
+ *                      set both up
+ * \param picture_info  source VdpPictureInfoHEVC
+ * \return VDP_STATUS_OK, or the first non-OK status returned by
+ *         vlVdpGetReferenceFrame(), in which case the fields assigned after
+ *         the reference loop are left untouched
+ */
+static VdpStatus
+vlVdpDecoderRenderH265(struct pipe_h265_picture_desc *picture,
+                       VdpPictureInfoHEVC *picture_info)
+{
+   struct pipe_h265_pps *pps = picture->pps;
+   struct pipe_h265_sps *sps = pps->sps;
+   unsigned ref;
+
+   /* Sequence parameter set. */
+   sps->chroma_format_idc = picture_info->chroma_format_idc;
+   sps->separate_colour_plane_flag = picture_info->separate_colour_plane_flag;
+   sps->pic_width_in_luma_samples = picture_info->pic_width_in_luma_samples;
+   sps->pic_height_in_luma_samples = picture_info->pic_height_in_luma_samples;
+   sps->bit_depth_luma_minus8 = picture_info->bit_depth_luma_minus8;
+   sps->bit_depth_chroma_minus8 = picture_info->bit_depth_chroma_minus8;
+   sps->log2_max_pic_order_cnt_lsb_minus4 = picture_info->log2_max_pic_order_cnt_lsb_minus4;
+   sps->sps_max_dec_pic_buffering_minus1 = picture_info->sps_max_dec_pic_buffering_minus1;
+   sps->log2_min_luma_coding_block_size_minus3 = picture_info->log2_min_luma_coding_block_size_minus3;
+   sps->log2_diff_max_min_luma_coding_block_size = picture_info->log2_diff_max_min_luma_coding_block_size;
+   sps->log2_min_transform_block_size_minus2 = picture_info->log2_min_transform_block_size_minus2;
+   sps->log2_diff_max_min_transform_block_size = picture_info->log2_diff_max_min_transform_block_size;
+   sps->max_transform_hierarchy_depth_inter = picture_info->max_transform_hierarchy_depth_inter;
+   sps->max_transform_hierarchy_depth_intra = picture_info->max_transform_hierarchy_depth_intra;
+   sps->scaling_list_enabled_flag = picture_info->scaling_list_enabled_flag;
+   memcpy(sps->ScalingList4x4, picture_info->ScalingList4x4, 6*16);
+   memcpy(sps->ScalingList8x8, picture_info->ScalingList8x8, 6*64);
+   memcpy(sps->ScalingList16x16, picture_info->ScalingList16x16, 6*64);
+   memcpy(sps->ScalingList32x32, picture_info->ScalingList32x32, 2*64);
+   memcpy(sps->ScalingListDCCoeff16x16, picture_info->ScalingListDCCoeff16x16, 6);
+   memcpy(sps->ScalingListDCCoeff32x32, picture_info->ScalingListDCCoeff32x32, 2);
+   sps->amp_enabled_flag = picture_info->amp_enabled_flag;
+   sps->sample_adaptive_offset_enabled_flag = picture_info->sample_adaptive_offset_enabled_flag;
+   sps->pcm_enabled_flag = picture_info->pcm_enabled_flag;
+   sps->pcm_sample_bit_depth_luma_minus1 = picture_info->pcm_sample_bit_depth_luma_minus1;
+   sps->pcm_sample_bit_depth_chroma_minus1 = picture_info->pcm_sample_bit_depth_chroma_minus1;
+   sps->log2_min_pcm_luma_coding_block_size_minus3 = picture_info->log2_min_pcm_luma_coding_block_size_minus3;
+   sps->log2_diff_max_min_pcm_luma_coding_block_size = picture_info->log2_diff_max_min_pcm_luma_coding_block_size;
+   sps->pcm_loop_filter_disabled_flag = picture_info->pcm_loop_filter_disabled_flag;
+   sps->num_short_term_ref_pic_sets = picture_info->num_short_term_ref_pic_sets;
+   sps->long_term_ref_pics_present_flag = picture_info->long_term_ref_pics_present_flag;
+   sps->num_long_term_ref_pics_sps = picture_info->num_long_term_ref_pics_sps;
+   sps->sps_temporal_mvp_enabled_flag = picture_info->sps_temporal_mvp_enabled_flag;
+   sps->strong_intra_smoothing_enabled_flag = picture_info->strong_intra_smoothing_enabled_flag;
+
+   /* Picture parameter set. */
+   pps->dependent_slice_segments_enabled_flag = picture_info->dependent_slice_segments_enabled_flag;
+   pps->output_flag_present_flag = picture_info->output_flag_present_flag;
+   pps->num_extra_slice_header_bits = picture_info->num_extra_slice_header_bits;
+   pps->sign_data_hiding_enabled_flag = picture_info->sign_data_hiding_enabled_flag;
+   pps->cabac_init_present_flag = picture_info->cabac_init_present_flag;
+   pps->num_ref_idx_l0_default_active_minus1 = picture_info->num_ref_idx_l0_default_active_minus1;
+   pps->num_ref_idx_l1_default_active_minus1 = picture_info->num_ref_idx_l1_default_active_minus1;
+   pps->init_qp_minus26 = picture_info->init_qp_minus26;
+   pps->constrained_intra_pred_flag = picture_info->constrained_intra_pred_flag;
+   pps->transform_skip_enabled_flag = picture_info->transform_skip_enabled_flag;
+   pps->cu_qp_delta_enabled_flag = picture_info->cu_qp_delta_enabled_flag;
+   pps->diff_cu_qp_delta_depth = picture_info->diff_cu_qp_delta_depth;
+   pps->pps_cb_qp_offset = picture_info->pps_cb_qp_offset;
+   pps->pps_cr_qp_offset = picture_info->pps_cr_qp_offset;
+   pps->pps_slice_chroma_qp_offsets_present_flag = picture_info->pps_slice_chroma_qp_offsets_present_flag;
+   pps->weighted_pred_flag = picture_info->weighted_pred_flag;
+   pps->weighted_bipred_flag = picture_info->weighted_bipred_flag;
+   pps->transquant_bypass_enabled_flag = picture_info->transquant_bypass_enabled_flag;
+   pps->tiles_enabled_flag = picture_info->tiles_enabled_flag;
+   pps->entropy_coding_sync_enabled_flag = picture_info->entropy_coding_sync_enabled_flag;
+   pps->num_tile_columns_minus1 = picture_info->num_tile_columns_minus1;
+   pps->num_tile_rows_minus1 = picture_info->num_tile_rows_minus1;
+   pps->uniform_spacing_flag = picture_info->uniform_spacing_flag;
+   memcpy(pps->column_width_minus1, picture_info->column_width_minus1, 20 * 2);
+   memcpy(pps->row_height_minus1, picture_info->row_height_minus1, 22 * 2);
+   pps->loop_filter_across_tiles_enabled_flag = picture_info->loop_filter_across_tiles_enabled_flag;
+   pps->pps_loop_filter_across_slices_enabled_flag = picture_info->pps_loop_filter_across_slices_enabled_flag;
+   pps->deblocking_filter_control_present_flag = picture_info->deblocking_filter_control_present_flag;
+   pps->deblocking_filter_override_enabled_flag = picture_info->deblocking_filter_override_enabled_flag;
+   pps->pps_deblocking_filter_disabled_flag = picture_info->pps_deblocking_filter_disabled_flag;
+   pps->pps_beta_offset_div2 = picture_info->pps_beta_offset_div2;
+   pps->pps_tc_offset_div2 = picture_info->pps_tc_offset_div2;
+   pps->lists_modification_present_flag = picture_info->lists_modification_present_flag;
+   pps->log2_parallel_merge_level_minus2 = picture_info->log2_parallel_merge_level_minus2;
+   pps->slice_segment_header_extension_present_flag = picture_info->slice_segment_header_extension_present_flag;
+
+   /* Per-picture state. */
+   picture->IDRPicFlag = picture_info->IDRPicFlag;
+   picture->RAPPicFlag = picture_info->RAPPicFlag;
+   picture->CurrRpsIdx = picture_info->CurrRpsIdx;
+   picture->NumPocTotalCurr = picture_info->NumPocTotalCurr;
+   picture->NumDeltaPocsOfRefRpsIdx = picture_info->NumDeltaPocsOfRefRpsIdx;
+   picture->NumShortTermPictureSliceHeaderBits = picture_info->NumShortTermPictureSliceHeaderBits;
+   picture->NumLongTermPictureSliceHeaderBits = picture_info->NumLongTermPictureSliceHeaderBits;
+   picture->CurrPicOrderCntVal = picture_info->CurrPicOrderCntVal;
+
+   /* Reference pictures; bail out on the first lookup that fails. */
+   for (ref = 0; ref < 16; ++ref) {
+      VdpStatus status = vlVdpGetReferenceFrame(picture_info->RefPics[ref],
+                                                &picture->ref[ref]);
+      if (status != VDP_STATUS_OK)
+         return status;
+
+      picture->PicOrderCntVal[ref] = picture_info->PicOrderCntVal[ref];
+      picture->IsLongTerm[ref] = picture_info->IsLongTerm[ref];
+   }
+
+   picture->NumPocStCurrBefore = picture_info->NumPocStCurrBefore;
+   picture->NumPocStCurrAfter = picture_info->NumPocStCurrAfter;
+   picture->NumPocLtCurr = picture_info->NumPocLtCurr;
+   memcpy(picture->RefPicSetStCurrBefore, picture_info->RefPicSetStCurrBefore, 8);
+   memcpy(picture->RefPicSetStCurrAfter, picture_info->RefPicSetStCurrAfter, 8);
+   memcpy(picture->RefPicSetLtCurr, picture_info->RefPicSetLtCurr, 8);
+
+   return VDP_STATUS_OK;
+}
+
static void
vlVdpDecoderFixVC1Startcode(uint32_t *num_buffers, const void *buffers[], unsigned sizes[])
{
@@ -461,14 +570,17 @@ vlVdpDecoderRender(VdpDecoder decoder,
struct pipe_video_codec *dec;
bool buffer_support[2];
unsigned i;
- struct pipe_h264_sps sps = {};
- struct pipe_h264_pps pps = { &sps };
+ struct pipe_h264_sps sps_h264 = {};
+ struct pipe_h264_pps pps_h264 = { &sps_h264 };
+ struct pipe_h265_sps sps_h265 = {};
+ struct pipe_h265_pps pps_h265 = { &sps_h265 };
union {
struct pipe_picture_desc base;
struct pipe_mpeg12_picture_desc mpeg12;
struct pipe_mpeg4_picture_desc mpeg4;
struct pipe_vc1_picture_desc vc1;
struct pipe_h264_picture_desc h264;
+ struct pipe_h265_picture_desc h265;
} desc;
if (!(picture_info && bitstream_buffers))
@@ -547,9 +659,13 @@ vlVdpDecoderRender(VdpDecoder decoder,
ret = vlVdpDecoderRenderVC1(&desc.vc1, (VdpPictureInfoVC1 *)picture_info);
break;
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- desc.h264.pps = &pps;
+ desc.h264.pps = &pps_h264;
ret = vlVdpDecoderRenderH264(&desc.h264, (VdpPictureInfoH264 *)picture_info);
break;
+ case PIPE_VIDEO_FORMAT_HEVC:
+ desc.h265.pps = &pps_h265;
+ ret = vlVdpDecoderRenderH265(&desc.h265, (VdpPictureInfoHEVC *)picture_info);
+ break;
default:
return VDP_STATUS_INVALID_DECODER_PROFILE;
}
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 4118eb86997..c0b1ecc55fa 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -49,7 +49,8 @@ vlVdpVideoMixerCreate(VdpDevice device,
vlVdpVideoMixer *vmixer = NULL;
VdpStatus ret;
struct pipe_screen *screen;
- unsigned max_width, max_height, i;
+ uint32_t max_2d_texture_level;
+ unsigned max_size, i;
vlVdpDevice *dev = vlGetDataHTAB(device);
if (!dev)
@@ -134,18 +135,17 @@ vlVdpVideoMixerCreate(VdpDevice device,
VDPAU_MSG(VDPAU_WARN, "[VDPAU] Max layers > 4 not supported\n", vmixer->max_layers);
goto no_params;
}
- max_width = screen->get_video_param(screen, PIPE_VIDEO_PROFILE_UNKNOWN,
- PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_MAX_WIDTH);
- max_height = screen->get_video_param(screen, PIPE_VIDEO_PROFILE_UNKNOWN,
- PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_MAX_HEIGHT);
- if (vmixer->video_width < 48 ||
- vmixer->video_width > max_width) {
- VDPAU_MSG(VDPAU_WARN, "[VDPAU] 48 < %u < %u not valid for width\n", vmixer->video_width, max_width);
+
+ max_2d_texture_level = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+ max_size = pow(2, max_2d_texture_level-1);
+ if (vmixer->video_width < 48 || vmixer->video_width > max_size) {
+ VDPAU_MSG(VDPAU_WARN, "[VDPAU] 48 < %u < %u not valid for width\n",
+ vmixer->video_width, max_size);
goto no_params;
}
- if (vmixer->video_height < 48 ||
- vmixer->video_height > max_height) {
- VDPAU_MSG(VDPAU_WARN, "[VDPAU] 48 < %u < %u not valid for height\n", vmixer->video_height, max_height);
+ if (vmixer->video_height < 48 || vmixer->video_height > max_size) {
+ VDPAU_MSG(VDPAU_WARN, "[VDPAU] 48 < %u < %u not valid for height\n",
+ vmixer->video_height, max_size);
goto no_params;
}
vmixer->luma_key_min = 0.f;
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 7f8dbed7ee2..e53303708b2 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -369,7 +369,7 @@ vlVdpPresentationQueueQuerySurfaceStatus(VdpPresentationQueue presentation_queue
} else {
pipe_mutex_lock(pq->device->mutex);
screen = pq->device->vscreen->pscreen;
- if (screen->fence_signalled(screen, surf->fence)) {
+ if (screen->fence_finish(screen, surf->fence, 0)) {
screen->fence_reference(screen, &surf->fence, NULL);
*status = VDP_PRESENTATION_QUEUE_STATUS_VISIBLE;
pipe_mutex_unlock(pq->device->mutex);
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index e14ce041947..27ac44cd9c1 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -261,6 +261,16 @@ ProfileToPipe(VdpDecoderProfile vdpau_profile)
return PIPE_VIDEO_PROFILE_VC1_MAIN;
case VDP_DECODER_PROFILE_VC1_ADVANCED:
return PIPE_VIDEO_PROFILE_VC1_ADVANCED;
+ case VDP_DECODER_PROFILE_HEVC_MAIN:
+ return PIPE_VIDEO_PROFILE_HEVC_MAIN;
+ case VDP_DECODER_PROFILE_HEVC_MAIN_10:
+ return PIPE_VIDEO_PROFILE_HEVC_MAIN_10;
+ case VDP_DECODER_PROFILE_HEVC_MAIN_STILL:
+ return PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL;
+ case VDP_DECODER_PROFILE_HEVC_MAIN_12:
+ return PIPE_VIDEO_PROFILE_HEVC_MAIN_12;
+ case VDP_DECODER_PROFILE_HEVC_MAIN_444:
+ return PIPE_VIDEO_PROFILE_HEVC_MAIN_444;
default:
return PIPE_VIDEO_PROFILE_UNKNOWN;
}
@@ -292,6 +302,16 @@ PipeToProfile(enum pipe_video_profile p_profile)
return VDP_DECODER_PROFILE_VC1_MAIN;
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
return VDP_DECODER_PROFILE_VC1_ADVANCED;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+ return VDP_DECODER_PROFILE_HEVC_MAIN;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
+ return VDP_DECODER_PROFILE_HEVC_MAIN_10;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL:
+ return VDP_DECODER_PROFILE_HEVC_MAIN_STILL;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_12:
+ return VDP_DECODER_PROFILE_HEVC_MAIN_12;
+ case PIPE_VIDEO_PROFILE_HEVC_MAIN_444:
+ return VDP_DECODER_PROFILE_HEVC_MAIN_444;
default:
assert(0);
return -1;
diff --git a/src/gallium/state_trackers/wgl/Makefile.sources b/src/gallium/state_trackers/wgl/Makefile.sources
index 8c463d5f18e..1e00caf97b7 100644
--- a/src/gallium/state_trackers/wgl/Makefile.sources
+++ b/src/gallium/state_trackers/wgl/Makefile.sources
@@ -9,6 +9,7 @@ C_SOURCES := \
stw_framebuffer.c \
stw_getprocaddress.c \
stw_nopfuncs.c \
+ stw_nopfuncs.h \
stw_pixelformat.c \
stw_st.c \
stw_tls.c \
diff --git a/src/gallium/state_trackers/wgl/stw_device.h b/src/gallium/state_trackers/wgl/stw_device.h
index 4a930b5bef8..e35a4b94036 100644
--- a/src/gallium/state_trackers/wgl/stw_device.h
+++ b/src/gallium/state_trackers/wgl/stw_device.h
@@ -80,7 +80,7 @@ struct stw_device
extern struct stw_device *stw_dev;
-static INLINE struct stw_context *
+static inline struct stw_context *
stw_lookup_context_locked( DHGLRC dhglrc )
{
if (dhglrc == 0 || stw_dev == NULL)
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index 2b81b820495..7b34fcbb5ed 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -45,7 +45,7 @@
* Search the framebuffer with the matching HWND while holding the
* stw_dev::fb_mutex global lock.
*/
-static INLINE struct stw_framebuffer *
+static inline struct stw_framebuffer *
stw_framebuffer_from_hwnd_locked(
HWND hwnd )
{
@@ -376,7 +376,7 @@ stw_framebuffer_cleanup(void)
/**
* Given an hdc, return the corresponding stw_framebuffer.
*/
-static INLINE struct stw_framebuffer *
+static inline struct stw_framebuffer *
stw_framebuffer_from_hdc_locked(
HDC hdc )
{
diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c
index 0a9116cbb73..b41171a9195 100644
--- a/src/gallium/state_trackers/wgl/stw_st.c
+++ b/src/gallium/state_trackers/wgl/stw_st.c
@@ -46,7 +46,7 @@ struct stw_st_framebuffer {
unsigned texture_mask;
};
-static INLINE struct stw_st_framebuffer *
+static inline struct stw_st_framebuffer *
stw_st_framebuffer(struct st_framebuffer_iface *stfb)
{
return (struct stw_st_framebuffer *) stfb;
diff --git a/src/gallium/state_trackers/wgl/stw_tls.c b/src/gallium/state_trackers/wgl/stw_tls.c
index ca27a53433c..041066f5007 100644
--- a/src/gallium/state_trackers/wgl/stw_tls.c
+++ b/src/gallium/state_trackers/wgl/stw_tls.c
@@ -50,7 +50,7 @@ static CRITICAL_SECTION g_mutex = {
static struct stw_tls_data *g_pendingTlsData = NULL;
-static INLINE struct stw_tls_data *
+static inline struct stw_tls_data *
stw_tls_data_create(DWORD dwThreadId);
static struct stw_tls_data *
@@ -111,7 +111,7 @@ stw_tls_init(void)
/**
* Install windows hook for a given thread (not necessarily the current one).
*/
-static INLINE struct stw_tls_data *
+static inline struct stw_tls_data *
stw_tls_data_create(DWORD dwThreadId)
{
struct stw_tls_data *data;
diff --git a/src/gallium/state_trackers/xa/xa_composite.c b/src/gallium/state_trackers/xa/xa_composite.c
index c283a0d1892..7cfd1e136d1 100644
--- a/src/gallium/state_trackers/xa/xa_composite.c
+++ b/src/gallium/state_trackers/xa/xa_composite.c
@@ -167,7 +167,7 @@ blend_for_op(struct xa_composite_blend *blend,
}
-static INLINE int
+static inline int
xa_repeat_to_gallium(int mode)
{
switch(mode) {
@@ -185,7 +185,7 @@ xa_repeat_to_gallium(int mode)
return PIPE_TEX_WRAP_REPEAT;
}
-static INLINE boolean
+static inline boolean
xa_filter_to_gallium(int xrender_filter, int *out_filter)
{
diff --git a/src/gallium/state_trackers/xa/xa_context.c b/src/gallium/state_trackers/xa/xa_context.c
index fd49c82a559..ebfb290af13 100644
--- a/src/gallium/state_trackers/xa/xa_context.c
+++ b/src/gallium/state_trackers/xa/xa_context.c
@@ -37,7 +37,11 @@
XA_EXPORT void
xa_context_flush(struct xa_context *ctx)
{
- ctx->pipe->flush(ctx->pipe, &ctx->last_fence, 0);
+ if (ctx->last_fence) { /* a fence is already stored in the context */
+ struct pipe_screen *screen = ctx->xa->screen;
+ screen->fence_reference(screen, &ctx->last_fence, NULL); /* NOTE(review): presumably drops that fence so flush() cannot overwrite and leak it - confirm */
+ }
+ ctx->pipe->flush(ctx->pipe, &ctx->last_fence, 0); /* flush() is handed the address of the last_fence slot */
}
XA_EXPORT struct xa_context *
diff --git a/src/gallium/state_trackers/xa/xa_priv.h b/src/gallium/state_trackers/xa/xa_priv.h
index f71c06c6c19..13a0e86f66d 100644
--- a/src/gallium/state_trackers/xa/xa_priv.h
+++ b/src/gallium/state_trackers/xa/xa_priv.h
@@ -123,7 +123,7 @@ struct xa_context {
const struct xa_composite *comp;
};
-static INLINE void
+static inline void
xa_scissor_reset(struct xa_context *ctx)
{
ctx->scissor.maxx = 0;
@@ -133,7 +133,7 @@ xa_scissor_reset(struct xa_context *ctx)
ctx->scissor_valid = FALSE;
}
-static INLINE void
+static inline void
xa_scissor_update(struct xa_context *ctx, unsigned minx, unsigned miny,
unsigned maxx, unsigned maxy)
{
@@ -189,13 +189,13 @@ struct xa_shaders;
* Inline utilities
*/
-static INLINE int
+static inline int
xa_min(int a, int b)
{
return ((a <= b) ? a : b);
}
-static INLINE void
+static inline void
xa_pixel_to_float4(uint32_t pixel, float *color)
{
uint32_t r, g, b, a;
@@ -210,7 +210,7 @@ xa_pixel_to_float4(uint32_t pixel, float *color)
color[3] = ((float)a) / 255.;
}
-static INLINE void
+static inline void
xa_pixel_to_float4_a8(uint32_t pixel, float *color)
{
uint32_t a;
diff --git a/src/gallium/state_trackers/xa/xa_renderer.c b/src/gallium/state_trackers/xa/xa_renderer.c
index 7b28afc907f..fda07e5b68e 100644
--- a/src/gallium/state_trackers/xa/xa_renderer.c
+++ b/src/gallium/state_trackers/xa/xa_renderer.c
@@ -45,14 +45,14 @@ void
renderer_set_constants(struct xa_context *r,
int shader_type, const float *params, int param_bytes);
-static INLINE boolean
+static inline boolean
is_affine(float *matrix)
{
return floatIsZero(matrix[2]) && floatIsZero(matrix[5])
&& floatsEqual(matrix[8], 1);
}
-static INLINE void
+static inline void
map_point(float *mat, float x, float y, float *out_x, float *out_y)
{
if (!mat) {
@@ -71,7 +71,7 @@ map_point(float *mat, float x, float y, float *out_x, float *out_y)
}
}
-static INLINE void
+static inline void
renderer_draw(struct xa_context *r)
{
int num_verts = r->buffer_size / (r->attrs_per_vertex * NUM_COMPONENTS);
@@ -97,7 +97,7 @@ renderer_draw(struct xa_context *r)
xa_scissor_reset(r);
}
-static INLINE void
+static inline void
renderer_draw_conditional(struct xa_context *r, int next_batch)
{
if (r->buffer_size + next_batch >= XA_VB_SIZE ||
@@ -135,7 +135,7 @@ renderer_init_state(struct xa_context *r)
}
}
-static INLINE void
+static inline void
add_vertex_color(struct xa_context *r, float x, float y, float color[4])
{
float *vertex = r->buffer + r->buffer_size;
@@ -153,7 +153,7 @@ add_vertex_color(struct xa_context *r, float x, float y, float color[4])
r->buffer_size += 8;
}
-static INLINE void
+static inline void
add_vertex_1tex(struct xa_context *r, float x, float y, float s, float t)
{
float *vertex = r->buffer + r->buffer_size;
@@ -171,7 +171,7 @@ add_vertex_1tex(struct xa_context *r, float x, float y, float s, float t)
r->buffer_size += 8;
}
-static INLINE void
+static inline void
add_vertex_2tex(struct xa_context *r,
float x, float y, float s0, float t0, float s1, float t1)
{
diff --git a/src/gallium/state_trackers/xa/xa_tgsi.c b/src/gallium/state_trackers/xa/xa_tgsi.c
index c7454c9d6ac..5d8b8079c4b 100644
--- a/src/gallium/state_trackers/xa/xa_tgsi.c
+++ b/src/gallium/state_trackers/xa/xa_tgsi.c
@@ -106,7 +106,7 @@ struct xa_shaders {
struct cso_hash *fs_hash;
};
-static INLINE void
+static inline void
src_in_mask(struct ureg_program *ureg,
struct ureg_dst dst,
struct ureg_src src,
@@ -368,7 +368,7 @@ create_yuv_shader(struct pipe_context *pipe, struct ureg_program *ureg)
return ureg_create_shader_and_destroy(ureg, pipe);
}
-static INLINE void
+static inline void
xrender_tex(struct ureg_program *ureg,
struct ureg_dst dst,
struct ureg_src coords,
@@ -617,7 +617,7 @@ xa_shaders_destroy(struct xa_shaders *sc)
FREE(sc);
}
-static INLINE void *
+static inline void *
shader_from_cache(struct pipe_context *pipe,
unsigned type, struct cso_hash *hash, unsigned key)
{
diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c
index f69ac8edf27..21ca57ca633 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -153,7 +153,7 @@ xa_tracker_create(int drm_fd)
loader_fd = dup(drm_fd);
if (loader_fd == -1)
return NULL;
- if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd, false))
+ if (pipe_loader_drm_probe_fd(&xa->dev, loader_fd))
xa->screen = pipe_loader_create_screen(xa->dev, PIPE_SEARCH_DIR);
#endif
if (!xa->screen)
@@ -461,7 +461,7 @@ xa_surface_redefine(struct xa_surface *srf,
xa_min(save_height, template->height0), &src_box);
pipe->resource_copy_region(pipe, texture,
0, 0, 0, 0, srf->tex, 0, &src_box);
- pipe->flush(pipe, &xa->default_ctx->last_fence, 0);
+ xa_context_flush(xa->default_ctx);
}
pipe_resource_reference(&srf->tex, texture);
diff --git a/src/gallium/state_trackers/xa/xa_yuv.c b/src/gallium/state_trackers/xa/xa_yuv.c
index 15196392ac7..97a1833ff15 100644
--- a/src/gallium/state_trackers/xa/xa_yuv.c
+++ b/src/gallium/state_trackers/xa/xa_yuv.c
@@ -154,7 +154,7 @@ xa_yuv_planar_blit(struct xa_context *r,
box++;
}
- r->pipe->flush(r->pipe, &r->last_fence, 0);
+ xa_context_flush(r);
xa_ctx_sampler_views_destroy(r);
xa_ctx_srf_destroy(r);
diff --git a/src/gallium/state_trackers/xvmc/Makefile.am b/src/gallium/state_trackers/xvmc/Makefile.am
index 047d05b3719..3c7c35c8c37 100644
--- a/src/gallium/state_trackers/xvmc/Makefile.am
+++ b/src/gallium/state_trackers/xvmc/Makefile.am
@@ -20,7 +20,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
diff --git a/src/gallium/state_trackers/xvmc/surface.c b/src/gallium/state_trackers/xvmc/surface.c
index f32e85bf489..15eae59ff6e 100644
--- a/src/gallium/state_trackers/xvmc/surface.c
+++ b/src/gallium/state_trackers/xvmc/surface.c
@@ -489,7 +489,7 @@ Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status)
*status = 0;
if (surface_priv->fence)
- if (!pipe->screen->fence_signalled(pipe->screen, surface_priv->fence))
+ if (!pipe->screen->fence_finish(pipe->screen, surface_priv->fence, 0))
*status |= XVMC_RENDERING;
return Success;
diff --git a/src/gallium/state_trackers/xvmc/xvmc_private.h b/src/gallium/state_trackers/xvmc/xvmc_private.h
index 84c7b6cba0b..a1d026f704e 100644
--- a/src/gallium/state_trackers/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xvmc/xvmc_private.h
@@ -106,7 +106,7 @@ typedef struct
#define XVMC_WARN 2
#define XVMC_TRACE 3
-static INLINE void XVMC_MSG(int level, const char *fmt, ...)
+static inline void XVMC_MSG(int level, const char *fmt, ...)
{
static int debug_level = -1;
diff --git a/src/gallium/targets/d3dadapter9/Makefile.am b/src/gallium/targets/d3dadapter9/Makefile.am
index 591978f1f61..fe5b0b11679 100644
--- a/src/gallium/targets/d3dadapter9/Makefile.am
+++ b/src/gallium/targets/d3dadapter9/Makefile.am
@@ -118,8 +118,7 @@ else # HAVE_GALLIUM_STATIC_TARGETS
d3dadapter9_la_LIBADD += \
$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_LIBS)
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index 6342ab801a9..680f5164e60 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -101,7 +101,7 @@ drm_destroy( struct d3dadapter9_context *ctx )
/* read a DWORD in the form 0xnnnnnnnn, which is how sysfs pci id stuff is
* formatted. */
-static INLINE DWORD
+static inline DWORD
read_file_dword( const char *name )
{
char buf[32];
@@ -123,7 +123,7 @@ read_file_dword( const char *name )
* dword at an offset in the raw PCI header. The reason this isn't used for all
* data is that the kernel will make corrections but not expose them in the raw
* header bytes. */
-static INLINE DWORD
+static inline DWORD
read_config_dword( int fd,
unsigned offset )
{
@@ -135,7 +135,7 @@ read_config_dword( int fd,
return r;
}
-static INLINE void
+static inline void
get_bus_info( int fd,
DWORD *vendorid,
DWORD *deviceid,
@@ -160,7 +160,7 @@ get_bus_info( int fd,
}
}
-static INLINE void
+static inline void
read_descriptor( struct d3dadapter9_context *ctx,
int fd )
{
@@ -243,7 +243,7 @@ drm_create_adapter( int fd,
ctx->base.hal = dd_create_screen(fd);
#else
/* use pipe-loader to dlopen appropriate drm driver */
- if (!pipe_loader_drm_probe_fd(&ctx->dev, fd, FALSE)) {
+ if (!pipe_loader_drm_probe_fd(&ctx->dev, fd)) {
ERR("Failed to probe drm fd %d.\n", fd);
FREE(ctx);
close(fd);
diff --git a/src/gallium/targets/dri-vdpau.dyn b/src/gallium/targets/dri-vdpau.dyn
index e5923a23b39..a7919f7d3ba 100644
--- a/src/gallium/targets/dri-vdpau.dyn
+++ b/src/gallium/targets/dri-vdpau.dyn
@@ -1,4 +1,5 @@
{
nouveau_drm_screen_create;
radeon_drm_winsys_create;
+ amdgpu_winsys_create;
};
diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk
index 5ba129b7961..7168e1dbfb3 100644
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -35,17 +35,15 @@ endif
LOCAL_SRC_FILES := target.c
-LOCAL_CFLAGS := -DDRI_TARGET -DHAVE_LIBDRM
+LOCAL_CFLAGS := -DDRI_TARGET
LOCAL_SHARED_LIBRARIES := \
libdl \
libglapi \
libexpat \
-# swrast only?
-ifeq ($(MESA_GPU_DRIVERS),swrast)
-LOCAL_CFLAGS += -D__NOT_HAVE_DRM_H
-else
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DHAVE_LIBDRM
LOCAL_SHARED_LIBRARIES += libdrm
endif
@@ -87,7 +85,7 @@ gallium_DRIVERS += libmesa_winsys_radeon libmesa_pipe_radeon
LOCAL_SHARED_LIBRARIES += libdrm_radeon
endif
ifneq ($(filter swrast,$(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_dri libmesa_winsys_sw_kms_dri
+gallium_DRIVERS += libmesa_pipe_softpipe libmesa_winsys_sw_dri
LOCAL_CFLAGS += -DGALLIUM_SOFTPIPE
endif
ifneq ($(filter vc4,$(MESA_GPU_DRIVERS)),)
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index 96483964589..7c86ea13652 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -95,8 +95,7 @@ else # HAVE_GALLIUM_STATIC_TARGETS
gallium_dri_la_LIBADD += \
$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_LIBS)
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/dri/SConscript b/src/gallium/targets/dri/SConscript
index a51ed564344..2fb0da09200 100644
--- a/src/gallium/targets/dri/SConscript
+++ b/src/gallium/targets/dri/SConscript
@@ -25,11 +25,12 @@ if env['llvm']:
env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
env.Prepend(LIBS = [llvmpipe])
+env.PkgUseModules('DRM')
+
env.Append(CPPDEFINES = [
'GALLIUM_VMWGFX',
'GALLIUM_SOFTPIPE',
'DRI_TARGET',
- 'HAVE_LIBDRM',
])
env.Prepend(LIBS = [
@@ -37,7 +38,6 @@ env.Prepend(LIBS = [
svgadrm,
svga,
ws_dri,
- ws_kms_dri,
softpipe,
libloader,
mesautil,
@@ -58,9 +58,6 @@ module = env.LoadableModule(
env.Command('vmwgfx_dri.so', 'gallium_dri.so', "ln -f ${SOURCE} ${TARGET}")
# swrast_dri.so
env.Command('swrast_dri.so', 'gallium_dri.so', "ln -f ${SOURCE} ${TARGET}")
-# kms_swrast_dri.so
-env.Command('kms_swrast_dri.so', 'gallium_dri.so', "ln -f ${SOURCE} ${TARGET}")
env.Alias('dri-vmwgfx', module)
env.Alias('dri-swrast', module)
-env.Alias('dri-kms-swrast', module)
diff --git a/src/gallium/targets/dri/dri.sym b/src/gallium/targets/dri/dri.sym
index 49a2cc9fcf2..8e26fb960b7 100644
--- a/src/gallium/targets/dri/dri.sym
+++ b/src/gallium/targets/dri/dri.sym
@@ -4,6 +4,7 @@
__driDriverGetExtensions*;
nouveau_drm_screen_create;
radeon_drm_winsys_create;
+ amdgpu_winsys_create;
local:
*;
};
diff --git a/src/gallium/targets/omx/Makefile.am b/src/gallium/targets/omx/Makefile.am
index f52e66946ed..a4dff487dd8 100644
--- a/src/gallium/targets/omx/Makefile.am
+++ b/src/gallium/targets/omx/Makefile.am
@@ -57,8 +57,7 @@ else # HAVE_GALLIUM_STATIC_TARGETS
libomx_mesa_la_LIBADD += \
$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_LIBS)
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am
index 5daf327fb47..4ab706ef2ac 100644
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
include $(top_srcdir)/src/gallium/Automake.inc
lib_LTLIBRARIES = lib@OPENCL_LIBNAME@.la
@@ -7,7 +5,7 @@ lib_LTLIBRARIES = lib@OPENCL_LIBNAME@.la
lib@OPENCL_LIBNAME@_la_LDFLAGS = \
$(LLVM_LDFLAGS) \
-no-undefined \
- -version-number 1:0 \
+ -version-number @OPENCL_VERSION@:0 \
$(GC_SECTIONS) \
$(LD_NO_UNDEFINED)
@@ -17,12 +15,11 @@ lib@OPENCL_LIBNAME@_la_LDFLAGS += \
endif
lib@OPENCL_LIBNAME@_la_LIBADD = \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_client.la \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
$(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_CLIENT_LIBS) \
$(ELF_LIB) \
-ldl \
-lclangCodeGen \
diff --git a/src/gallium/targets/opencl/mesa.icd b/src/gallium/targets/opencl/mesa.icd
deleted file mode 100644
index 6a6a8706d7c..00000000000
--- a/src/gallium/targets/opencl/mesa.icd
+++ /dev/null
@@ -1 +0,0 @@
-libMesaOpenCL.so
diff --git a/src/gallium/targets/opencl/mesa.icd.in b/src/gallium/targets/opencl/mesa.icd.in
new file mode 100644
index 00000000000..1b77b4e4929
--- /dev/null
+++ b/src/gallium/targets/opencl/mesa.icd.in
@@ -0,0 +1 @@
+lib@OPENCL_LIBNAME@.so.@OPENCL_VERSION@
diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am
index e4048b58605..4d9f7be2ec9 100644
--- a/src/gallium/targets/pipe-loader/Makefile.am
+++ b/src/gallium/targets/pipe-loader/Makefile.am
@@ -155,10 +155,12 @@ nodist_EXTRA_pipe_radeonsi_la_SOURCES = dummy.cpp
pipe_radeonsi_la_LIBADD = \
$(PIPE_LIBS) \
$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la \
+ $(top_builddir)/src/gallium/winsys/amdgpu/drm/libamdgpuwinsys.la \
$(top_builddir)/src/gallium/drivers/radeon/libradeon.la \
$(top_builddir)/src/gallium/drivers/radeonsi/libradeonsi.la \
$(LIBDRM_LIBS) \
- $(RADEON_LIBS)
+ $(RADEON_LIBS) \
+ $(AMDGPU_LIBS)
endif
diff --git a/src/gallium/targets/pipe-loader/pipe_radeonsi.c b/src/gallium/targets/pipe-loader/pipe_radeonsi.c
index 5457b5b5e32..31077af6a04 100644
--- a/src/gallium/targets/pipe-loader/pipe_radeonsi.c
+++ b/src/gallium/targets/pipe-loader/pipe_radeonsi.c
@@ -2,6 +2,7 @@
#include "target-helpers/inline_debug_helper.h"
#include "radeon/drm/radeon_drm_public.h"
#include "radeon/radeon_winsys.h"
+#include "amdgpu/drm/amdgpu_public.h"
#include "radeonsi/si_public.h"
static struct pipe_screen *
@@ -9,7 +10,12 @@ create_screen(int fd)
{
struct radeon_winsys *rw;
- rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+ /* First, try amdgpu. */
+ rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
+
+ if (!rw)
+ rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+
return rw ? debug_screen_wrap(rw->screen) : NULL;
}
diff --git a/src/gallium/targets/va/Makefile.am b/src/gallium/targets/va/Makefile.am
index 57c7e353ae9..9613f041b58 100644
--- a/src/gallium/targets/va/Makefile.am
+++ b/src/gallium/targets/va/Makefile.am
@@ -54,8 +54,7 @@ else # HAVE_GALLIUM_STATIC_TARGETS
gallium_drv_video_la_LIBADD += \
$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_LIBS)
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/vdpau/Makefile.am b/src/gallium/targets/vdpau/Makefile.am
index 9455fc4cae5..7eb62c1cc78 100644
--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -66,8 +66,7 @@ else # HAVE_GALLIUM_STATIC_TARGETS
libvdpau_gallium_la_LIBADD += \
$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_LIBS)
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/vdpau/vdpau.sym b/src/gallium/targets/vdpau/vdpau.sym
index f184193c055..5e71c6285a6 100644
--- a/src/gallium/targets/vdpau/vdpau.sym
+++ b/src/gallium/targets/vdpau/vdpau.sym
@@ -3,6 +3,7 @@
vdp_imp_device_create_x11;
nouveau_drm_screen_create;
radeon_drm_winsys_create;
+ amdgpu_winsys_create;
local:
*;
};
diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am
index 8ddb9672bd7..92173dedce3 100644
--- a/src/gallium/targets/xa/Makefile.am
+++ b/src/gallium/targets/xa/Makefile.am
@@ -81,8 +81,7 @@ else # HAVE_GALLIUM_STATIC_TARGETS
libxatracker_la_LIBADD += \
$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_LIBS)
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/targets/xvmc/Makefile.am b/src/gallium/targets/xvmc/Makefile.am
index 3c16c8d51eb..b3285890822 100644
--- a/src/gallium/targets/xvmc/Makefile.am
+++ b/src/gallium/targets/xvmc/Makefile.am
@@ -52,11 +52,9 @@ libXvMCgallium_la_LIBADD += $(TARGET_LIB_DEPS) \
$(TARGET_RADEON_WINSYS) $(TARGET_RADEON_COMMON)
else # HAVE_GALLIUM_STATIC_TARGETS
-# XXX: Use the pipe-loader-client over pipe-loader ?
libXvMCgallium_la_LIBADD += \
$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
- $(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_LIBS)
+ $(GALLIUM_PIPE_LOADER_WINSYS_LIBS)
endif # HAVE_GALLIUM_STATIC_TARGETS
diff --git a/src/gallium/tests/graw/graw_util.h b/src/gallium/tests/graw/graw_util.h
index afcc584863e..e7cd0aa3ac3 100644
--- a/src/gallium/tests/graw/graw_util.h
+++ b/src/gallium/tests/graw/graw_util.h
@@ -26,7 +26,7 @@ struct graw_info
-static INLINE boolean
+static inline boolean
graw_util_create_window(struct graw_info *info,
int width, int height,
int num_cbufs, bool zstencil_buf)
@@ -144,7 +144,7 @@ graw_util_create_window(struct graw_info *info,
}
-static INLINE void
+static inline void
graw_util_default_state(struct graw_info *info, boolean depth_test)
{
{
@@ -181,7 +181,7 @@ graw_util_default_state(struct graw_info *info, boolean depth_test)
}
-static INLINE void
+static inline void
graw_util_viewport(struct graw_info *info,
float x, float y,
float width, float height,
@@ -205,7 +205,7 @@ graw_util_viewport(struct graw_info *info,
}
-static INLINE void
+static inline void
graw_util_flush_front(const struct graw_info *info)
{
info->screen->flush_frontbuffer(info->screen, info->color_buf[0],
@@ -213,7 +213,7 @@ graw_util_flush_front(const struct graw_info *info)
}
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
graw_util_create_tex2d(const struct graw_info *info,
int width, int height, enum pipe_format format,
const void *data)
@@ -278,7 +278,7 @@ graw_util_create_tex2d(const struct graw_info *info,
}
-static INLINE void *
+static inline void *
graw_util_create_simple_sampler(const struct graw_info *info,
unsigned wrap_mode,
unsigned img_filter)
@@ -304,7 +304,7 @@ graw_util_create_simple_sampler(const struct graw_info *info,
}
-static INLINE struct pipe_sampler_view *
+static inline struct pipe_sampler_view *
graw_util_create_simple_sampler_view(const struct graw_info *info,
struct pipe_resource *texture)
{
diff --git a/src/gallium/tests/trivial/Makefile.am b/src/gallium/tests/trivial/Makefile.am
index fcd240e85bb..56b7f3ffc66 100644
--- a/src/gallium/tests/trivial/Makefile.am
+++ b/src/gallium/tests/trivial/Makefile.am
@@ -12,11 +12,10 @@ AM_CPPFLAGS = \
$(GALLIUM_PIPE_LOADER_DEFINES)
LDADD = \
- $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_client.la \
+ $(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
- $(GALLIUM_PIPE_LOADER_CLIENT_LIBS) \
$(GALLIUM_COMMON_LIB_DEPS)
noinst_PROGRAMS = compute tri quad-tex
diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c
index daae577ec4b..c019c7bb0a3 100644
--- a/src/gallium/tests/trivial/quad-tex.c
+++ b/src/gallium/tests/trivial/quad-tex.c
@@ -297,6 +297,8 @@ static void close_prog(struct program *p)
static void draw(struct program *p)
{
+ const struct pipe_sampler_state *samplers[] = {&p->sampler};
+
/* set the render target */
cso_set_framebuffer(p->cso, &p->framebuffer);
@@ -310,8 +312,7 @@ static void draw(struct program *p)
cso_set_viewport(p->cso, &p->viewport);
/* sampler */
- cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler);
- cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+ cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
/* texture sampler view */
cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->view);
diff --git a/src/gallium/winsys/sw/kms-dri/Android.mk b/src/gallium/winsys/amdgpu/drm/Android.mk
similarity index 87%
rename from src/gallium/winsys/sw/kms-dri/Android.mk
rename to src/gallium/winsys/amdgpu/drm/Android.mk
index b065242aaf3..7d507aa79c6 100644
--- a/src/gallium/winsys/sw/kms-dri/Android.mk
+++ b/src/gallium/winsys/amdgpu/drm/Android.mk
@@ -1,7 +1,7 @@
# Mesa 3-D graphics library
#
-# Copyright (C) 2015 Chih-Wei Huang
-# Copyright (C) 2015 Android-x86 Open Source Project
+# Copyright (C) 2011 Chia-I Wu
+# Copyright (C) 2011 LunarG Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -23,15 +23,15 @@
LOCAL_PATH := $(call my-dir)
+# get C_SOURCES
include $(LOCAL_PATH)/Makefile.sources
include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(C_SOURCES)
-LOCAL_MODULE := libmesa_winsys_sw_kms_dri
-
-LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_amdgpu
+LOCAL_MODULE := libmesa_winsys_amdgpu
include $(GALLIUM_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
diff --git a/src/gallium/winsys/amdgpu/drm/Makefile.am b/src/gallium/winsys/amdgpu/drm/Makefile.am
new file mode 100644
index 00000000000..a719913b157
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/Makefile.am
@@ -0,0 +1,17 @@
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+ $(GALLIUM_WINSYS_CFLAGS) \
+ $(AMDGPU_CFLAGS) \
+ -I$(srcdir)/addrlib \
+ -I$(srcdir)/addrlib/core \
+ -I$(srcdir)/addrlib/inc/chip/r800 \
+ -I$(srcdir)/addrlib/r800/chip \
+ -DBRAHMA_BUILD=1
+
+AM_CXXFLAGS = $(AM_CFLAGS)
+
+noinst_LTLIBRARIES = libamdgpuwinsys.la
+
+libamdgpuwinsys_la_SOURCES = $(C_SOURCES)
diff --git a/src/gallium/winsys/amdgpu/drm/Makefile.sources b/src/gallium/winsys/amdgpu/drm/Makefile.sources
new file mode 100644
index 00000000000..6b33841b204
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/Makefile.sources
@@ -0,0 +1,31 @@
+C_SOURCES := \
+ addrlib/addrinterface.cpp \
+ addrlib/addrinterface.h \
+ addrlib/addrtypes.h \
+ addrlib/core/addrcommon.h \
+ addrlib/core/addrelemlib.cpp \
+ addrlib/core/addrelemlib.h \
+ addrlib/core/addrlib.cpp \
+ addrlib/core/addrlib.h \
+ addrlib/core/addrobject.cpp \
+ addrlib/core/addrobject.h \
+ addrlib/inc/chip/r800/si_gb_reg.h \
+ addrlib/inc/lnx_common_defs.h \
+ addrlib/r800/chip/si_ci_merged_enum.h \
+ addrlib/r800/chip/si_ci_vi_merged_enum.h \
+ addrlib/r800/chip/si_enum.h \
+ addrlib/r800/ciaddrlib.cpp \
+ addrlib/r800/ciaddrlib.h \
+ addrlib/r800/egbaddrlib.cpp \
+ addrlib/r800/egbaddrlib.h \
+ addrlib/r800/siaddrlib.cpp \
+ addrlib/r800/siaddrlib.h \
+ amdgpu_bo.c \
+ amdgpu_bo.h \
+ amdgpu_cs.c \
+ amdgpu_cs.h \
+ amdgpu_id.h \
+ amdgpu_public.h \
+ amdgpu_surface.c \
+ amdgpu_winsys.c \
+ amdgpu_winsys.h
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.cpp b/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.cpp
new file mode 100644
index 00000000000..65569278b1e
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.cpp
@@ -0,0 +1,1008 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrinterface.cpp
+* @brief Contains the addrlib interface functions
+***************************************************************************************************
+*/
+#include "addrinterface.h"
+#include "addrlib.h"
+
+#include "addrcommon.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Create/Destroy/Config functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrCreate
+*
+* @brief
+* Create address lib object; thin C entry point that forwards both arguments to AddrLib::Create
+*
+* @return
+* The code returned by AddrLib::Create: ADDR_OK if successful, otherwise an ADDR_E_RETURNCODE error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrCreate(
+ const ADDR_CREATE_INPUT* pAddrCreateIn, ///< [in] information for creating address lib object
+ ADDR_CREATE_OUTPUT* pAddrCreateOut) ///< [out] address lib handle
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ returnCode = AddrLib::Create(pAddrCreateIn, pAddrCreateOut);
+
+ return returnCode;
+}
+
+
+
+/**
+***************************************************************************************************
+* AddrDestroy
+*
+* @brief
+* Destroy the address lib object behind hLib by calling its Destroy() method
+*
+* @return
+* ADDR_OK for any non-null handle (the result of Destroy() is not propagated), ADDR_ERROR if null
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrDestroy(
+ ADDR_HANDLE hLib) ///< [in] address lib handle
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (hLib)
+ {
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+ pLib->Destroy();
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // null handle: nothing to destroy
+ }
+
+ return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Surface functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrComputeSurfaceInfo
+*
+* @brief
+* Calculate surface width/height/depth/alignments and suitable tiling mode
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from AddrLib::ComputeSurfaceInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] surface information
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) ///< [out] surface parameters and alignments
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeSurfaceInfo(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+
+
+/**
+***************************************************************************************************
+* AddrComputeSurfaceAddrFromCoord
+*
+* @brief
+* Compute surface address according to coordinates
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from ComputeSurfaceAddrFromCoord
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] surface info and coordinates
+ ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] surface address
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeSurfaceAddrFromCoord(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrComputeSurfaceCoordFromAddr
+*
+* @brief
+* Compute coordinates according to surface address
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from ComputeSurfaceCoordFromAddr
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] surface info and address
+ ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) ///< [out] coordinates
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeSurfaceCoordFromAddr(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// HTile functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrComputeHtileInfo
+*
+* @brief
+* Compute Htile pitch, height, base alignment and size in bytes
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from AddrLib::ComputeHtileInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Htile information
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) ///< [out] Htile pitch, height and size in bytes
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeHtileInfo(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrComputeHtileAddrFromCoord
+*
+* @brief
+* Compute Htile address according to coordinates (of depth buffer)
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from ComputeHtileAddrFromCoord
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Htile info and coordinates
+ ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Htile address
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeHtileAddrFromCoord(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrComputeHtileCoordFromAddr
+*
+* @brief
+* Compute coordinates within depth buffer (1st pixel of a micro tile) according to
+* Htile address
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from ComputeHtileCoordFromAddr
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Htile info and address
+ ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] Htile coordinates
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeHtileCoordFromAddr(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// C-mask functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrComputeCmaskInfo
+*
+* @brief
+* Compute Cmask pitch, height, base alignment and size in bytes from color buffer
+* info
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from AddrLib::ComputeCmaskInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Cmask pitch and height
+ ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) ///< [out] Cmask pitch, height and size in bytes
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeCmaskInfo(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrComputeCmaskAddrFromCoord
+*
+* @brief
+* Compute Cmask address according to coordinates (of MSAA color buffer)
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from ComputeCmaskAddrFromCoord
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Cmask info and coordinates
+ ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Cmask address
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeCmaskAddrFromCoord(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrComputeCmaskCoordFromAddr
+*
+* @brief
+* Compute coordinates within color buffer (1st pixel of a micro tile) according to
+* Cmask address
+*
+* @return
+* ADDR_ERROR if hLib yields no AddrLib object, otherwise the code from ComputeCmaskCoordFromAddr
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr(
+ ADDR_HANDLE hLib, ///< [in] address lib handle
+ const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Cmask info and address
+ ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Cmask coordinates
+{
+ AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (pLib != NULL)
+ {
+ returnCode = pLib->ComputeCmaskCoordFromAddr(pIn, pOut);
+ }
+ else
+ {
+ returnCode = ADDR_ERROR; // GetAddrLib(hLib) returned NULL
+ }
+
+ return returnCode;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// F-mask functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskInfo
+*
+*   @brief
+*       Compute Fmask pitch/height/depth/alignments and size in bytes.
+*       Thin wrapper: the work is delegated to AddrLib::ComputeFmaskInfo on the library object
+*       obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ComputeFmaskInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo(
+    ADDR_HANDLE hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] Fmask information
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) ///< [out] Fmask pitch and height
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ComputeFmaskInfo(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskAddrFromCoord
+*
+*   @brief
+*       Compute Fmask address according to coordinates (x,y,slice,sample,plane).
+*       Thin wrapper: the work is delegated to AddrLib::ComputeFmaskAddrFromCoord on the
+*       library object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ComputeFmaskAddrFromCoord
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord(
+    ADDR_HANDLE hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Fmask info and coordinates
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] Fmask address
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ComputeFmaskAddrFromCoord(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeFmaskCoordFromAddr
+*
+*   @brief
+*       Compute coordinates (x,y,slice,sample,plane) according to Fmask address.
+*       Thin wrapper: the work is delegated to AddrLib::ComputeFmaskCoordFromAddr on the
+*       library object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ComputeFmaskCoordFromAddr
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
+    ADDR_HANDLE hLib, ///< [in] address lib handle
+    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Fmask info and address
+    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) ///< [out] Fmask coordinates
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ComputeFmaskCoordFromAddr(pIn, pOut);
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// DCC key functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrComputeDccInfo
+*
+*   @brief
+*       Compute DCC key size, base alignment based on color surface size, tile info or tile index.
+*       Thin wrapper: the work is delegated to AddrLib::ComputeDccInfo on the library object
+*       obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ComputeDccInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
+    ADDR_HANDLE hLib, ///< [in] handle of addrlib
+    const ADDR_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input
+    ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) ///< [out] output
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ComputeDccInfo(pIn, pOut);
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// The functions below are element-related or helper functions
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrGetVersion
+*
+*   @brief
+*       Get AddrLib version number. Client may check this return value against ADDRLIB_VERSION
+*       defined in addrinterface.h to see if there is a mismatch.
+*
+*   @return
+*       The value of AddrLib::GetVersion() for the library object obtained from hLib, or 0 if
+*       no library object could be obtained (this case also trips ADDR_ASSERT)
+***************************************************************************************************
+*/
+UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib)
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_ASSERT(pLib != NULL);
+
+    // If execution continues past the assert without a library object, report 0.
+    if (pLib == NULL)
+    {
+        return 0;
+    }
+
+    return pLib->GetVersion();
+}
+
+/**
+***************************************************************************************************
+*   AddrUseTileIndex
+*
+*   @brief
+*       Return TRUE if tileIndex is enabled in this address library.
+*       The answer comes from AddrLib::UseTileIndex(0) on the library object obtained from hLib.
+*
+*   @return
+*       The result of AddrLib::UseTileIndex(0), or FALSE if no library object could be obtained
+*       (this case also trips ADDR_ASSERT)
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib)
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_ASSERT(pLib != NULL);
+
+    // If execution continues past the assert without a library object, report FALSE.
+    if (pLib == NULL)
+    {
+        return FALSE;
+    }
+
+    return pLib->UseTileIndex(0);
+}
+
+/**
+***************************************************************************************************
+*   AddrUseCombinedSwizzle
+*
+*   @brief
+*       Return TRUE if combined swizzle is enabled in this address library.
+*       The answer comes from AddrLib::UseCombinedSwizzle() on the library object obtained
+*       from hLib.
+*
+*   @return
+*       The result of AddrLib::UseCombinedSwizzle(), or FALSE if no library object could be
+*       obtained (this case also trips ADDR_ASSERT)
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib)
+{
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    ADDR_ASSERT(pLib != NULL);
+
+    // If execution continues past the assert without a library object, report FALSE.
+    if (pLib == NULL)
+    {
+        return FALSE;
+    }
+
+    return pLib->UseCombinedSwizzle();
+}
+
+/**
+***************************************************************************************************
+*   AddrExtractBankPipeSwizzle
+*
+*   @brief
+*       Extract Bank and Pipe swizzle from base256b.
+*       Thin wrapper: the work is delegated to AddrLib::ExtractBankPipeSwizzle on the library
+*       object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ExtractBankPipeSwizzle
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle(
+    ADDR_HANDLE hLib, ///< [in] addrlib handle
+    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn, ///< [in] input structure
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) ///< [out] output structure
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ExtractBankPipeSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrCombineBankPipeSwizzle
+*
+*   @brief
+*       Combine Bank and Pipe swizzle.
+*       Thin wrapper: the work is delegated to AddrLib::CombineBankPipeSwizzle on the library
+*       object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::CombineBankPipeSwizzle
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle(
+    ADDR_HANDLE hLib,
+    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn,
+    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut)
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->CombineBankPipeSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeSliceSwizzle
+*
+*   @brief
+*       Compute a swizzle for slice from a base swizzle.
+*       Thin wrapper: the work is delegated to AddrLib::ComputeSliceTileSwizzle (note the
+*       differing method name) on the library object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ComputeSliceTileSwizzle
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle(
+    ADDR_HANDLE hLib,
+    const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
+    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut)
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ComputeSliceTileSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrComputeBaseSwizzle
+*
+*   @brief
+*       Return a combined Bank and Pipe swizzle based on surface type/index.
+*       Thin wrapper: the work is delegated to AddrLib::ComputeBaseSwizzle on the library
+*       object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ComputeBaseSwizzle
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
+    ADDR_HANDLE hLib,
+    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
+    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut)
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ComputeBaseSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   ElemFlt32ToDepthPixel
+*
+*   @brief
+*       Convert a FLT_32 value to a depth/stencil pixel value.
+*       The conversion itself is done by AddrLib::Flt32ToDepthPixel on the library object
+*       obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; ADDR_OK in every other case
+*
+*   @note
+*       NOTE(review): whatever AddrLib::Flt32ToDepthPixel returns (if anything) is not captured
+*       here, so a failure inside it would not reach the caller - confirm against its
+*       declaration whether the result should be propagated.
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel(
+    ADDR_HANDLE hLib, ///< [in] addrlib handle
+    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn, ///< [in] per-component value
+    ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) ///< [out] final pixel value
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    pAddrLib->Flt32ToDepthPixel(pIn, pOut);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   ElemFlt32ToColorPixel
+*
+*   @brief
+*       Convert a FLT_32 value to a red/green/blue/alpha pixel value.
+*       The conversion itself is done by AddrLib::Flt32ToColorPixel on the library object
+*       obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; ADDR_OK in every other case
+*
+*   @note
+*       NOTE(review): whatever AddrLib::Flt32ToColorPixel returns (if anything) is not captured
+*       here, so a failure inside it would not reach the caller - confirm against its
+*       declaration whether the result should be propagated.
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel(
+    ADDR_HANDLE hLib, ///< [in] addrlib handle
+    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn, ///< [in] format, surface number and swap value
+    ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) ///< [out] final pixel value
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    pAddrLib->Flt32ToColorPixel(pIn, pOut);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   ElemGetExportNorm
+*
+*   @brief
+*       Helper function to check whether a format can be EXPORT_NORM, which relates to
+*       register CB_COLOR_INFO.SURFACE_FORMAT.
+*       FP16 can be reported as EXPORT_NORM for rv770 in r600 family.
+*       The answer comes from AddrLib::GetExportNorm on the library object obtained from hLib.
+*
+*   @return
+*       The result of AddrLib::GetExportNorm(pIn), or FALSE if no library object could be
+*       obtained (this case also trips ADDR_ASSERT)
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API ElemGetExportNorm(
+    ADDR_HANDLE hLib, ///< [in] addrlib handle
+    const ELEM_GETEXPORTNORM_INPUT* pIn) ///< [in] input structure
+{
+    BOOL_32 enabled = FALSE;
+
+    AddrLib* pLib = AddrLib::GetAddrLib(hLib);
+
+    // Assert on the handle lookup directly, as AddrGetVersion/AddrUseTileIndex do; no
+    // intermediate status variable is needed since this function cannot return one.
+    ADDR_ASSERT(pLib != NULL);
+
+    if (pLib)
+    {
+        enabled = pLib->GetExportNorm(pIn);
+    }
+
+    return enabled;
+}
+
+/**
+***************************************************************************************************
+*   AddrConvertTileInfoToHW
+*
+*   @brief
+*       Convert tile info from real value to hardware register value.
+*       Thin wrapper: the work is delegated to AddrLib::ConvertTileInfoToHW on the library
+*       object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ConvertTileInfoToHW
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW(
+    ADDR_HANDLE hLib, ///< [in] address lib handle
+    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] tile info with real value
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) ///< [out] tile info with HW register value
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ConvertTileInfoToHW(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrConvertTileIndex
+*
+*   @brief
+*       Convert tile index to tile mode/type/info.
+*       Thin wrapper: the work is delegated to AddrLib::ConvertTileIndex on the library object
+*       obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ConvertTileIndex
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex(
+    ADDR_HANDLE hLib, ///< [in] address lib handle
+    const ADDR_CONVERT_TILEINDEX_INPUT* pIn, ///< [in] input - tile index
+    ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ConvertTileIndex(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrConvertTileIndex1
+*
+*   @brief
+*       Convert tile index to tile mode/type/info.
+*       Same output structure as AddrConvertTileIndex, but takes an
+*       ADDR_CONVERT_TILEINDEX1_INPUT and delegates to AddrLib::ConvertTileIndex1 on the
+*       library object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ConvertTileIndex1
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1(
+    ADDR_HANDLE hLib, ///< [in] address lib handle
+    const ADDR_CONVERT_TILEINDEX1_INPUT* pIn, ///< [in] input - tile index
+    ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) ///< [out] tile mode/type/info
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ConvertTileIndex1(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrGetTileIndex
+*
+*   @brief
+*       Get tile index from tile mode/type/info.
+*       Thin wrapper: the work is delegated to AddrLib::GetTileIndex on the library object
+*       obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::GetTileIndex
+*
+*   @note
+*       Only meaningful for SI (and above)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
+    ADDR_HANDLE hLib,
+    const ADDR_GET_TILEINDEX_INPUT* pIn,
+    ADDR_GET_TILEINDEX_OUTPUT* pOut)
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->GetTileIndex(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrComputePrtInfo
+*
+*   @brief
+*       Interface function for ComputePrtInfo: delegates to AddrLib::ComputePrtInfo on the
+*       library object obtained from hLib.
+*
+*   @return
+*       ADDR_ERROR if AddrLib::GetAddrLib(hLib) returns NULL; otherwise the ADDR_E_RETURNCODE
+*       produced by AddrLib::ComputePrtInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
+    ADDR_HANDLE hLib,
+    const ADDR_PRT_INFO_INPUT* pIn,
+    ADDR_PRT_INFO_OUTPUT* pOut)
+{
+    AddrLib* const pAddrLib = AddrLib::GetAddrLib(hLib);
+
+    // Nothing to dispatch to: fail with the generic error code.
+    if (pAddrLib == NULL)
+    {
+        return ADDR_ERROR;
+    }
+
+    return pAddrLib->ComputePrtInfo(pIn, pOut);
+}
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.h b/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.h
new file mode 100644
index 00000000000..03fbf2bd0ee
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/addrinterface.h
@@ -0,0 +1,2166 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrinterface.h
+* @brief Contains the addrlib interfaces declaration and parameter defines
+***************************************************************************************************
+*/
+#ifndef __ADDR_INTERFACE_H__
+#define __ADDR_INTERFACE_H__
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+#include "addrtypes.h"
+
+#define ADDRLIB_VERSION_MAJOR 5
+#define ADDRLIB_VERSION_MINOR 25
+#define ADDRLIB_VERSION ((ADDRLIB_VERSION_MAJOR << 16) | ADDRLIB_VERSION_MINOR)
+
+/// Virtually all interface functions need ADDR_HANDLE as first parameter
+typedef VOID* ADDR_HANDLE;
+
+/// Client handle used in callbacks
+typedef VOID* ADDR_CLIENT_HANDLE;
+
+/**
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* // Callback functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)(
+* const ADDR_ALLOCSYSMEM_INPUT* pInput);
+* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)(
+* const ADDR_FREESYSMEM_INPUT* pInput);
+* typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
+* const ADDR_DEBUGPRINT_INPUT* pInput);
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* // Create/Destroy/Config functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* AddrCreate()
+* AddrDestroy()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* // Surface functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* AddrComputeSurfaceInfo()
+* AddrComputeSurfaceAddrFromCoord()
+* AddrComputeSurfaceCoordFromAddr()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* // HTile functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* AddrComputeHtileInfo()
+* AddrComputeHtileAddrFromCoord()
+* AddrComputeHtileCoordFromAddr()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* // C-mask functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* AddrComputeCmaskInfo()
+* AddrComputeCmaskAddrFromCoord()
+* AddrComputeCmaskCoordFromAddr()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* // F-mask functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* AddrComputeFmaskInfo()
+* AddrComputeFmaskAddrFromCoord()
+* AddrComputeFmaskCoordFromAddr()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* // Element/Utility functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* ElemFlt32ToDepthPixel()
+* ElemFlt32ToColorPixel()
+* AddrExtractBankPipeSwizzle()
+* AddrCombineBankPipeSwizzle()
+* AddrComputeSliceSwizzle()
+* AddrConvertTileInfoToHW()
+* AddrConvertTileIndex()
+* AddrConvertTileIndex1()
+* AddrGetTileIndex()
+* AddrComputeBaseSwizzle()
+* AddrUseTileIndex()
+* AddrUseCombinedSwizzle()
+*
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* // Dump functions
+* /////////////////////////////////////////////////////////////////////////////////////////////////
+* AddrDumpSurfaceInfo()
+* AddrDumpFmaskInfo()
+* AddrDumpCmaskInfo()
+* AddrDumpHtileInfo()
+*
+**/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Callback functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* @brief Flags for system memory allocation requests (see ADDR_ALLOCSYSMEM_INPUT::flags).
+* @note No flag is defined yet: all 32 bits are reserved for future use. Keeping the field in
+* place means flags can be added later with minimal impact on the client.
+***************************************************************************************************
+*/
+typedef union _ADDR_ALLOCSYSMEM_FLAGS
+{
+ struct
+ {
+ UINT_32 reserved : 32; ///< Reserved for future use.
+ } fields;
+ UINT_32 value; ///< All 32 flag bits accessed as a single integer
+
+} ADDR_ALLOCSYSMEM_FLAGS;
+
+/**
+***************************************************************************************************
+* @brief Alloc system memory input structure, passed to the ADDR_ALLOCSYSMEM callback
+***************************************************************************************************
+*/
+typedef struct _ADDR_ALLOCSYSMEM_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ ADDR_ALLOCSYSMEM_FLAGS flags; ///< System memory flags (all bits currently reserved).
+ UINT_32 sizeInBytes; ///< System memory allocation size in bytes.
+ ADDR_CLIENT_HANDLE hClient; ///< Client handle
+} ADDR_ALLOCSYSMEM_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_ALLOCSYSMEM
+* @brief
+* Allocate system memory callback function. Returns valid pointer on success.
+* @param pInput
+* Allocation request: structure size, flags, byte count and client handle
+* (see ADDR_ALLOCSYSMEM_INPUT).
+***************************************************************************************************
+*/
+typedef VOID* (ADDR_API* ADDR_ALLOCSYSMEM)(
+ const ADDR_ALLOCSYSMEM_INPUT* pInput);
+
+/**
+***************************************************************************************************
+* @brief Free system memory input structure, passed to the ADDR_FREESYSMEM callback
+***************************************************************************************************
+*/
+typedef struct _ADDR_FREESYSMEM_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ VOID* pVirtAddr; ///< Virtual address of the memory to free
+ ADDR_CLIENT_HANDLE hClient; ///< Client handle
+} ADDR_FREESYSMEM_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_FREESYSMEM
+* @brief
+* Free system memory callback function.
+* Returns ADDR_OK on success.
+* @param pInput
+* Free request: structure size, virtual address and client handle
+* (see ADDR_FREESYSMEM_INPUT).
+***************************************************************************************************
+*/
+typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_FREESYSMEM)(
+ const ADDR_FREESYSMEM_INPUT* pInput);
+
+/**
+***************************************************************************************************
+* @brief Print debug message input structure, passed to the ADDR_DEBUGPRINT callback
+***************************************************************************************************
+*/
+typedef struct _ADDR_DEBUGPRINT_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ CHAR* pDebugString; ///< Debug print string (presumably a printf-style format for ap - confirm)
+ va_list ap; ///< Variable argument list
+ ADDR_CLIENT_HANDLE hClient; ///< Client handle
+} ADDR_DEBUGPRINT_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_DEBUGPRINT
+* @brief
+* Print debug message callback function.
+* Returns ADDR_OK on success.
+* @param pInput
+* Message to print: structure size, debug string, argument list and client handle
+* (see ADDR_DEBUGPRINT_INPUT).
+***************************************************************************************************
+*/
+typedef ADDR_E_RETURNCODE (ADDR_API* ADDR_DEBUGPRINT)(
+ const ADDR_DEBUGPRINT_INPUT* pInput);
+
+/**
+***************************************************************************************************
+* ADDR_CALLBACKS
+*
+* @brief
+* Address Library needs client to provide system memory alloc/free routines.
+* The structure also carries a debug message print routine.
+***************************************************************************************************
+*/
+typedef struct _ADDR_CALLBACKS
+{
+ ADDR_ALLOCSYSMEM allocSysMem; ///< Routine to allocate system memory
+ ADDR_FREESYSMEM freeSysMem; ///< Routine to free system memory
+ ADDR_DEBUGPRINT debugPrint; ///< Routine to print debug message
+} ADDR_CALLBACKS;
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Create/Destroy functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* ADDR_CREATE_FLAGS
+*
+* @brief
+* This structure is used to pass some setup in creation of AddrLib
+* @note
+* The flag bits live in an unnamed struct, so they are accessed directly on the union;
+* `value` overlays the same 32 bits as a single integer.
+***************************************************************************************************
+*/
+typedef union _ADDR_CREATE_FLAGS
+{
+ struct
+ {
+ UINT_32 noCubeMipSlicesPad : 1; ///< Turn cubemap faces padding off
+ UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and
+ /// output structure
+ UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid
+ UINT_32 useCombinedSwizzle : 1; ///< Use combined tile swizzle
+ UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level
+ UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
+ UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level
+ UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
+ UINT_32 reserved : 24; ///< Reserved bits for future use
+ };
+
+ UINT_32 value; ///< All flag bits accessed as a single 32-bit integer
+} ADDR_CREATE_FLAGS;
+
+/**
+***************************************************************************************************
+* ADDR_REGISTER_VALUE
+*
+* @brief
+* Data from registers to setup AddrLib global data, used in AddrCreate
+* (passed in as ADDR_CREATE_INPUT::regValue)
+***************************************************************************************************
+*/
+typedef struct _ADDR_REGISTER_VALUE
+{
+ UINT_32 gbAddrConfig; ///< For R8xx, use GB_ADDR_CONFIG register value.
+ /// For R6xx/R7xx, use GB_TILING_CONFIG.
+ /// But they can be treated as the same.
+ /// if this value is 0, use chip to set default value
+ UINT_32 backendDisables; ///< 1 bit per backend, starting with LSB. 1=disabled,0=enabled.
+ /// Register value of CC_RB_BACKEND_DISABLE.BACKEND_DISABLE
+
+ /// R800 registers-----------------------------------------------
+ UINT_32 noOfBanks; ///< Number of h/w ram banks - For r800: MC_ARB_RAMCFG.NOOFBANK
+ /// No enums for this value in h/w header files
+ /// 0: 4
+ /// 1: 8
+ /// 2: 16
+ UINT_32 noOfRanks; ///< MC_ARB_RAMCFG.NOOFRANK
+ /// 0: 1
+ /// 1: 2
+ /// SI (R1000) registers-----------------------------------------
+ const UINT_32* pTileConfig; ///< Global tile setting tables
+ UINT_32 noOfEntries; ///< Number of entries in pTileConfig
+
+ /// CI registers-------------------------------------------------
+ const UINT_32* pMacroTileConfig; ///< Global macro tile mode table
+ UINT_32 noOfMacroEntries; ///< Number of entries in pMacroTileConfig
+
+} ADDR_REGISTER_VALUE;
+
+/**
+***************************************************************************************************
+* ADDR_CREATE_INPUT
+*
+* @brief
+* Parameters used to create an AddrLib Object (input of AddrCreate). Caller must provide
+* all fields.
+*
+***************************************************************************************************
+*/
+typedef struct _ADDR_CREATE_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 chipEngine; ///< Chip Engine
+ UINT_32 chipFamily; ///< Chip Family
+ UINT_32 chipRevision; ///< Chip Revision
+ ADDR_CALLBACKS callbacks; ///< Callbacks for sysmem alloc/free/print
+ ADDR_CREATE_FLAGS createFlags; ///< Flags to setup AddrLib
+ ADDR_REGISTER_VALUE regValue; ///< Data from registers to setup AddrLib global data
+ ADDR_CLIENT_HANDLE hClient; ///< Client handle
+ UINT_32 minPitchAlignPixels; ///< Minimum pitch alignment in pixels
+} ADDR_CREATE_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_CREATE_OUTPUT
+*
+* @brief
+* Return AddrLib handle to client driver (output of AddrCreate)
+*
+***************************************************************************************************
+*/
+typedef struct _ADDR_CREATE_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ ADDR_HANDLE hLib; ///< Address lib handle
+} ADDR_CREATE_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrCreate
+*
+* @brief
+* Create AddrLib object, must be called before any interface calls
+*
+* @param pAddrCreateIn
+* Creation parameters (see ADDR_CREATE_INPUT)
+* @param pAddrCreateOut
+* Receives the address lib handle (see ADDR_CREATE_OUTPUT::hLib)
+*
+* @return
+* ADDR_OK if successful
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrCreate(
+ const ADDR_CREATE_INPUT* pAddrCreateIn,
+ ADDR_CREATE_OUTPUT* pAddrCreateOut);
+
+
+
+/**
+***************************************************************************************************
+* AddrDestroy
+*
+* @brief
+* Destroy AddrLib object, must be called to free internally allocated resources.
+*
+* @param hLib
+* Address lib handle of the object to destroy
+*
+* @return
+* ADDR_OK if successful
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrDestroy(
+ ADDR_HANDLE hLib);
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Surface functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* ADDR_TILEINFO
+*
+* @brief
+* Bank/tiling parameters. On function input, these can be set as desired or
+* left 0 for AddrLib to calculate/default. On function output, these are the actual
+* parameters used.
+* @note
+* Valid bankWidth/bankHeight value:
+* 1,2,4,8. They are factors instead of pixels or bytes.
+*
+* The bank number remains constant across each row of the
+* macro tile as each pipe is selected, so the number of
+* tiles in the x direction with the same bank number will
+* be bank_width * num_pipes.
+***************************************************************************************************
+*/
+typedef struct _ADDR_TILEINFO
+{
+ /// Any of these parameters can be set to 0 to use the HW default.
+ UINT_32 banks; ///< Number of banks, numerical value
+ UINT_32 bankWidth; ///< Number of tiles in the X direction in the same bank
+ UINT_32 bankHeight; ///< Number of tiles in the Y direction in the same bank
+ UINT_32 macroAspectRatio; ///< Macro tile aspect ratio. 1 = 1:1, 2 = 4:1, 4 = 16:1, 8 = 64:1
+ UINT_32 tileSplitBytes; ///< Tile split size, in bytes
+ AddrPipeCfg pipeConfig; ///< Pipe Config = HW enum + 1
+} ADDR_TILEINFO;
+
+// Alias kept so existing clients need no change. R800 was dropped from the name because we plan to
+// implement SI within the 800 HWL - an AddrPipeCfg is added in the above data structure
+typedef ADDR_TILEINFO ADDR_R800_TILEINFO;
+
+/**
+***************************************************************************************************
+* @brief
+* Information needed by quad buffer stereo support
+***************************************************************************************************
+*/
+typedef struct _ADDR_QBSTEREOINFO
+{
+ UINT_32 eyeHeight; ///< Height (in pixel rows) to right eye
+ UINT_32 rightOffset; ///< Offset (in bytes) to right eye
+ UINT_32 rightSwizzle; ///< TileSwizzle for the right eye
+} ADDR_QBSTEREOINFO;
+
+/**
+***************************************************************************************************
+* ADDR_SURFACE_FLAGS
+*
+* @brief
+* Surface flags
+***************************************************************************************************
+*/
+typedef union _ADDR_SURFACE_FLAGS
+{
+ struct
+ {
+ UINT_32 color : 1; ///< Flag indicates this is a color buffer
+ UINT_32 depth : 1; ///< Flag indicates this is a depth/stencil buffer
+ UINT_32 stencil : 1; ///< Flag indicates this is a stencil buffer
+ UINT_32 texture : 1; ///< Flag indicates this is a texture
+ UINT_32 cube : 1; ///< Flag indicates this is a cubemap
+
+ UINT_32 volume : 1; ///< Flag indicates this is a volume texture
+ UINT_32 fmask : 1; ///< Flag indicates this is an fmask
+ UINT_32 cubeAsArray : 1; ///< Flag indicates whether to treat cubemap as arrays
+ UINT_32 compressZ : 1; ///< Flag indicates z buffer is compressed
+ UINT_32 overlay : 1; ///< Flag indicates this is an overlay surface
+ UINT_32 noStencil : 1; ///< Flag indicates this depth has no separate stencil
+ UINT_32 display : 1; ///< Flag indicates this should match display controller req.
+ UINT_32 opt4Space : 1; ///< Flag indicates this surface should be optimized for space
+ /// i.e. save some memory but may lose performance
+ UINT_32 prt : 1; ///< Flag for partially resident texture
+ UINT_32 qbStereo : 1; ///< Quad buffer stereo surface
+ UINT_32 pow2Pad : 1; ///< SI: Pad to pow2, must set for mipmap (including level0)
+ UINT_32 interleaved : 1; ///< Special flag for interleaved YUV surface padding
+ UINT_32 degrade4Space : 1; ///< Degrade base level's tile mode to save memory
+ UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable
+ UINT_32 dispTileType : 1; ///< NI: force display Tiling for 128 bit shared resource
+ UINT_32 dccCompatible : 1; ///< VI: whether to support dcc fast clear
+ UINT_32 czDispCompatible: 1; ///< SI+: CZ family (Carrizo) has a HW bug that needs special alignment.
+ ///< This flag indicates we need to follow the alignment with
+ ///< CZ families or other ASICs under PX configuration + CZ.
+ UINT_32 reserved :10; ///< Reserved bits
+ };
+
+ UINT_32 value; ///< All of the flag bits above viewed as a single 32-bit value
+} ADDR_SURFACE_FLAGS;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_SURFACE_INFO_INPUT
+*
+* @brief
+* Input structure for AddrComputeSurfaceInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_INFO_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ AddrTileMode tileMode; ///< Tile mode
+ AddrFormat format; ///< If format is set to valid one, bpp/width/height
+ /// might be overwritten
+ UINT_32 bpp; ///< Bits per pixel
+ UINT_32 numSamples; ///< Number of samples
+ UINT_32 width; ///< Width, in pixels
+ UINT_32 height; ///< Height, in pixels
+ UINT_32 numSlices; ///< Number of surface slices/depth.
+ /// Note:
+ /// For cubemap, driver clients usually set numSlices
+ /// to 1 in per-face calc.
+ /// For 7xx and above, we need to pad faces as slices.
+ /// In this case, clients should set numSlices to 6;
+ /// this can also be turned off by createFlags when
+ /// calling AddrCreate
+ UINT_32 slice; ///< Slice index
+ UINT_32 mipLevel; ///< Current mipmap level.
+ /// Padding/tiling have different rules for level0 and
+ /// sublevels
+ ADDR_SURFACE_FLAGS flags; ///< Surface type flags
+ UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as
+ /// number of samples for normal AA; Set it to the
+ /// number of fragments for EQAA
+ /// r800 and later HWL parameters
+ // Needed by 2D tiling, for linear and 1D tiling, just keep them 0's
+ ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Set to 0 to default/calculate
+ AddrTileType tileType; ///< Micro tiling type, not needed when tileIndex != -1
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ /// while the global useTileIndex is set to 1
+ UINT_32 basePitch; ///< Base level pitch in pixels, 0 means ignored, is a
+ /// must for mip levels from SI+.
+ /// Don't use pitch in blocks for compressed formats!
+} ADDR_COMPUTE_SURFACE_INFO_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_SURFACE_INFO_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeSurfaceInfo
+* @note
+ Element: AddrLib unit for computing. e.g. BCn: 4x4 blocks; R32B32B32: 32bit with 3x pitch
+ Pixel: Original pixel
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_INFO_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 pitch; ///< Pitch in elements (in blocks for compressed formats)
+ UINT_32 height; ///< Height in elements (in blocks for compressed formats)
+ UINT_32 depth; ///< Number of slices/depth
+ UINT_64 surfSize; ///< Surface size in bytes
+ AddrTileMode tileMode; ///< Actual tile mode. May differ from that in input
+ UINT_32 baseAlign; ///< Base address alignment
+ UINT_32 pitchAlign; ///< Pitch alignment, in elements
+ UINT_32 heightAlign; ///< Height alignment, in elements
+ UINT_32 depthAlign; ///< Depth alignment, aligned to thickness, for 3d texture
+ UINT_32 bpp; ///< Bits per element (e.g. blocks for BCn, 1/3 for 96bit)
+ UINT_32 pixelPitch; ///< Pitch in original pixels
+ UINT_32 pixelHeight; ///< Height in original pixels
+ UINT_32 pixelBits; ///< Original bits per pixel, passed from input
+ UINT_64 sliceSize; ///< Size of slice specified by input's slice
+ /// The result is controlled by surface flags & createFlags
+ /// By default this value equals surfSize for volume
+ UINT_32 pitchTileMax; ///< PITCH_TILE_MAX value for h/w register
+ UINT_32 heightTileMax; ///< HEIGHT_TILE_MAX value for h/w register
+ UINT_32 sliceTileMax; ///< SLICE_TILE_MAX value for h/w register
+
+ UINT_32 numSamples; ///< Pass the effective numSamples processed in this call
+
+ /// r800 and later HWL parameters
+ ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. Filled in if 0 on input
+ AddrTileType tileType; ///< Micro tiling type, only valid when tileIndex != -1
+ INT_32 tileIndex; ///< Tile index, MAY be "downgraded"
+
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ /// Special information to work around SI mipmap swizzle bug UBTS #317508
+ BOOL_32 last2DLevel; ///< TRUE if this is the last 2D(3D) tiled level
+ ///< Only meaningful when create flag checkLast2DLevel is set
+ /// Stereo info
+ ADDR_QBSTEREOINFO* pStereoInfo;///< Stereo information, needed when .qbStereo flag is TRUE
+} ADDR_COMPUTE_SURFACE_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeSurfaceInfo
+*
+* @brief
+* Compute surface width/height/depth/alignments and suitable tiling mode
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceInfo(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut); ///< [out] Output structure
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
+*
+* @brief
+* Input structure for AddrComputeSurfaceAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 x; ///< X coordinate
+ UINT_32 y; ///< Y coordinate
+ UINT_32 slice; ///< Slice index
+ UINT_32 sample; ///< Sample index, use fragment index for EQAA
+
+ UINT_32 bpp; ///< Bits per pixel
+ UINT_32 pitch; ///< Surface pitch, in pixels
+ UINT_32 height; ///< Surface height, in pixels
+ UINT_32 numSlices; ///< Surface depth
+ UINT_32 numSamples; ///< Number of samples
+
+ AddrTileMode tileMode; ///< Tile mode
+ BOOL_32 isDepth; ///< TRUE if the surface uses depth sample ordering within
+ /// micro tile. Textures can also choose depth sample order
+ UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles
+ /// the case that components are stored separately
+ UINT_32 compBits; ///< The component bits actually needed(for planar surface)
+
+ UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as
+ /// number of samples for normal AA; Set it to the
+ /// number of fragments for EQAA
+ /// r800 and later HWL parameters
+ // Used for 1D tiling above
+ AddrTileType tileType; ///< See definition of AddrTileType
+ struct
+ {
+ UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture
+ /// only flag. Only non-RT texture can set this to TRUE
+ UINT_32 reserved :31; ///< Reserved for future use.
+ };
+ // 2D tiling needs following structure
+ ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ /// while the global useTileIndex is set to 1
+ union
+ {
+ struct
+ {
+ UINT_32 bankSwizzle; ///< Bank swizzle
+ UINT_32 pipeSwizzle; ///< Pipe swizzle
+ };
+ UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE
+ };
+
+#if ADDR_AM_BUILD // These two fields are not valid in SW blt since no HTILE access
+ UINT_32 addr5Swizzle; ///< ADDR5_SWIZZLE_MASK of DB_DEPTH_INFO
+ BOOL_32 is32ByteTile; ///< Caller must have access to HTILE buffer and know if
+ /// this tile is compressed to 32B
+#endif
+} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeSurfaceAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_64 addr; ///< Byte address
+ UINT_32 bitPosition; ///< Bit position within surfaceAddr, 0-7.
+ /// For surface bpp < 8, e.g. FMT_1.
+ UINT_32 prtBlockIndex; ///< Index of a PRT tile (64K block)
+} ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeSurfaceAddrFromCoord
+*
+* @brief
+* Compute surface address from a given coordinate.
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceAddrFromCoord(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut); ///< [out] Output structure
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT
+*
+* @brief
+* Input structure for AddrComputeSurfaceCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_64 addr; ///< Address in bytes
+ UINT_32 bitPosition; ///< Bit position in addr. 0-7. for surface bpp < 8,
+ /// e.g. FMT_1;
+ UINT_32 bpp; ///< Bits per pixel
+ UINT_32 pitch; ///< Pitch, in pixels
+ UINT_32 height; ///< Height in pixels
+ UINT_32 numSlices; ///< Surface depth
+ UINT_32 numSamples; ///< Number of samples
+
+ AddrTileMode tileMode; ///< Tile mode
+ BOOL_32 isDepth; ///< Surface uses depth sample ordering within micro tile.
+ /// Note: Textures can choose depth sample order as well.
+ UINT_32 tileBase; ///< Base offset (in bits) inside micro tile which handles
+ /// the case that components are stored separately
+ UINT_32 compBits; ///< The component bits actually needed(for planar surface)
+
+ UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as
+ /// number of samples for normal AA; Set it to the
+ /// number of fragments for EQAA
+ /// r800 and later HWL parameters
+ // Used for 1D tiling above
+ AddrTileType tileType; ///< See definition of AddrTileType
+ struct
+ {
+ UINT_32 ignoreSE : 1; ///< TRUE if shader engines are ignored. This is texture
+ /// only flag. Only non-RT texture can set this to TRUE
+ UINT_32 reserved :31; ///< Reserved for future use.
+ };
+ // 2D tiling needs following structure
+ ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ /// while the global useTileIndex is set to 1
+ union
+ {
+ struct
+ {
+ UINT_32 bankSwizzle; ///< Bank swizzle
+ UINT_32 pipeSwizzle; ///< Pipe swizzle
+ };
+ UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE
+ };
+} ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeSurfaceCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 x; ///< X coordinate
+ UINT_32 y; ///< Y coordinate
+ UINT_32 slice; ///< Index of slices
+ UINT_32 sample; ///< Index of samples, means fragment index for EQAA
+} ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeSurfaceCoordFromAddr
+*
+* @brief
+* Compute coordinate from a given surface address
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSurfaceCoordFromAddr(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut); ///< [out] Output structure
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// HTile functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* ADDR_HTILE_FLAGS
+*
+* @brief
+* HTILE flags
+***************************************************************************************************
+*/
+typedef union _ADDR_HTILE_FLAGS
+{
+ struct
+ {
+ UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable
+ UINT_32 reserved :31; ///< Reserved bits
+ };
+
+ UINT_32 value;
+} ADDR_HTILE_FLAGS;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_HTILE_INFO_INPUT
+*
+* @brief
+* Input structure of AddrComputeHtileInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_INFO_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ ADDR_HTILE_FLAGS flags; ///< HTILE flags
+ UINT_32 pitch; ///< Surface pitch, in pixels
+ UINT_32 height; ///< Surface height, in pixels
+ UINT_32 numSlices; ///< Number of slices
+ BOOL_32 isLinear; ///< Linear or tiled HTILE layout
+ AddrHtileBlockSize blockWidth; ///< 4 or 8. EG and above only support 8
+ AddrHtileBlockSize blockHeight; ///< 4 or 8. EG and above only support 8
+ ADDR_TILEINFO* pTileInfo; ///< Tile info
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ /// while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_HTILE_INFO_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_HTILE_INFO_OUTPUT
+*
+* @brief
+* Output structure of AddrComputeHtileInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_INFO_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 pitch; ///< Pitch in pixels of depth buffer represented in this
+ /// HTile buffer. This might be larger than original depth
+ /// buffer pitch when called with an unaligned pitch.
+ UINT_32 height; ///< Height in pixels, as above
+ UINT_64 htileBytes; ///< Size of HTILE buffer, in bytes
+ UINT_32 baseAlign; ///< Base alignment
+ UINT_32 bpp; ///< Bits per pixel for HTILE is how many bits for an 8x8 block!
+ UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape
+ UINT_32 macroHeight; ///< Macro height in pixels
+ UINT_64 sliceSize; ///< Slice size, in bytes.
+} ADDR_COMPUTE_HTILE_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeHtileInfo
+*
+* @brief
+* Compute Htile pitch, height, base alignment and size in bytes
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileInfo(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut); ///< [out] Output structure
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
+*
+* @brief
+* Input structure for AddrComputeHtileAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 pitch; ///< Pitch, in pixels
+ UINT_32 height; ///< Height in pixels
+ UINT_32 x; ///< X coordinate
+ UINT_32 y; ///< Y coordinate
+ UINT_32 slice; ///< Index of slice
+ UINT_32 numSlices; ///< Number of slices
+ BOOL_32 isLinear; ///< Linear or tiled HTILE layout
+ AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. EG and above only support 8
+ AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. EG and above only support 8
+ ADDR_TILEINFO* pTileInfo; ///< Tile info
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ /// while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeHtileAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_64 addr; ///< Address in bytes
+ UINT_32 bitPosition; ///< Bit position, 0 or 4. CMASK and HTILE share some lib methods,
+ /// so we keep bitPosition for HTILE as well
+} ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeHtileAddrFromCoord
+*
+* @brief
+* Compute Htile address according to coordinates (of depth buffer)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileAddrFromCoord(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut); ///< [out] Output structure
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT
+*
+* @brief
+* Input structure for AddrComputeHtileCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_64 addr; ///< Address
+ UINT_32 bitPosition; ///< Bit position 0 or 4. CMASK and HTILE share some methods
+ /// so we keep bitPosition for HTILE as well
+ UINT_32 pitch; ///< Pitch, in pixels
+ UINT_32 height; ///< Height, in pixels
+ UINT_32 numSlices; ///< Number of slices
+ BOOL_32 isLinear; ///< Linear or tiled HTILE layout
+ AddrHtileBlockSize blockWidth; ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8
+ AddrHtileBlockSize blockHeight; ///< 4 or 8. 1 means 8, 0 means 4. R8xx/R9xx only support 8
+ ADDR_TILEINFO* pTileInfo; ///< Tile info
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ /// while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeHtileCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 x; ///< X coordinate
+ UINT_32 y; ///< Y coordinate
+ UINT_32 slice; ///< Slice index
+} ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeHtileCoordFromAddr
+*
+* @brief
+* Compute coordinates within depth buffer (1st pixel of a micro tile) according to
+* Htile address
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeHtileCoordFromAddr(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut); ///< [out] Output structure
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// C-mask functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* ADDR_CMASK_FLAGS
+*
+* @brief
+* CMASK flags
+***************************************************************************************************
+*/
+typedef union _ADDR_CMASK_FLAGS
+{
+ struct
+ {
+ UINT_32 tcCompatible : 1; ///< Flag indicates surface needs to be shader readable
+ UINT_32 reserved :31; ///< Reserved bits
+ };
+
+ UINT_32 value;
+} ADDR_CMASK_FLAGS;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_CMASK_INFO_INPUT
+*
+* @brief
+* Input structure of AddrComputeCmaskInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASKINFO_INPUT // NOTE(review): tag has no '_' between CMASK and INFO, unlike the typedef name
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ ADDR_CMASK_FLAGS flags; ///< CMASK flags
+ UINT_32 pitch; ///< Pitch, in pixels, of color buffer
+ UINT_32 height; ///< Height, in pixels, of color buffer
+ UINT_32 numSlices; ///< Number of slices, of color buffer
+ BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear
+ ADDR_TILEINFO* pTileInfo; ///< Tile info
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ /// while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_CMASK_INFO_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_CMASK_INFO_OUTPUT
+*
+* @brief
+* Output structure of AddrComputeCmaskInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_INFO_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 pitch; ///< Pitch in pixels of color buffer which
+ /// this Cmask matches. The size might be larger than
+ /// original color buffer pitch when called with
+ /// an unaligned pitch.
+ UINT_32 height; ///< Height in pixels, as above
+ UINT_64 cmaskBytes; ///< Size in bytes of CMask buffer
+ UINT_32 baseAlign; ///< Base alignment
+ UINT_32 blockMax; ///< Cmask block size. Need this to set CB_COLORn_MASK register
+ UINT_32 macroWidth; ///< Macro width in pixels, actually squared cache shape
+ UINT_32 macroHeight; ///< Macro height in pixels
+ UINT_64 sliceSize; ///< Slice size, in bytes.
+} ADDR_COMPUTE_CMASK_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeCmaskInfo
+*
+* @brief
+* Compute Cmask pitch, height, base alignment and size in bytes from color buffer
+* info
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskInfo(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut); ///< [out] Output structure
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
+*
+* @brief
+* Input structure for AddrComputeCmaskAddrFromCoord
+*
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+ UINT_32 x; ///< X coordinate
+ UINT_32 y; ///< Y coordinate
+ UINT_64 fmaskAddr; ///< Fmask addr for tc compatible Cmask
+ UINT_32 slice; ///< Slice index
+ UINT_32 pitch; ///< Pitch in pixels, of color buffer
+ UINT_32 height; ///< Height in pixels, of color buffer
+ UINT_32 numSlices; ///< Number of slices
+ UINT_32 bpp; ///< Bits per pixel. NOTE(review): presumably of the color buffer - confirm
+ BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear
+ ADDR_CMASK_FLAGS flags; ///< CMASK flags
+ ADDR_TILEINFO* pTileInfo; ///< Tile info
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeCmaskAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_64 addr; ///< CMASK address in bytes
+ UINT_32 bitPosition; ///< Bit position within addr, 0-7. CMASK is 4 bpp,
+ /// so the address may be located in bit 0 (0) or 4 (4)
+} ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeCmaskAddrFromCoord
+*
+* @brief
+* Compute Cmask address according to coordinates (of MSAA color buffer)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskAddrFromCoord(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut); ///< [out] Output structure
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT
+*
+* @brief
+* Input structure for AddrComputeCmaskCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_64 addr; ///< CMASK address in bytes
+ UINT_32 bitPosition; ///< Bit position within addr, 0-7. CMASK is 4 bpp,
+ /// so the address may be located in bit 0 (0) or 4 (4)
+ UINT_32 pitch; ///< Pitch, in pixels
+ UINT_32 height; ///< Height in pixels
+ UINT_32 numSlices; ///< Number of slices
+ BOOL_32 isLinear; ///< Linear or tiled layout, Only SI can be linear
+ ADDR_TILEINFO* pTileInfo; ///< Tile info
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ /// while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeCmaskCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 x; ///< X coordinate
+ UINT_32 y; ///< Y coordinate
+ UINT_32 slice; ///< Slice index
+} ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeCmaskCoordFromAddr
+*
+* @brief
+* Compute coordinates within color buffer (1st pixel of a micro tile) according to
+* Cmask address
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeCmaskCoordFromAddr(
+ ADDR_HANDLE hLib, ///< [in] Address lib handle
+ const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut); ///< [out] Output structure
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// F-mask functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_FMASK_INFO_INPUT
+*
+* @brief
+* Input structure for AddrComputeFmaskInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_INFO_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ AddrTileMode tileMode; ///< Tile mode
+ UINT_32 pitch; ///< Surface pitch, in pixels
+ UINT_32 height; ///< Surface height, in pixels
+ UINT_32 numSlices; ///< Number of slice/depth
+ UINT_32 numSamples; ///< Number of samples
+ UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as
+ ///< number of samples for normal AA; Set it to the
+ ///< number of fragments for EQAA
+ /// r800 and later HWL parameters
+ struct
+ {
+ UINT_32 resolved: 1; ///< TRUE if the surface is for resolved fmask, only used
+ ///< by H/W clients. S/W should always set it to FALSE.
+ UINT_32 reserved: 31; ///< Reserved for future use.
+ };
+ ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. Clients must give valid data
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+} ADDR_COMPUTE_FMASK_INFO_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_FMASK_INFO_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeFmaskInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_INFO_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 pitch; ///< Pitch of fmask in pixels
+ UINT_32 height; ///< Height of fmask in pixels
+ UINT_32 numSlices; ///< Slices of fmask
+ UINT_64 fmaskBytes; ///< Size of fmask in bytes
+ UINT_32 baseAlign; ///< Base address alignment
+ UINT_32 pitchAlign; ///< Pitch alignment
+ UINT_32 heightAlign; ///< Height alignment
+ UINT_32 bpp; ///< Bits per pixel of FMASK is: number of bit planes
+ UINT_32 numSamples; ///< Number of samples, used for dump, export this since input
+ ///< may be changed in 9xx and above
+ /// r800 and later HWL parameters
+ ADDR_TILEINFO* pTileInfo; ///< Tile parameters used. Fmask can have different
+ ///< bank_height from color buffer
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ UINT_64 sliceSize; ///< Size of slice in bytes
+} ADDR_COMPUTE_FMASK_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeFmaskInfo
+*
+* @brief
+* Compute Fmask pitch/height/depth/alignments and size in bytes
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskInfo(
+ ADDR_HANDLE hLib,
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
+*
+* @brief
+* Input structure for AddrComputeFmaskAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 x; ///< X coordinate
+ UINT_32 y; ///< Y coordinate
+ UINT_32 slice; ///< Slice index
+ UINT_32 plane; ///< Plane number
+ UINT_32 sample; ///< Sample index (fragment index for EQAA)
+
+ UINT_32 pitch; ///< Surface pitch, in pixels
+ UINT_32 height; ///< Surface height, in pixels
+ UINT_32 numSamples; ///< Number of samples
+ UINT_32 numFrags; ///< Number of fragments, leave it zero or the same as
+ ///< number of samples for normal AA; Set it to the
+ ///< number of fragments for EQAA
+
+ AddrTileMode tileMode; ///< Tile mode
+ union
+ {
+ struct
+ {
+ UINT_32 bankSwizzle; ///< Bank swizzle
+ UINT_32 pipeSwizzle; ///< Pipe swizzle
+ };
+ UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE
+ };
+
+ /// r800 and later HWL parameters
+ struct
+ {
+ UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by H/W clients
+ UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored.
+ UINT_32 reserved: 30; ///< Reserved for future use.
+ };
+ ADDR_TILEINFO* pTileInfo; ///< 2D tiling parameters. Client must provide all data
+
+} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeFmaskAddrFromCoord
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_64 addr; ///< Fmask address
+ UINT_32 bitPosition; ///< Bit position within fmaskAddr, 0-7.
+} ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeFmaskAddrFromCoord
+*
+* @brief
+* Compute Fmask address according to coordinates (x,y,slice,sample,plane)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskAddrFromCoord(
+ ADDR_HANDLE hLib,
+ const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut);
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT
+*
+* @brief
+* Input structure for AddrComputeFmaskCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_64 addr; ///< Address
+ UINT_32 bitPosition; ///< Bit position within addr, 0-7.
+
+ UINT_32 pitch; ///< Pitch, in pixels
+ UINT_32 height; ///< Height in pixels
+ UINT_32 numSamples; ///< Number of samples
+ UINT_32 numFrags; ///< Number of fragments
+ AddrTileMode tileMode; ///< Tile mode
+ union
+ {
+ struct
+ {
+ UINT_32 bankSwizzle; ///< Bank swizzle
+ UINT_32 pipeSwizzle; ///< Pipe swizzle
+ };
+ UINT_32 tileSwizzle; ///< Combined swizzle, if useCombinedSwizzle is TRUE
+ };
+
+ /// r800 and later HWL parameters
+ struct
+ {
+ UINT_32 resolved: 1; ///< TRUE if this is a resolved fmask, used by HW components
+ UINT_32 ignoreSE: 1; ///< TRUE if shader engines are ignored.
+ UINT_32 reserved: 30; ///< Reserved for future use.
+ };
+ ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data
+
+} ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
+*
+* @brief
+* Output structure for AddrComputeFmaskCoordFromAddr
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 x; ///< X coordinate
+ UINT_32 y; ///< Y coordinate
+ UINT_32 slice; ///< Slice index
+ UINT_32 plane; ///< Plane number
+ UINT_32 sample; ///< Sample index (fragment index for EQAA)
+} ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeFmaskCoordFromAddr
+*
+* @brief
+* Compute FMASK coordinate from a given address
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeFmaskCoordFromAddr(
+ ADDR_HANDLE hLib,
+ const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut);
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Element/utility functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrGetVersion
+*
+* @brief
+* Get AddrLib version number
+***************************************************************************************************
+*/
+UINT_32 ADDR_API AddrGetVersion(ADDR_HANDLE hLib);
+
+/**
+***************************************************************************************************
+* AddrUseTileIndex
+*
+* @brief
+* Return TRUE if tileIndex is enabled in this address library
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API AddrUseTileIndex(ADDR_HANDLE hLib);
+
+/**
+***************************************************************************************************
+* AddrUseCombinedSwizzle
+*
+* @brief
+* Return TRUE if combined swizzle is enabled in this address library
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API AddrUseCombinedSwizzle(ADDR_HANDLE hLib);
+
+/**
+***************************************************************************************************
+* ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
+*
+* @brief
+* Input structure of AddrExtractBankPipeSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 base256b; ///< Base256b value
+
+ /// r800 and later HWL parameters
+ ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
+*
+* @brief
+* Output structure of AddrExtractBankPipeSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 bankSwizzle; ///< Bank swizzle
+ UINT_32 pipeSwizzle; ///< Pipe swizzle
+} ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrExtractBankPipeSwizzle
+*
+* @brief
+* Extract Bank and Pipe swizzle from base256b
+* @return
+* ADDR_OK if no error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrExtractBankPipeSwizzle(
+ ADDR_HANDLE hLib,
+ const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
+ ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut);
+
+
+/**
+***************************************************************************************************
+* ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
+*
+* @brief
+* Input structure of AddrCombineBankPipeSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 bankSwizzle; ///< Bank swizzle
+ UINT_32 pipeSwizzle; ///< Pipe swizzle
+ UINT_64 baseAddr; ///< Base address (leave it zero for driver clients)
+
+ /// r800 and later HWL parameters
+ ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Client must provide all data
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
+*
+* @brief
+* Output structure of AddrCombineBankPipeSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 tileSwizzle; ///< Combined swizzle
+} ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrCombineBankPipeSwizzle
+*
+* @brief
+* Combine Bank and Pipe swizzle
+* @return
+* ADDR_OK if no error
+* @note
+* baseAddr here is full MCAddress instead of base256b
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrCombineBankPipeSwizzle(
+ ADDR_HANDLE hLib,
+ const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn,
+ ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut);
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_SLICESWIZZLE_INPUT
+*
+* @brief
+* Input structure of AddrComputeSliceSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SLICESWIZZLE_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ AddrTileMode tileMode; ///< Tile Mode
+ UINT_32 baseSwizzle; ///< Base tile swizzle
+ UINT_32 slice; ///< Slice index
+ UINT_64 baseAddr; ///< Base address, driver should leave it 0 in most cases
+
+ /// r800 and later HWL parameters
+ ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here!
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_SLICESWIZZLE_INPUT;
+
+
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
+*
+* @brief
+* Output structure of AddrComputeSliceSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_SLICESWIZZLE_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 tileSwizzle; ///< Recalculated tileSwizzle value
+} ADDR_COMPUTE_SLICESWIZZLE_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeSliceSwizzle
+*
+* @brief
+* Recalculate the tile swizzle for a given slice from the base tile swizzle
+* @return
+* ADDR_OK if no error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeSliceSwizzle(
+ ADDR_HANDLE hLib,
+ const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
+ ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut);
+
+
+/**
+***************************************************************************************************
+* AddrSwizzleGenOption
+*
+* @brief
+* Which swizzle generating options: legacy or linear
+***************************************************************************************************
+*/
+typedef enum _AddrSwizzleGenOption
+{
+ ADDR_SWIZZLE_GEN_DEFAULT = 0, ///< As is in client driver implementation for swizzle
+ ADDR_SWIZZLE_GEN_LINEAR = 1, ///< Using a linear increment of swizzle
+} AddrSwizzleGenOption;
+
+/**
+***************************************************************************************************
+* ADDR_SWIZZLE_OPTION
+*
+* @brief
+* Controls how swizzle is generated
+***************************************************************************************************
+*/
+typedef union _ADDR_SWIZZLE_OPTION
+{
+ struct
+ {
+ UINT_32 genOption : 1; ///< The way swizzle is generated, see AddrSwizzleGenOption
+ UINT_32 reduceBankBit : 1; ///< TRUE if we need reduce swizzle bits
+ UINT_32 reserved :30; ///< Reserved bits
+ };
+
+ UINT_32 value;
+
+} ADDR_SWIZZLE_OPTION;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_BASE_SWIZZLE_INPUT
+*
+* @brief
+* Input structure of AddrComputeBaseSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ ADDR_SWIZZLE_OPTION option; ///< Swizzle option
+ UINT_32 surfIndex; ///< Index of this surface type
+ AddrTileMode tileMode; ///< Tile Mode
+
+ /// r800 and later HWL parameters
+ ADDR_TILEINFO* pTileInfo; ///< 2D tile parameters. Actually banks needed here!
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_BASE_SWIZZLE_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
+*
+* @brief
+* Output structure of AddrComputeBaseSwizzle
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_32 tileSwizzle; ///< Combined swizzle
+} ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeBaseSwizzle
+*
+* @brief
+* Return a combined bank and pipe swizzle based on surface type/index
+* @return
+* ADDR_OK if no error
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeBaseSwizzle(
+ ADDR_HANDLE hLib,
+ const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
+ ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut);
+
+
+
+/**
+***************************************************************************************************
+* ELEM_GETEXPORTNORM_INPUT
+*
+* @brief
+* Input structure for ElemGetExportNorm
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_GETEXPORTNORM_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ AddrColorFormat format; ///< Color buffer format; Client should use ColorFormat
+ AddrSurfaceNumber num; ///< Surface number type; Client should use NumberType
+ AddrSurfaceSwap swap; ///< Surface byte swap; Client should use SurfaceSwap
+ UINT_32 numSamples; ///< Number of samples
+} ELEM_GETEXPORTNORM_INPUT;
+
+/**
+***************************************************************************************************
+* ElemGetExportNorm
+*
+* @brief
+* Helper function to check one format can be EXPORT_NORM, which is a register
+* CB_COLOR_INFO.SURFACE_FORMAT. FP16 can be reported as EXPORT_NORM for rv770 in r600
+* family
+* @note
+* The implementation is only for r600.
+* 00 - EXPORT_FULL: PS exports are 4 pixels with 4 components with 32-bits-per-component. (two
+* clocks per export)
+* 01 - EXPORT_NORM: PS exports are 4 pixels with 4 components with 16-bits-per-component. (one
+* clock per export)
+*
+***************************************************************************************************
+*/
+BOOL_32 ADDR_API ElemGetExportNorm(
+ ADDR_HANDLE hLib,
+ const ELEM_GETEXPORTNORM_INPUT* pIn);
+
+
+
+/**
+***************************************************************************************************
+* ELEM_FLT32TODEPTHPIXEL_INPUT
+*
+* @brief
+* Input structure for ElemFlt32ToDepthPixel
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_FLT32TODEPTHPIXEL_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ AddrDepthFormat format; ///< Depth buffer format
+ ADDR_FLT_32 comps[2]; ///< Component values (Z/stencil)
+} ELEM_FLT32TODEPTHPIXEL_INPUT;
+
+/**
+***************************************************************************************************
+* ELEM_FLT32TODEPTHPIXEL_OUTPUT
+*
+* @brief
+* Output structure for ElemFlt32ToDepthPixel
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_FLT32TODEPTHPIXEL_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_8* pPixel; ///< Real depth value. Same data type as depth buffer.
+ ///< Client must provide enough storage for this type.
+ UINT_32 depthBase; ///< Tile base in bits for depth bits
+ UINT_32 stencilBase; ///< Tile base in bits for stencil bits
+ UINT_32 depthBits; ///< Bits for depth
+ UINT_32 stencilBits; ///< Bits for stencil
+} ELEM_FLT32TODEPTHPIXEL_OUTPUT;
+
+/**
+***************************************************************************************************
+* ElemFlt32ToDepthPixel
+*
+* @brief
+* Convert a FLT_32 value to a depth/stencil pixel value
+*
+* @return
+* Return code
+*
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API ElemFlt32ToDepthPixel(
+ ADDR_HANDLE hLib,
+ const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
+ ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut);
+
+
+
+/**
+***************************************************************************************************
+* ELEM_FLT32TOCOLORPIXEL_INPUT
+*
+* @brief
+* Input structure for ElemFlt32ToColorPixel
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_FLT32TOCOLORPIXEL_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ AddrColorFormat format; ///< Color buffer format
+ AddrSurfaceNumber surfNum; ///< Surface number
+ AddrSurfaceSwap surfSwap; ///< Surface swap
+ ADDR_FLT_32 comps[4]; ///< Component values (r/g/b/a)
+} ELEM_FLT32TOCOLORPIXEL_INPUT;
+
+/**
+***************************************************************************************************
+* ELEM_FLT32TOCOLORPIXEL_OUTPUT
+*
+* @brief
+* Output structure for ElemFlt32ToColorPixel
+*
+***************************************************************************************************
+*/
+typedef struct _ELEM_FLT32TOCOLORPIXEL_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ UINT_8* pPixel; ///< Real color value. Same data type as color buffer.
+ ///< Client must provide enough storage for this type.
+} ELEM_FLT32TOCOLORPIXEL_OUTPUT;
+
+/**
+***************************************************************************************************
+* ElemFlt32ToColorPixel
+*
+* @brief
+* Convert a FLT_32 value to a red/green/blue/alpha pixel value
+*
+* @return
+* Return code
+*
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API ElemFlt32ToColorPixel(
+ ADDR_HANDLE hLib,
+ const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
+ ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut);
+
+
+/**
+***************************************************************************************************
+* ADDR_CONVERT_TILEINFOTOHW_INPUT
+*
+* @brief
+* Input structure for AddrConvertTileInfoToHW
+* @note
+* When reverse is TRUE, indices are ignored
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINFOTOHW_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+ BOOL_32 reverse; ///< Convert control flag.
+ ///< FALSE: convert from real value to HW value;
+ ///< TRUE: convert from HW value to real value.
+
+ /// r800 and later HWL parameters
+ ADDR_TILEINFO* pTileInfo; ///< Tile parameters with real value
+
+ INT_32 tileIndex; ///< Tile index, MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_CONVERT_TILEINFOTOHW_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_CONVERT_TILEINFOTOHW_OUTPUT
+*
+* @brief
+* Output structure for AddrConvertTileInfoToHW
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINFOTOHW_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ /// r800 and later HWL parameters
+ ADDR_TILEINFO* pTileInfo; ///< Tile parameters with hardware register value
+
+} ADDR_CONVERT_TILEINFOTOHW_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrConvertTileInfoToHW
+*
+* @brief
+* Convert tile info from real value to hardware register value
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileInfoToHW(
+ ADDR_HANDLE hLib,
+ const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+ ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut);
+
+
+
+/**
+***************************************************************************************************
+* ADDR_CONVERT_TILEINDEX_INPUT
+*
+* @brief
+* Input structure for AddrConvertTileIndex
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINDEX_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ INT_32 tileIndex; ///< Tile index
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual
+} ADDR_CONVERT_TILEINDEX_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_CONVERT_TILEINDEX_OUTPUT
+*
+* @brief
+* Output structure for AddrConvertTileIndex
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINDEX_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ AddrTileMode tileMode; ///< Tile mode
+ AddrTileType tileType; ///< Tile type
+ ADDR_TILEINFO* pTileInfo; ///< Tile info
+
+} ADDR_CONVERT_TILEINDEX_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrConvertTileIndex
+*
+* @brief
+* Convert tile index to tile mode/type/info
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex(
+ ADDR_HANDLE hLib,
+ const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
+ ADDR_CONVERT_TILEINDEX_OUTPUT* pOut);
+
+
+
+/**
+***************************************************************************************************
+* ADDR_CONVERT_TILEINDEX1_INPUT
+*
+* @brief
+* Input structure for AddrConvertTileIndex1 (without macro mode index)
+***************************************************************************************************
+*/
+typedef struct _ADDR_CONVERT_TILEINDEX1_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ INT_32 tileIndex; ///< Tile index
+ UINT_32 bpp; ///< Bits per pixel
+ UINT_32 numSamples; ///< Number of samples
+ BOOL_32 tileInfoHw; ///< Set to TRUE if client wants HW enum, otherwise actual
+} ADDR_CONVERT_TILEINDEX1_INPUT;
+
+/**
+***************************************************************************************************
+* AddrConvertTileIndex1
+*
+* @brief
+* Convert tile index to tile mode/type/info
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrConvertTileIndex1(
+ ADDR_HANDLE hLib,
+ const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,
+ ADDR_CONVERT_TILEINDEX_OUTPUT* pOut);
+
+
+
+/**
+***************************************************************************************************
+* ADDR_GET_TILEINDEX_INPUT
+*
+* @brief
+* Input structure for AddrGetTileIndex
+***************************************************************************************************
+*/
+typedef struct _ADDR_GET_TILEINDEX_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ AddrTileMode tileMode; ///< Tile mode
+ AddrTileType tileType; ///< Tile-type: disp/non-disp/...
+ ADDR_TILEINFO* pTileInfo; ///< Pointer to tile-info structure, can be NULL for linear/1D
+} ADDR_GET_TILEINDEX_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_GET_TILEINDEX_OUTPUT
+*
+* @brief
+* Output structure for AddrGetTileIndex
+***************************************************************************************************
+*/
+typedef struct _ADDR_GET_TILEINDEX_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+
+ INT_32 index; ///< index in table
+} ADDR_GET_TILEINDEX_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrGetTileIndex
+*
+* @brief
+* Get the tiling mode index in table
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrGetTileIndex(
+ ADDR_HANDLE hLib,
+ const ADDR_GET_TILEINDEX_INPUT* pIn,
+ ADDR_GET_TILEINDEX_OUTPUT* pOut);
+
+
+
+
+/**
+***************************************************************************************************
+* ADDR_PRT_INFO_INPUT
+*
+* @brief
+* Input structure for AddrComputePrtInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_PRT_INFO_INPUT
+{
+ AddrFormat format; ///< Surface format
+ UINT_32 baseMipWidth; ///< Base mipmap width
+ UINT_32 baseMipHeight; ///< Base mipmap height
+ UINT_32 baseMipDepth; ///< Base mipmap depth
+ UINT_32 numFrags; ///< Number of fragments
+} ADDR_PRT_INFO_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_PRT_INFO_OUTPUT
+*
+* @brief
+* Output structure for AddrComputePrtInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_PRT_INFO_OUTPUT
+{
+ UINT_32 prtTileWidth;
+ UINT_32 prtTileHeight;
+} ADDR_PRT_INFO_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputePrtInfo
+*
+* @brief
+* Compute prt surface related information
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputePrtInfo(
+ ADDR_HANDLE hLib,
+ const ADDR_PRT_INFO_INPUT* pIn,
+ ADDR_PRT_INFO_OUTPUT* pOut);
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// DCC key functions
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_DCCINFO_INPUT
+*
+* @brief
+* Input structure of AddrComputeDccInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_DCCINFO_INPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+ UINT_32 bpp; ///< BitPP of color surface
+ UINT_32 numSamples; ///< Sample number of color surface
+ UINT_64 colorSurfSize; ///< Size of color surface to which dcc key is bound
+ AddrTileMode tileMode; ///< Tile mode of color surface
+ ADDR_TILEINFO tileInfo; ///< Tile info of color surface
+ UINT_32 tileSwizzle; ///< Tile swizzle
+ INT_32 tileIndex; ///< Tile index of color surface,
+ ///< MUST be -1 if you don't want to use it
+ ///< while the global useTileIndex is set to 1
+ INT_32 macroModeIndex; ///< Index in macro tile mode table if there is one (CI)
+ ///< README: When tileIndex is not -1, this must be valid
+} ADDR_COMPUTE_DCCINFO_INPUT;
+
+/**
+***************************************************************************************************
+* ADDR_COMPUTE_DCCINFO_OUTPUT
+*
+* @brief
+* Output structure of AddrComputeDccInfo
+***************************************************************************************************
+*/
+typedef struct _ADDR_COMPUTE_DCCINFO_OUTPUT
+{
+ UINT_32 size; ///< Size of this structure in bytes
+ UINT_64 dccRamBaseAlign; ///< Base alignment of dcc key
+ UINT_64 dccRamSize; ///< Size of dcc key
+ UINT_64 dccFastClearSize; ///< Size of dcc key portion that can be fast cleared
+ BOOL_32 subLvlCompressible; ///< whether sub resource is compressible
+} ADDR_COMPUTE_DCCINFO_OUTPUT;
+
+/**
+***************************************************************************************************
+* AddrComputeDccInfo
+*
+* @brief
+* Compute DCC key size, base alignment
+* info
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE ADDR_API AddrComputeDccInfo(
+ ADDR_HANDLE hLib,
+ const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
+ ADDR_COMPUTE_DCCINFO_OUTPUT* pOut);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // __ADDR_INTERFACE_H__
+
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/addrtypes.h b/src/gallium/winsys/amdgpu/drm/addrlib/addrtypes.h
new file mode 100644
index 00000000000..4c68ac544b8
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/addrtypes.h
@@ -0,0 +1,590 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrtypes.h
+* @brief Contains the basic types, enums and calling-convention macros of the address library
+***************************************************************************************************
+*/
+#ifndef __ADDR_TYPES_H__
+#define __ADDR_TYPES_H__
+
+#if defined(__APPLE__) || defined(TCORE_BUILD)
+// External definitions header maintained by Mac driver team (and TCORE team)
+// Helps address compilation issues & reduces code covered by NDA
+#include "addrExtDef.h"
+
+#else
+
+// Windows and/or Linux: supply each base type name unless a macro of that name already exists
+#if !defined(VOID)
+typedef void VOID;
+#endif
+
+#if !defined(FLOAT)
+typedef float FLOAT;
+#endif
+
+#if !defined(CHAR)
+typedef char CHAR;
+#endif
+
+#if !defined(INT)
+typedef int INT;
+#endif
+
+#include <stdarg.h> // va_list...etc need this header
+
+#endif // defined(__APPLE__) || defined(TCORE_BUILD)
+
+/**
+***************************************************************************************************
+* Calling conventions
+***************************************************************************************************
+*/
+#ifndef ADDR_CDECL // a definition supplied before this header takes precedence
+ #if defined(__GNUC__)
+ #define ADDR_CDECL __attribute__((cdecl))
+ #else
+ #define ADDR_CDECL __cdecl
+ #endif
+#endif
+
+#ifndef ADDR_STDCALL // a definition supplied before this header takes precedence
+    #if defined(__GNUC__)
+        #if defined(__AMD64__) || defined(__amd64__) || defined(__x86_64__) // 64-bit x86
+            #define ADDR_STDCALL // stdcall is a 32-bit x86 convention; expand to nothing here
+        #else
+            #define ADDR_STDCALL __attribute__((stdcall))
+        #endif
+    #else
+        #define ADDR_STDCALL __stdcall
+    #endif
+#endif
+
+#ifndef ADDR_FASTCALL // a definition supplied before this header takes precedence
+ #if defined(__GNUC__)
+ #define ADDR_FASTCALL __attribute__((regparm(0))) // NOTE(review): regparm(0) requests zero register arguments - confirm intent
+ #else
+ #define ADDR_FASTCALL __fastcall
+ #endif
+#endif
+
+#ifndef GC_CDECL // GC_* names alias the ADDR_* conventions unless already defined
+ #define GC_CDECL ADDR_CDECL
+#endif
+
+#ifndef GC_STDCALL
+ #define GC_STDCALL ADDR_STDCALL
+#endif
+
+#ifndef GC_FASTCALL
+ #define GC_FASTCALL ADDR_FASTCALL
+#endif
+
+
+#if defined(__GNUC__)
+ #define ADDR_INLINE static inline // inline needs to be static to link
+#else
+ // win32, win64, other platforms
+ #define ADDR_INLINE __inline
+#endif // #if defined(__GNUC__)
+
+#define ADDR_API ADDR_FASTCALL // default calling convention for API entry points is ADDR_FASTCALL
+
+/**
+***************************************************************************************************
+* Global defines used by other modules
+***************************************************************************************************
+*/
+#if !defined(TILEINDEX_INVALID) // each TILEINDEX_* value below may be pre-defined by the includer
+#define TILEINDEX_INVALID -1 ///< Same -1 that input structures document as "tileIndex not used"
+#endif
+
+#if !defined(TILEINDEX_LINEAR_GENERAL)
+#define TILEINDEX_LINEAR_GENERAL -2 ///< NOTE(review): presumably selects ADDR_TM_LINEAR_GENERAL - confirm
+#endif
+
+#if !defined(TILEINDEX_LINEAR_ALIGNED)
+#define TILEINDEX_LINEAR_ALIGNED 8 ///< NOTE(review): presumably the table index of linear-aligned - confirm
+#endif
+
+/**
+***************************************************************************************************
+* Return codes
+***************************************************************************************************
+*/
+typedef enum _ADDR_E_RETURNCODE
+{
+ // General Return
+ ADDR_OK = 0,
+ ADDR_ERROR = 1,
+
+ // Specific Errors (values continue implicitly from ADDR_ERROR)
+ ADDR_OUTOFMEMORY, ///< = 2
+ ADDR_INVALIDPARAMS, ///< = 3
+ ADDR_NOTSUPPORTED, ///< = 4
+ ADDR_NOTIMPLEMENTED, ///< = 5
+ ADDR_PARAMSIZEMISMATCH, ///< = 6
+ ADDR_INVALIDGBREGVALUES, ///< = 7
+
+} ADDR_E_RETURNCODE;
+
+/**
+***************************************************************************************************
+* @brief
+* Neutral enums that define tile modes for all H/W
+* @note
+* R600/R800 tiling mode can be cast to hw enums directly but never cast into HW enum from
+* ADDR_TM_2D_TILED_XTHICK
+*
+***************************************************************************************************
+*/
+typedef enum _AddrTileMode
+{
+ ADDR_TM_LINEAR_GENERAL = 0, ///< Least restrictions, pitch: multiple of 8 if not buffer
+ ADDR_TM_LINEAR_ALIGNED = 1, ///< Requests pitch or slice to be multiple of 64 pixels
+ ADDR_TM_1D_TILED_THIN1 = 2, ///< Linear array of 8x8 tiles
+ ADDR_TM_1D_TILED_THICK = 3, ///< Linear array of 8x8x4 tiles
+ ADDR_TM_2D_TILED_THIN1 = 4, ///< A set of macro tiles consisting of 8x8 tiles
+ ADDR_TM_2D_TILED_THIN2 = 5, ///< 600 HWL only, macro tile ratio is 1:4
+ ADDR_TM_2D_TILED_THIN4 = 6, ///< 600 HWL only, macro tile ratio is 1:16
+ ADDR_TM_2D_TILED_THICK = 7, ///< A set of macro tiles consisting of 8x8x4 tiles
+ ADDR_TM_2B_TILED_THIN1 = 8, ///< 600 HWL only, with bank swap
+ ADDR_TM_2B_TILED_THIN2 = 9, ///< 600 HWL only, with bank swap and ratio is 1:4
+ ADDR_TM_2B_TILED_THIN4 = 10, ///< 600 HWL only, with bank swap and ratio is 1:16
+ ADDR_TM_2B_TILED_THICK = 11, ///< 600 HWL only, with bank swap, consists of 8x8x4 tiles
+ ADDR_TM_3D_TILED_THIN1 = 12, ///< Macro tiling w/ pipe rotation between slices
+ ADDR_TM_3D_TILED_THICK = 13, ///< Macro tiling w/ pipe rotation between slices, thick
+ ADDR_TM_3B_TILED_THIN1 = 14, ///< 600 HWL only, with bank swap
+ ADDR_TM_3B_TILED_THICK = 15, ///< 600 HWL only, with bank swap, thick
+ ADDR_TM_2D_TILED_XTHICK = 16, ///< Tile is 8x8x8, valid from NI
+ ADDR_TM_3D_TILED_XTHICK = 17, ///< Tile is 8x8x8, valid from NI
+ ADDR_TM_POWER_SAVE = 18, ///< Power save mode, only used by KMD on NI
+ ADDR_TM_PRT_TILED_THIN1 = 19, ///< No bank/pipe rotation or hashing beyond macrotile size
+ ADDR_TM_PRT_2D_TILED_THIN1 = 20, ///< Same as 2D_TILED_THIN1, PRT only
+ ADDR_TM_PRT_3D_TILED_THIN1 = 21, ///< Same as 3D_TILED_THIN1, PRT only
+ ADDR_TM_PRT_TILED_THICK = 22, ///< No bank/pipe rotation or hashing beyond macrotile size
+ ADDR_TM_PRT_2D_TILED_THICK = 23, ///< Same as 2D_TILED_THICK, PRT only
+ ADDR_TM_PRT_3D_TILED_THICK = 24, ///< Same as 3D_TILED_THICK, PRT only
+ ADDR_TM_COUNT = 25, ///< Number of tile modes; must be one more than the last tile mode value
+} AddrTileMode;
+
+/**
+***************************************************************************************************
+* AddrFormat
+*
+* @brief
+* Neutral enum for SurfaceFormat
+*
+***************************************************************************************************
+*/
+typedef enum _AddrFormat { // values are explicit and contiguous from 0x00 through 0x3f
+ ADDR_FMT_INVALID = 0x00000000,
+ ADDR_FMT_8 = 0x00000001,
+ ADDR_FMT_4_4 = 0x00000002,
+ ADDR_FMT_3_3_2 = 0x00000003,
+ ADDR_FMT_RESERVED_4 = 0x00000004, ///< RESERVED_n names carry their decimal value (4)
+ ADDR_FMT_16 = 0x00000005,
+ ADDR_FMT_16_FLOAT = 0x00000006,
+ ADDR_FMT_8_8 = 0x00000007,
+ ADDR_FMT_5_6_5 = 0x00000008,
+ ADDR_FMT_6_5_5 = 0x00000009,
+ ADDR_FMT_1_5_5_5 = 0x0000000a,
+ ADDR_FMT_4_4_4_4 = 0x0000000b,
+ ADDR_FMT_5_5_5_1 = 0x0000000c,
+ ADDR_FMT_32 = 0x0000000d,
+ ADDR_FMT_32_FLOAT = 0x0000000e,
+ ADDR_FMT_16_16 = 0x0000000f,
+ ADDR_FMT_16_16_FLOAT = 0x00000010,
+ ADDR_FMT_8_24 = 0x00000011,
+ ADDR_FMT_8_24_FLOAT = 0x00000012,
+ ADDR_FMT_24_8 = 0x00000013,
+ ADDR_FMT_24_8_FLOAT = 0x00000014,
+ ADDR_FMT_10_11_11 = 0x00000015,
+ ADDR_FMT_10_11_11_FLOAT = 0x00000016,
+ ADDR_FMT_11_11_10 = 0x00000017,
+ ADDR_FMT_11_11_10_FLOAT = 0x00000018,
+ ADDR_FMT_2_10_10_10 = 0x00000019,
+ ADDR_FMT_8_8_8_8 = 0x0000001a,
+ ADDR_FMT_10_10_10_2 = 0x0000001b,
+ ADDR_FMT_X24_8_32_FLOAT = 0x0000001c,
+ ADDR_FMT_32_32 = 0x0000001d,
+ ADDR_FMT_32_32_FLOAT = 0x0000001e,
+ ADDR_FMT_16_16_16_16 = 0x0000001f,
+ ADDR_FMT_16_16_16_16_FLOAT = 0x00000020,
+ ADDR_FMT_RESERVED_33 = 0x00000021, ///< 33 decimal
+ ADDR_FMT_32_32_32_32 = 0x00000022,
+ ADDR_FMT_32_32_32_32_FLOAT = 0x00000023,
+ ADDR_FMT_RESERVED_36 = 0x00000024, ///< 36 decimal
+ ADDR_FMT_1 = 0x00000025,
+ ADDR_FMT_1_REVERSED = 0x00000026,
+ ADDR_FMT_GB_GR = 0x00000027,
+ ADDR_FMT_BG_RG = 0x00000028,
+ ADDR_FMT_32_AS_8 = 0x00000029,
+ ADDR_FMT_32_AS_8_8 = 0x0000002a,
+ ADDR_FMT_5_9_9_9_SHAREDEXP = 0x0000002b,
+ ADDR_FMT_8_8_8 = 0x0000002c,
+ ADDR_FMT_16_16_16 = 0x0000002d,
+ ADDR_FMT_16_16_16_FLOAT = 0x0000002e,
+ ADDR_FMT_32_32_32 = 0x0000002f,
+ ADDR_FMT_32_32_32_FLOAT = 0x00000030,
+ ADDR_FMT_BC1 = 0x00000031,
+ ADDR_FMT_BC2 = 0x00000032,
+ ADDR_FMT_BC3 = 0x00000033,
+ ADDR_FMT_BC4 = 0x00000034,
+ ADDR_FMT_BC5 = 0x00000035,
+ ADDR_FMT_BC6 = 0x00000036,
+ ADDR_FMT_BC7 = 0x00000037,
+ ADDR_FMT_32_AS_32_32_32_32 = 0x00000038,
+ ADDR_FMT_APC3 = 0x00000039,
+ ADDR_FMT_APC4 = 0x0000003a,
+ ADDR_FMT_APC5 = 0x0000003b,
+ ADDR_FMT_APC6 = 0x0000003c,
+ ADDR_FMT_APC7 = 0x0000003d,
+ ADDR_FMT_CTX1 = 0x0000003e,
+ ADDR_FMT_RESERVED_63 = 0x0000003f, ///< 63 decimal; highest value in this enum
+} AddrFormat;
+
+/**
+***************************************************************************************************
+* AddrDepthFormat
+*
+* @brief
+* Neutral enum for addrFlt32ToDepthPixel
+*
+***************************************************************************************************
+*/
+typedef enum _AddrDepthFormat
+{
+ ADDR_DEPTH_INVALID = 0x00000000,
+ ADDR_DEPTH_16 = 0x00000001,
+ ADDR_DEPTH_X8_24 = 0x00000002,
+ ADDR_DEPTH_8_24 = 0x00000003,
+ ADDR_DEPTH_X8_24_FLOAT = 0x00000004,
+ ADDR_DEPTH_8_24_FLOAT = 0x00000005,
+ ADDR_DEPTH_32_FLOAT = 0x00000006,
+ ADDR_DEPTH_X24_8_32_FLOAT = 0x00000007,
+ // Values are explicit and contiguous: 0x0 through 0x7.
+} AddrDepthFormat;
+
+/**
+***************************************************************************************************
+* AddrColorFormat
+*
+* @brief
+* Neutral enum for ColorFormat
+*
+***************************************************************************************************
+*/
+typedef enum _AddrColorFormat
+{ // Every ADDR_COLOR_x below has the same numeric value as the like-named ADDR_FMT_x in AddrFormat
+ ADDR_COLOR_INVALID = 0x00000000,
+ ADDR_COLOR_8 = 0x00000001,
+ ADDR_COLOR_4_4 = 0x00000002,
+ ADDR_COLOR_3_3_2 = 0x00000003,
+ ADDR_COLOR_RESERVED_4 = 0x00000004,
+ ADDR_COLOR_16 = 0x00000005,
+ ADDR_COLOR_16_FLOAT = 0x00000006,
+ ADDR_COLOR_8_8 = 0x00000007,
+ ADDR_COLOR_5_6_5 = 0x00000008,
+ ADDR_COLOR_6_5_5 = 0x00000009,
+ ADDR_COLOR_1_5_5_5 = 0x0000000a,
+ ADDR_COLOR_4_4_4_4 = 0x0000000b,
+ ADDR_COLOR_5_5_5_1 = 0x0000000c,
+ ADDR_COLOR_32 = 0x0000000d,
+ ADDR_COLOR_32_FLOAT = 0x0000000e,
+ ADDR_COLOR_16_16 = 0x0000000f,
+ ADDR_COLOR_16_16_FLOAT = 0x00000010,
+ ADDR_COLOR_8_24 = 0x00000011,
+ ADDR_COLOR_8_24_FLOAT = 0x00000012,
+ ADDR_COLOR_24_8 = 0x00000013,
+ ADDR_COLOR_24_8_FLOAT = 0x00000014,
+ ADDR_COLOR_10_11_11 = 0x00000015,
+ ADDR_COLOR_10_11_11_FLOAT = 0x00000016,
+ ADDR_COLOR_11_11_10 = 0x00000017,
+ ADDR_COLOR_11_11_10_FLOAT = 0x00000018,
+ ADDR_COLOR_2_10_10_10 = 0x00000019,
+ ADDR_COLOR_8_8_8_8 = 0x0000001a,
+ ADDR_COLOR_10_10_10_2 = 0x0000001b,
+ ADDR_COLOR_X24_8_32_FLOAT = 0x0000001c,
+ ADDR_COLOR_32_32 = 0x0000001d,
+ ADDR_COLOR_32_32_FLOAT = 0x0000001e,
+ ADDR_COLOR_16_16_16_16 = 0x0000001f,
+ ADDR_COLOR_16_16_16_16_FLOAT = 0x00000020,
+ ADDR_COLOR_RESERVED_33 = 0x00000021,
+ ADDR_COLOR_32_32_32_32 = 0x00000022,
+ ADDR_COLOR_32_32_32_32_FLOAT = 0x00000023, ///< Highest value in this enum
+} AddrColorFormat;
+
+/**
+***************************************************************************************************
+* AddrSurfaceNumber
+*
+* @brief
+* Neutral enum for SurfaceNumber
+*
+***************************************************************************************************
+*/
+typedef enum _AddrSurfaceNumber { // NOTE(review): values presumably mirror the HW number-type field - confirm
+ ADDR_NUMBER_UNORM = 0x00000000,
+ ADDR_NUMBER_SNORM = 0x00000001,
+ ADDR_NUMBER_USCALED = 0x00000002,
+ ADDR_NUMBER_SSCALED = 0x00000003,
+ ADDR_NUMBER_UINT = 0x00000004,
+ ADDR_NUMBER_SINT = 0x00000005,
+ ADDR_NUMBER_SRGB = 0x00000006,
+ ADDR_NUMBER_FLOAT = 0x00000007, ///< Highest value; the enum is contiguous from 0
+} AddrSurfaceNumber;
+
+/**
+***************************************************************************************************
+* AddrSurfaceSwap
+*
+* @brief
+* Neutral enum for SurfaceSwap
+*
+***************************************************************************************************
+*/
+typedef enum _AddrSurfaceSwap { // NOTE(review): values presumably mirror the HW component-swap field - confirm
+ ADDR_SWAP_STD = 0x00000000,
+ ADDR_SWAP_ALT = 0x00000001,
+ ADDR_SWAP_STD_REV = 0x00000002, ///< STD value with bit 1 set
+ ADDR_SWAP_ALT_REV = 0x00000003, ///< ALT value with bit 1 set
+} AddrSurfaceSwap;
+
+/**
+***************************************************************************************************
+* AddrHtileBlockSize
+*
+* @brief
+* Size of HTILE blocks, valid values are 4 or 8 for now
+***************************************************************************************************
+*/
+typedef enum _AddrHtileBlockSize
+{
+ ADDR_HTILE_BLOCKSIZE_4 = 4, ///< Enumerator value equals the block size it names
+ ADDR_HTILE_BLOCKSIZE_8 = 8, ///< Enumerator value equals the block size it names
+} AddrHtileBlockSize;
+
+
+/**
+***************************************************************************************************
+* AddrPipeCfg
+*
+* @brief
+* The pipe configuration field specifies both the number of pipes and
+* how pipes are interleaved on the surface.
+* The expression of number of pipes, the shader engine tile size, and packer tile size
+* is encoded in a PIPE_CONFIG register field.
+* In general the number of pipes usually matches the number of memory channels of the
+* hardware configuration.
+* For hw configurations w/ a non-pow2 number of memory channels, it usually matches
+* the number of ROP units(? TODO: which registers??)
+* The enum value = hw enum + 1 which is to reserve 0 for requesting default.
+***************************************************************************************************
+*/
+typedef enum _AddrPipeCfg
+{
+ ADDR_PIPECFG_INVALID = 0, ///< 0 is reserved for requesting the default
+ ADDR_PIPECFG_P2 = 1, ///< 2 pipes
+ ADDR_PIPECFG_P4_8x16 = 5, ///< 4 pipes (values 2-4 are not assigned)
+ ADDR_PIPECFG_P4_16x16 = 6,
+ ADDR_PIPECFG_P4_16x32 = 7,
+ ADDR_PIPECFG_P4_32x32 = 8,
+ ADDR_PIPECFG_P8_16x16_8x16 = 9, ///< 8 pipes
+ ADDR_PIPECFG_P8_16x32_8x16 = 10,
+ ADDR_PIPECFG_P8_32x32_8x16 = 11,
+ ADDR_PIPECFG_P8_16x32_16x16 = 12,
+ ADDR_PIPECFG_P8_32x32_16x16 = 13,
+ ADDR_PIPECFG_P8_32x32_16x32 = 14,
+ ADDR_PIPECFG_P8_32x64_32x32 = 15,
+ ADDR_PIPECFG_P16_32x32_8x16 = 17, ///< 16 pipes (value 16 is not assigned)
+ ADDR_PIPECFG_P16_32x32_16x16 = 18,
+ ADDR_PIPECFG_MAX = 19, ///< One more than the last assigned configuration
+} AddrPipeCfg;
+
+/**
+***************************************************************************************************
+* AddrTileType
+*
+* @brief
+* Neutral enums that specifies micro tile type (MICRO_TILE_MODE)
+***************************************************************************************************
+*/
+typedef enum _AddrTileType
+{
+ ADDR_DISPLAYABLE = 0, ///< Displayable tiling
+ ADDR_NON_DISPLAYABLE = 1, ///< Non-displayable tiling, a.k.a. thin micro tiling
+ ADDR_DEPTH_SAMPLE_ORDER = 2, ///< Same as non-displayable plus depth-sample-order
+ ADDR_ROTATED = 3, ///< Rotated displayable tiling
+ ADDR_THICK = 4, ///< Thick micro-tiling, only valid for THICK and XTHICK tile modes
+} AddrTileType;
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Type definitions: short system-independent names for address library types
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if !defined(__APPLE__)
+
+#ifndef BOOL_32 // no bool type in C
+/// @brief Boolean type, since none is defined in C
+/// @ingroup type
+#define BOOL_32 int
+#endif
+
+#ifndef INT_32
+#define INT_32 int // NOTE(review): relies on int being 32 bits wide on every supported target
+#endif
+
+#ifndef UINT_32
+#define UINT_32 unsigned int // NOTE(review): relies on unsigned int being 32 bits wide
+#endif
+
+#ifndef INT_16
+#define INT_16 short
+#endif
+
+#ifndef UINT_16
+#define UINT_16 unsigned short
+#endif
+
+#ifndef INT_8
+#define INT_8 char // NOTE(review): signedness of plain char is implementation-defined
+#endif
+
+#ifndef UINT_8
+#define UINT_8 unsigned char
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+//
+// 64-bit integer types depend on the compiler
+//
+#if defined( __GNUC__ ) || defined( __WATCOMC__ )
+#define INT_64 long long
+#define UINT_64 unsigned long long
+
+#elif defined( _WIN32 )
+#define INT_64 __int64
+#define UINT_64 unsigned __int64
+
+#else
+#error Unsupported compiler and/or operating system for 64-bit integers
+// The definitions below are never part of a successful build (the #error above stops it).
+/// @brief 64-bit signed integer type (compiler dependent)
+/// @ingroup type
+///
+/// The addrlib defines a 64-bit signed integer type for either
+/// Gnu/Watcom compilers (which use the first syntax) or for
+/// the Windows VCC compiler (which uses the second syntax).
+#define INT_64 long long OR __int64
+
+/// @brief 64-bit unsigned integer type (compiler dependent)
+/// @ingroup type
+///
+/// The addrlib defines a 64-bit unsigned integer type for either
+/// Gnu/Watcom compilers (which use the first syntax) or for
+/// the Windows VCC compiler (which uses the second syntax).
+///
+#define UINT_64 unsigned long long OR unsigned __int64
+#endif
+
+#endif // #if !defined(__APPLE__)
+
+// ADDR64X is used to print addresses in hex form on both Windows and Linux;
+// ADDR64D is the decimal counterpart. Both are printf length+conversion suffixes.
+#if defined( __GNUC__ ) || defined( __WATCOMC__ )
+#define ADDR64X "llx"
+#define ADDR64D "lld"
+
+#elif defined( _WIN32 )
+#define ADDR64X "I64x"
+#define ADDR64D "I64d"
+
+#else
+#error Unsupported compiler and/or operating system for 64-bit integers
+// The definitions below are never part of a successful build (the #error above stops it).
+/// @brief Addrlib device address 64-bit printf tag (compiler dependent)
+/// @ingroup type
+///
+/// This allows printf to display an ADDR_64 for either the Windows VCC compiler
+/// (which uses "I64x") or the Gnu/Watcom compilers (which use "llx").
+/// An example of use is printf("addr 0x%" ADDR64X "\n", address);
+///
+#define ADDR64X "llx" OR "I64x"
+#define ADDR64D "lld" OR "I64d"
+#endif
+
+
+/// @brief Union for storing a 32-bit float or 32-bit integer
+/// @ingroup type
+///
+/// This union provides a simple way to convert between a 32-bit float
+/// and a 32-bit integer. It also prevents the compiler from producing
+/// code that alters NaN values when assigning or copying floats.
+/// Therefore, all address library routines that pass or return 32-bit
+/// floating point data do so by passing or returning an ADDR_FLT_32.
+///
+typedef union {
+ INT_32 i; ///< Signed integer view
+ UINT_32 u; ///< Unsigned integer view
+ float f; ///< Floating-point view
+} ADDR_FLT_32;
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Macros for controlling linking and building on multiple systems
+//
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#if defined(_MSC_VER) // MSVC: drop any existing va_copy so the fallback below is always used
+#if defined(va_copy)
+#undef va_copy //redefine va_copy to support VC2013
+#endif
+#endif
+
+#if !defined(va_copy) // fallback: byte-wise copy of the va_list object
+#define va_copy(dst, src) \
+ ((void) memcpy(&(dst), &(src), sizeof(va_list)))
+#endif // NOTE(review): memcpy needs <string.h>, which this header does not include - confirm includers do
+
+#endif // __ADDR_TYPES_H__
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/core/addrcommon.h b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrcommon.h
new file mode 100644
index 00000000000..f996c9a3402
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrcommon.h
@@ -0,0 +1,558 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrcommon.h
+* @brief Contains the helper function and constants
+***************************************************************************************************
+*/
+
+#ifndef __ADDR_COMMON_H__
+#define __ADDR_COMMON_H__
+
+#include "addrinterface.h"
+
+
+// ADDR_LNX_KERNEL_BUILD is for internal build
+// Moved from addrinterface.h so __KERNEL__ is not needed any more
+#if ADDR_LNX_KERNEL_BUILD // || (defined(__GNUC__) && defined(__KERNEL__))
+    #include "lnx_common_defs.h" // ported from cmmqs
+#elif !defined(__APPLE__)
+    #include <stdlib.h>
+    #include <string.h>
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Common constants
+///////////////////////////////////////////////////////////////////////////////////////////////////
+static const UINT_32 MicroTileWidth = 8; ///< Micro tile width, for 1D and 2D tiling
+static const UINT_32 MicroTileHeight = 8; ///< Micro tile height, for 1D and 2D tiling
+static const UINT_32 ThickTileThickness = 4; ///< Micro tile thickness, for THICK modes
+static const UINT_32 XThickTileThickness = 8; ///< Extra thick tiling thickness
+static const UINT_32 PowerSaveTileBytes = 64; ///< Number of bytes per tile for power save 64
+static const UINT_32 CmaskCacheBits = 1024; ///< Number of bits for CMASK cache
+static const UINT_32 CmaskElemBits = 4; ///< Number of bits for CMASK element
+static const UINT_32 HtileCacheBits = 16384; ///< Number of bits for HTILE cache 512*32
+
+static const UINT_32 MicroTilePixels = MicroTileWidth * MicroTileHeight; ///< 8 * 8 = 64 pixels
+
+static const INT_32 TileIndexInvalid = TILEINDEX_INVALID; ///< Typed copy of TILEINDEX_INVALID
+static const INT_32 TileIndexLinearGeneral = TILEINDEX_LINEAR_GENERAL; ///< Typed copy of the macro
+static const INT_32 TileIndexNoMacroIndex = -3; ///< Has no TILEINDEX_* macro counterpart in addrtypes.h
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Common macros
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#define BITS_PER_BYTE 8
+#define BITS_TO_BYTES(x) ( ((x) + (BITS_PER_BYTE-1)) / BITS_PER_BYTE ) // rounds up to whole bytes
+#define BYTES_TO_BITS(x) ( (x) * BITS_PER_BYTE )
+
+/// Helper macro to select bit b of v; yields 0 or 1 (undefined later in section)
+#define _BIT(v,b) (((v) >> (b) ) & 1)
+
+/**
+***************************************************************************************************
+* @brief Enums to identify AddrLib type
+***************************************************************************************************
+*/
+enum AddrLibClass
+{ // Values are explicit and sparse (0x0, 0x6, 0x8, 0xa, 0xb); do not assume contiguity
+ BASE_ADDRLIB = 0x0,
+ R600_ADDRLIB = 0x6,
+ R800_ADDRLIB = 0x8,
+ SI_ADDRLIB = 0xa,
+ CI_ADDRLIB = 0xb,
+};
+
+/**
+***************************************************************************************************
+* AddrChipFamily
+*
+* @brief
+* Neutral enums that specifies chip family.
+*
+***************************************************************************************************
+*/
+enum AddrChipFamily
+{
+ ADDR_CHIP_FAMILY_IVLD, ///< Invalid family (= 0; values below increase implicitly)
+ ADDR_CHIP_FAMILY_R6XX, ///< = 1
+ ADDR_CHIP_FAMILY_R7XX, ///< = 2
+ ADDR_CHIP_FAMILY_R8XX, ///< = 3
+ ADDR_CHIP_FAMILY_NI, ///< = 4
+ ADDR_CHIP_FAMILY_SI, ///< = 5
+ ADDR_CHIP_FAMILY_CI, ///< = 6
+ ADDR_CHIP_FAMILY_VI, ///< = 7
+};
+
+/**
+***************************************************************************************************
+* ADDR_CONFIG_FLAGS
+*
+* @brief
+* This structure is used to set addr configuration flags.
+***************************************************************************************************
+*/
+union ADDR_CONFIG_FLAGS
+{
+ struct // NOTE(review): no forceLinearAligned bit is declared below - the comment may be stale
+ {
+ /// Clients do not need to set these flags except forceLinearAligned.
+ /// These flags are set up inside AddrLib through AddrInitGlobalParamsFromRegister
+ UINT_32 optimalBankSwap : 1; ///< New bank tiling for RV770 only
+ UINT_32 noCubeMipSlicesPad : 1; ///< Disables faces padding for cubemap mipmaps
+ UINT_32 fillSizeFields : 1; ///< If clients fill size fields in all input and
+ /// output structure
+ UINT_32 ignoreTileInfo : 1; ///< Don't use tile info structure
+ UINT_32 useTileIndex : 1; ///< Make tileIndex field in input valid
+ UINT_32 useCombinedSwizzle : 1; ///< Use combined swizzle
+ UINT_32 checkLast2DLevel : 1; ///< Check the last 2D mip sub level
+ UINT_32 useHtileSliceAlign : 1; ///< Do htile single slice alignment
+ UINT_32 degradeBaseLevel : 1; ///< Degrade to 1D modes automatically for base level
+ UINT_32 allowLargeThickTile : 1; ///< Allow 64*thickness*bytesPerPixel > rowSize
+ UINT_32 reserved : 22; ///< Reserved bits for future use (10 flag bits + 22 = 32)
+ };
+
+ UINT_32 value; ///< All of the bit-fields above viewed as one UINT_32
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Platform specific debug break defines
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#if DEBUG
+ #if defined(__GNUC__)
+ #define ADDR_DBG_BREAK() // expands to nothing whenever the compiler defines __GNUC__
+ #elif defined(__APPLE__)
+ #define ADDR_DBG_BREAK() { IOPanic("");}
+ #else
+ #define ADDR_DBG_BREAK() { __debugbreak(); }
+ #endif
+#else
+ #define ADDR_DBG_BREAK() // non-debug builds: expands to nothing
+#endif
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Debug assertions used in AddrLib
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#if DEBUG // NOTE(review): ADDR_ASSERT expands to a bare if-statement; avoid it as an unbraced if/else body
+#define ADDR_ASSERT(__e) if ( !((__e) ? TRUE : FALSE)) { ADDR_DBG_BREAK(); }
+#define ADDR_ASSERT_ALWAYS() ADDR_DBG_BREAK()
+#define ADDR_UNHANDLED_CASE() ADDR_ASSERT(!"Unhandled case")
+#define ADDR_NOT_IMPLEMENTED() ADDR_ASSERT(!"Not implemented"); // NOTE(review): expansion already ends in ';'
+#else //DEBUG: every assertion macro expands to nothing, so arguments are not evaluated
+#define ADDR_ASSERT(__e)
+#define ADDR_ASSERT_ALWAYS()
+#define ADDR_UNHANDLED_CASE()
+#define ADDR_NOT_IMPLEMENTED()
+#endif //DEBUG
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Debug print macro from legacy address library
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#if DEBUG
+
+#define ADDR_PRNT(a) AddrObject::DebugPrint a // "a" must bring its own parentheses
+
+/// @brief Macro for reporting informational messages
+/// @ingroup util
+///
+/// This macro optionally prints an informational message to stdout.
+/// The first parameter is a condition -- if it is true, nothing is done.
+/// The second parameter MUST be a parenthesis-enclosed list of arguments,
+/// starting with a string. This is passed to printf() or an equivalent
+/// in order to format the informational message. For example,
+/// ADDR_INFO(0, ("test %d",3) ); prints out "test 3".
+///
+#define ADDR_INFO(cond, a) \
+{ if (!(cond)) { ADDR_PRNT(a); } }
+
+
+/// @brief Macro for reporting error warning messages
+/// @ingroup util
+///
+/// This macro optionally prints an error warning message to stdout,
+/// followed by the file name and line number where the macro was called.
+/// The first parameter is a condition -- if it is true, nothing is done.
+/// The second parameter MUST be a parenthesis-enclosed list of arguments,
+/// starting with a string. This is passed to printf() or an equivalent
+/// in order to format the informational message. For example,
+/// ADDR_WARN(0, ("test %d",3) ); prints out "test 3" followed by
+/// a second line with the file name and line number.
+///
+#define ADDR_WARN(cond, a) \
+{ if (!(cond)) \
+ { ADDR_PRNT(a); \
+ ADDR_PRNT((" WARNING in file %s, line %d\n", __FILE__, __LINE__)); \
+} }
+
+
+/// @brief Macro for reporting fatal error conditions
+/// @ingroup util
+///
+/// This macro optionally invokes ADDR_DBG_BREAK()
+/// after printing the caller-supplied message to stdout.
+/// (Unlike ADDR_WARN, the expansion does not append the file name and line number.)
+/// The first parameter is a condition -- if it is true, nothing is done.
+/// The second parameter MUST be a parenthesis-enclosed list of arguments,
+/// starting with a string. This is passed to printf() or an equivalent
+/// in order to format the informational message. For example,
+/// ADDR_EXIT(0, ("test %d",3) ); prints out "test 3" and then
+/// invokes ADDR_DBG_BREAK(), which may expand to nothing on some compilers.
+///
+#define ADDR_EXIT(cond, a) \
+{ if (!(cond)) \
+ { ADDR_PRNT(a); ADDR_DBG_BREAK();\
+} }
+
+#else // DEBUG
+// Non-debug builds: the reporting macros expand to nothing, so their arguments are not evaluated.
+#define ADDRDPF 1 ? (void)0 : (void) // ADDRDPF(args) becomes a conditional whose args branch never runs
+
+#define ADDR_PRNT(a)
+
+#define ADDR_DBG_BREAK()
+
+#define ADDR_INFO(cond, a)
+
+#define ADDR_WARN(cond, a)
+
+#define ADDR_EXIT(cond, a)
+
+#endif // DEBUG
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Misc helper functions
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* XorReduce
+*
+* @brief
+* Xor the right-side numberOfBits bits of x.
+***************************************************************************************************
+*/
+static inline UINT_32 XorReduce(
+    UINT_32 x,              ///< [in] value whose low bits are folded together
+    UINT_32 numberOfBits)   ///< [in] number of low bits to fold; bit 0 is always included
+{
+    UINT_32 i;
+    UINT_32 result = x & 1;
+
+    for (i=1; i<numberOfBits; i++)
+    {
+        result ^= ((x>>i) & 1);
+    }
+
+    return result;
+}
+
+/**
+***************************************************************************************************
+* IsPow2
+*
+* @brief
+* Check if the size (UINT_32) is pow 2
+***************************************************************************************************
+*/
+static inline UINT_32 IsPow2(
+    UINT_32 dim)        ///< [in] value to test; a zero only trips the debug assert and yields 1
+{
+    ADDR_ASSERT(dim > 0);
+    return ((dim & (dim - 1)) == 0) ? 1 : 0;
+}
+
+/**
+***************************************************************************************************
+* IsPow2
+*
+* @brief
+* Check if the size (UINT_64) is pow 2
+***************************************************************************************************
+*/
+static inline UINT_64 IsPow2(
+    UINT_64 dim)        ///< [in] value to test; a zero only trips the debug assert and yields 1
+{
+    ADDR_ASSERT(dim > 0);
+    return ((dim & (dim - 1)) == 0) ? 1 : 0;
+}
+
+/**
+***************************************************************************************************
+* PowTwoAlign
+*
+* @brief
+* Align UINT_32 "x" to "align" alignment, "align" should be power of 2
+***************************************************************************************************
+*/
+static inline UINT_32 PowTwoAlign(
+    UINT_32 x,
+    UINT_32 align)
+{
+    // "align" must be a power of two so that (align - 1) is a mask of the low bits.
+    ADDR_ASSERT(IsPow2(align));
+
+    const UINT_32 lowBits = align - 1;
+    return (x + lowBits) & ~lowBits;
+}
+
+/**
+***************************************************************************************************
+* PowTwoAlign
+*
+* @brief
+* Align UINT_64 "x" to "align" alignment, "align" should be power of 2
+***************************************************************************************************
+*/
+static inline UINT_64 PowTwoAlign(
+    UINT_64 x,
+    UINT_64 align)
+{
+    // "align" must be a power of two so that (align - 1) is a mask of the low bits.
+    ADDR_ASSERT(IsPow2(align));
+
+    const UINT_64 lowBits = align - 1;
+    return (x + lowBits) & ~lowBits;
+}
+
+/**
+***************************************************************************************************
+* Min
+*
+* @brief
+* Get the min value between two unsigned values
+***************************************************************************************************
+*/
+static inline UINT_32 Min(
+    UINT_32 value1,
+    UINT_32 value2)
+{
+    return (value2 > value1) ? value1 : value2; // equal inputs fall through to value2
+}
+
+/**
+***************************************************************************************************
+*   Min
+*
+*   @brief
+*       Return the smaller of two signed values
+***************************************************************************************************
+*/
+static inline INT_32 Min(
+    INT_32 value1,
+    INT_32 value2)
+{
+    return (value2 <= value1) ? value2 : value1;
+}
+
+/**
+***************************************************************************************************
+*   Max
+*
+*   @brief
+*       Return the larger of two unsigned values
+***************************************************************************************************
+*/
+static inline UINT_32 Max(
+    UINT_32 value1,
+    UINT_32 value2)
+{
+    return (value2 >= value1) ? value2 : value1;
+}
+
+/**
+***************************************************************************************************
+*   Max
+*
+*   @brief
+*       Return the larger of two signed values
+***************************************************************************************************
+*/
+static inline INT_32 Max(
+    INT_32 value1,
+    INT_32 value2)
+{
+    return (value2 >= value1) ? value2 : value1;
+}
+
+/**
+***************************************************************************************************
+*   NextPow2
+*
+*   @brief
+*       Round a dimension up to a power of two (a power of two maps to itself, 0 maps to 1)
+***************************************************************************************************
+*/
+static inline UINT_32 NextPow2(
+    UINT_32 dim)        ///< [in] dimension of miplevel
+{
+    UINT_32 pow2 = 1;
+
+    if (dim <= 0x7fffffff)
+    {
+        // Keep doubling until dim is reached; with dim < 2^31 the result still fits.
+        while (pow2 < dim)
+        {
+            pow2 += pow2;
+        }
+    }
+    else
+    {
+        // Top bit set: treated as out of range, so assert and clamp the answer to 2^31.
+        ADDR_ASSERT_ALWAYS();
+        pow2 = 0x80000000;
+    }
+
+    return pow2;
+}
+
+/**
+***************************************************************************************************
+*   Log2
+*
+*   @brief
+*       Compute the base-2 logarithm of a power-of-two value
+***************************************************************************************************
+*/
+static inline UINT_32 Log2(
+    UINT_32 x)      ///< [in] power-of-two value whose exponent is wanted
+{
+    //
+    // The shift count below is an exact logarithm only for powers of two.
+    //
+    ADDR_ASSERT(IsPow2(x));
+
+    UINT_32 exponent = 0;
+
+    // Count how many right shifts are needed
+    // to bring the value down to 1.
+    for (UINT_32 rest = x; rest > 1; rest >>= 1)
+    {
+        ++exponent;
+    }
+
+    return exponent;
+}
+
+/**
+***************************************************************************************************
+*   QLog2
+*
+*   @brief
+*       Compute log of base 2 by direct lookup; only 1, 2, 4, 8 and 16 are accepted
+***************************************************************************************************
+*/
+static inline UINT_32 QLog2(
+    UINT_32 x)      ///< [in] the value should calculate log based 2
+{
+    ADDR_ASSERT(x <= 16);
+
+    // Each recognised power of two returns its exponent straight away.
+    if (x == 1)
+    {
+        return 0;
+    }
+    if (x == 2)
+    {
+        return 1;
+    }
+    if (x == 4)
+    {
+        return 2;
+    }
+    if (x == 8)
+    {
+        return 3;
+    }
+    if (x == 16)
+    {
+        return 4;
+    }
+    // Anything else is a caller error: assert, then fall back to 0.
+    ADDR_ASSERT_ALWAYS();
+    return 0;
+}
+
+/**
+***************************************************************************************************
+*   SafeAssign
+*   @brief
+*       Store a UINT_32 through a pointer, silently doing nothing when the pointer is NULL
+***************************************************************************************************
+*/
+static inline VOID SafeAssign(
+    UINT_32*    pLVal,  ///< [out] Destination; may be NULL
+    UINT_32     rVal)   ///< [in] Value to store
+{
+    if (!pLVal)
+    {
+        return;
+    }
+    *pLVal = rVal;
+}
+
+/**
+***************************************************************************************************
+*   SafeAssign
+*   @brief
+*       Store a UINT_64 through a pointer, silently doing nothing when the pointer is NULL
+***************************************************************************************************
+*/
+static inline VOID SafeAssign(
+    UINT_64*    pLVal,  ///< [out] Destination; may be NULL
+    UINT_64     rVal)   ///< [in] Value to store
+{
+    if (!pLVal)
+    {
+        return;
+    }
+    *pLVal = rVal;
+}
+
+/**
+***************************************************************************************************
+*   SafeAssign
+*   @brief
+*       Store an AddrTileMode through a pointer, silently doing nothing when the pointer is NULL
+***************************************************************************************************
+*/
+static inline VOID SafeAssign(
+    AddrTileMode*   pLVal,  ///< [out] Destination; may be NULL
+    AddrTileMode    rVal)   ///< [in] Value to store
+{
+    if (!pLVal)
+    {
+        return;
+    }
+    *pLVal = rVal;
+}
+
+#endif // __ADDR_COMMON_H__
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.cpp b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.cpp
new file mode 100644
index 00000000000..76b1badf958
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.cpp
@@ -0,0 +1,1674 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrelemlib.cpp
+* @brief Contains the class implementation for element/pixel related functions
+***************************************************************************************************
+*/
+
+#include "addrelemlib.h"
+#include "addrlib.h"
+
+
+/**
+***************************************************************************************************
+*   AddrElemLib::AddrElemLib
+*
+*   @brief
+*       Constructor: remembers the parent AddrLib and picks the depth-planar type and the
+*       fp16 export-norm setting from the parent's chip family
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+AddrElemLib::AddrElemLib(
+    AddrLib* const pAddrLib) :  ///< [in] Parent addrlib instance pointer
+    AddrObject(pAddrLib->GetClient()),
+    m_pAddrLib(pAddrLib)
+{
+    // Settings shared by R8XX, NI and every family not listed in the switch below.
+    m_depthPlanarType = ADDR_DEPTH_PLANAR_R800;
+    m_fp16ExportNorm = 1;
+
+    switch (m_pAddrLib->GetAddrChipFamily())
+    {
+        case ADDR_CHIP_FAMILY_R6XX:
+            m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
+            m_fp16ExportNorm = 0;
+            break;
+        case ADDR_CHIP_FAMILY_R7XX:
+            m_depthPlanarType = ADDR_DEPTH_PLANAR_R600;
+            break;
+        default:    // keep the values assigned above
+            break;
+    }
+
+    m_configFlags.value = 0;
+}
+
+/**
+***************************************************************************************************
+* AddrElemLib::~AddrElemLib
+*
+* @brief
+* Destructor; the body is empty, so no explicit cleanup is performed here
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+AddrElemLib::~AddrElemLib()
+{
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::Create
+*
+*   @brief
+*       Creates and initializes an AddrElemLib object for the given parent AddrLib.
+*
+*   @return
+*       Pointer to the new AddrElemLib; NULL when pAddrLib is NULL.
+***************************************************************************************************
+*/
+AddrElemLib* AddrElemLib::Create(
+    const AddrLib* const pAddrLib)   ///< [in] Pointer of parent AddrLib instance
+{
+    AddrElemLib* pElemLib = NULL;
+
+    if (pAddrLib)
+    {
+        // The constructor takes a non-const parent pointer, so constness is dropped here.
+        pElemLib = new(pAddrLib->GetClient()) AddrElemLib(const_cast<AddrLib*>(pAddrLib));
+    }
+
+    return pElemLib;
+}
+
+/**************************************************************************************************
+* AddrElemLib::Flt32sToInt32s
+*
+* @brief
+* Convert one ADDR_FLT_32 component to its integer encoding for the given number type
+* and store it through pResult. The zero-bit number types (ADDR_NO_NUMBER, ADDR_ZERO,
+* ADDR_ONE, ADDR_EPSILON) and unrecognised number types leave *pResult untouched.
+*
+* @note
+* NOTE(review): several statements in this copy look truncated, as if the text between
+* a '<' and a later '>' had been dropped (the ADDR_UINT_BITS range check, parts of the
+* ADDR_UNORM_R6XX branch including an #endif with no visible #if, and the static_cast
+* target types). Compare with the upstream file before relying on those lines.
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::Flt32sToInt32s(
+ ADDR_FLT_32 value, ///< [in] ADDR_FLT_32 value
+ UINT_32 bits, ///< [in] number of bits in value
+ AddrNumberType numberType, ///< [in] the type of number
+ UINT_32* pResult) ///< [out] Int32 value
+{
+ UINT_8 round = 128; //ADDR_ROUND_BY_HALF
+ UINT_32 uscale;
+ UINT_32 sign;
+
+ //convert each component to an INT_32
+ switch ( numberType )
+ {
+ case ADDR_NO_NUMBER: //fall through
+ case ADDR_ZERO: //fall through
+ case ADDR_ONE: //fall through
+ case ADDR_EPSILON: //fall through
+ return; // these are zero-bit components, so don't set result
+
+ case ADDR_UINT_BITS: // unsigned integer bit field, clamped to range
+ uscale = (1< uscale))
+ {
+ *pResult = uscale;
+ }
+ else
+ {
+ *pResult = value.i;
+ }
+ return;
+ }
+
+ // The algorithm used in the DB and TX differs at one value for 24-bit unorms
+ case ADDR_UNORM_R6XXDB: // unsigned repeating fraction
+ if ((bits==24) && (value.i == 0x33000000))
+ {
+ *pResult = 1;
+ return;
+ } // Else treat like ADDR_UNORM_R6XX
+
+ case ADDR_UNORM_R6XX: // unsigned repeating fraction
+ if (value.f <= 0)
+ {
+ *pResult = 0; // first clamp to [0..1]
+ }
+ else
+ {
+ if (value.f >= 1)
+ {
+ *pResult = (1<(f + (round/256.0f));
+ }
+ #endif
+ else
+ {
+ ADDR_FLT_32 scaled;
+ ADDR_FLT_32 shifted;
+ UINT_64 truncated, rounded;
+ UINT_32 altShift;
+ UINT_32 mask = (1 << bits) - 1;
+ UINT_32 half = 1 << (bits - 1);
+ UINT_32 mant24 = (value.i & 0x7FFFFF) + 0x800000;
+ UINT_64 temp = mant24 - (mant24>>bits) -
+ static_cast((mant24 & mask) > half);
+ UINT_32 exp8 = value.i >> 23;
+ UINT_32 shift = 126 - exp8 + 24 - bits;
+ UINT_64 final;
+
+ if (shift >= 32) // This is zero, even with maximum dither add
+ {
+ final = 0;
+ }
+ else
+ {
+ final = ((temp<<8) + (static_cast(round)<> (shift+8);
+ }
+ //ADDR_EXIT( *pResult == final,
+ // ("Float %x converted to %d-bit Unorm %x != bitwise %x",
+ // value.u, bits, (UINT_32)*pResult, (UINT_32)final) );
+ if (final > mask)
+ {
+ final = mask;
+ }
+
+ scaled.f = value.f * ((1<>23)&0xFF);
+ truncated = (altShift > 60) ? 0 : truncated >> altShift;
+ rounded = static_cast((round + truncated) >> 8);
+ //if (rounded > ((1<(rounded); //(INT_32)final;
+ }
+ }
+ }
+
+ return;
+
+ case ADDR_S8FLOAT32: // 32-bit IEEE float, passes through NaN values
+ *pResult = value.i;
+ return;
+
+ // @@ FIX ROUNDING in this code, fix the denorm case
+ case ADDR_U4FLOATC: // Unsigned float, 4-bit exponent. bias 15, clamped [0..1]
+ sign = (value.i >> 31) & 1;
+ if ((value.i&0x7F800000) == 0x7F800000) // If NaN or INF:
+ {
+ if ((value.i&0x007FFFFF) != 0) // then if NaN
+ {
+ *pResult = 0; // return 0
+ }
+ else
+ {
+ *pResult = (sign)?0:0xF00000; // else +INF->+1, -INF->0
+ }
+ return;
+ }
+ if (value.f <= 0)
+ {
+ *pResult = 0;
+ }
+ else
+ {
+ if (value.f>=1)
+ {
+ *pResult = 0xF << (bits-4);
+ }
+ else
+ {
+ if ((value.i>>23) > 112 )
+ {
+ // 24-bit float: normalized
+ // value.i += 1 << (22-bits+4);
+ // round the IEEE mantissa to mantissa size
+ // @@ NOTE: add code to support rounding
+ value.u &= 0x7FFFFFF; // mask off high 4 exponent bits
+ *pResult = value.i >> (23-bits+4);// shift off unused mantissa bits
+ }
+ else
+ {
+ // 24-bit float: denormalized
+ value.f = value.f / (1<<28) / (1<<28);
+ value.f = value.f / (1<<28) / (1<<28); // convert to IEEE denorm
+ // value.i += 1 << (22-bits+4);
+ // round the IEEE mantissa to mantissa size
+ // @@ NOTE: add code to support rounding
+ *pResult = value.i >> (23-bits+4); // shift off unused mantissa bits
+ }
+ }
+ }
+
+ return;
+
+ default: // invalid number mode
+ //ADDR_EXIT(0, ("Invalid AddrNumber %d", numberType) );
+ break;
+
+ }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::Int32sToPixel
+*
+*   @brief
+*       Pack 32-bit integer values into an uncompressed pixel,
+*       in the proper order
+*
+*   @return
+*       N/A
+*
+*   @note
+*       This entry point packs numComps 32-bit integer values into
+*       an uncompressed pixel. The pixel values are specified in
+*       standard order, e.g. depth/stencil. When every component is
+*       byte-aligned the component bytes are copied directly; otherwise
+*       the components are merged as bit fields and written with a
+*       read-modify-write that keeps pixel bits outside the components.
+*       No check for compressed formats is performed yet (see the
+*       "@@ NOTE" in the body).
+***************************************************************************************************
+*/
+VOID AddrElemLib::Int32sToPixel(
+    UINT_32              numComps,      ///< [in] number of components
+    UINT_32*             pComps,        ///< [in] components
+    UINT_32*             pCompBits,     ///< [in] total bits in each component
+    UINT_32*             pCompStart,    ///< [in] the first bit position of each component
+    ADDR_COMPONENT_FLAGS properties,    ///< [in] properties about byteAligned, exportNorm
+    UINT_32              resultBits,    ///< [in] result bits: total bpp after decompression
+    UINT_8*              pPixel)        ///< [in/out] pixel bytes to update
+{
+    UINT_32 i;
+    UINT_32 j;
+    UINT_32 start;
+    UINT_32 size;
+    UINT_32 byte;
+    UINT_32 value = 0;
+    UINT_32 compMask;
+    UINT_32 elemMask=0;
+    UINT_32 elementXor = 0;  // address xor when reading bytes from elements
+
+
+    // @@ NOTE: assert if called on a compressed format!
+
+    if (properties.byteAligned)    // Components are all byte-sized
+    {
+        for (i = 0; i < numComps; i++)        // Then for each component
+        {
+            // Copy the bytes of the component into the element
+            start = pCompStart[i] / 8;
+            size  = pCompBits[i]  / 8;
+            for (j = 0; j < size; j++)
+            {
+                // Destination is a byte array, so keep only the low 8 bits of the shifted value
+                pPixel[(j+start)^elementXor] = static_cast<UINT_8>(pComps[i] >> (8*j));
+            }
+        }
+    }
+    else                        // Element is 32-bits or less, components are bit fields
+    {
+        // First, extract each component in turn and combine it into a 32-bit value
+        for (i = 0; i < numComps; i++)
+        {
+            compMask = (1 << pCompBits[i]) - 1;
+            elemMask |= compMask << pCompStart[i];
+            value |= (pComps[i] & compMask) << pCompStart[i];
+        }
+
+        // Next, copy the masked value into the element
+        size = (resultBits + 7) / 8;
+        for (i = 0; i < size; i++)
+        {
+            // Keep the pixel bits that no component covers, then merge in the new bits
+            byte = pPixel[i^elementXor] & ~(elemMask >> (8*i));
+            pPixel[i^elementXor] = static_cast<UINT_8>(byte | ((elemMask & value) >> (8*i)));
+        }
+    }
+}
+
+/**
+***************************************************************************************************
+*   Flt32ToDepthPixel
+*
+*   @brief
+*       Convert two FLT_32 components into a packed depth/stencil pixel: look up the
+*       per-component layout, convert each float to its integer form, then pack them
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::Flt32ToDepthPixel(
+    AddrDepthFormat     format,     ///< [in] Depth format
+    const ADDR_FLT_32   comps[2],   ///< [in] two components of depth
+    UINT_8*             pPixel      ///< [out] depth pixel value
+    ) const
+{
+    UINT_32                 packed[2];
+    ADDR_COMPONENT_FLAGS    flags;          // byteAligned, exportNorm, floatComp
+    UINT_32                 totalBits = 0;  // one past the highest bit used by any component
+    ADDR_PIXEL_FORMATINFO   depthInfo;
+    UINT_32                 c;
+
+    // Look up width, start bit and number type of each component
+    PixGetDepthCompInfo(format, &depthInfo);
+
+    flags.byteAligned = TRUE;
+    flags.exportNorm  = TRUE;
+    flags.floatComp   = FALSE;
+
+    // Derive the packing flags and the overall pixel width
+    for (c = 0; c < 2; c++)
+    {
+        const UINT_32 width  = depthInfo.compBit[c];
+        const UINT_32 endBit = depthInfo.compStart[c] + width;
+
+        // A width or start bit that is not a multiple of 8 selects the bit-field packing path
+        if (((width | depthInfo.compStart[c]) & 7) != 0)
+        {
+            flags.byteAligned = FALSE;
+        }
+
+        if (endBit > totalBits)
+        {
+            totalBits = endBit;
+        }
+
+        // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
+        if ((depthInfo.numType[c] >= ADDR_USCALED) || (width > 11))
+        {
+            flags.exportNorm = FALSE;
+        }
+
+        // Remember whether any component is floating point
+        if ((depthInfo.numType[c] >= ADDR_S8FLOAT) || (depthInfo.numType[c] == ADDR_U4FLOATC))
+        {
+            flags.floatComp = TRUE;
+        }
+    }
+
+    // Turn each input float into its integer encoding
+    for (c = 0; c < 2; c++)
+    {
+        Flt32sToInt32s(comps[c], depthInfo.compBit[c], depthInfo.numType[c], &packed[c]);
+    }
+
+    // Finally place both integer components at their bit positions
+    Int32sToPixel(2, packed, depthInfo.compBit, depthInfo.compStart, flags, totalBits, pPixel);
+}
+
+/**
+***************************************************************************************************
+*   Flt32ToColorPixel
+*
+*   @brief
+*       Convert four FLT_32 components into a packed red/green/blue/alpha pixel: look up
+*       the per-component layout, convert each float to its integer form, then pack them
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::Flt32ToColorPixel(
+    AddrColorFormat     format,     ///< [in] Color format
+    AddrSurfaceNumber   surfNum,    ///< [in] Surface number
+    AddrSurfaceSwap     surfSwap,   ///< [in] Surface swap
+    const ADDR_FLT_32   comps[4],   ///< [in] four components of color
+    UINT_8*             pPixel      ///< [out] a red/green/blue/alpha pixel value
+    ) const
+{
+    ADDR_PIXEL_FORMATINFO pixelInfo;
+
+    UINT_32 i;
+    // Flt32sToInt32s leaves its result untouched for zero-bit number types, while the
+    // bit-field path of Int32sToPixel still reads every entry (masked with 0). Start from
+    // zero so that read never sees an indeterminate value.
+    UINT_32 values[4] = {0, 0, 0, 0};
+    ADDR_COMPONENT_FLAGS properties;    // byteAligned, exportNorm
+    UINT_32 resultBits = 0;             // result bits: total bits per pixel after decompression
+
+    memset(&pixelInfo, 0, sizeof(ADDR_PIXEL_FORMATINFO));
+
+    PixGetColorCompInfo(format, surfNum, surfSwap, &pixelInfo);
+
+    //initialize properties
+    properties.byteAligned = TRUE;
+    properties.exportNorm = TRUE;
+    properties.floatComp = FALSE;
+
+    //set properties and result bits
+    for (i = 0; i < 4; i++)
+    {
+        if ( (pixelInfo.compBit[i] & 7) || (pixelInfo.compStart[i] & 7) )
+        {
+            properties.byteAligned = FALSE;
+        }
+
+        if (resultBits < pixelInfo.compStart[i] + pixelInfo.compBit[i])
+        {
+            resultBits = pixelInfo.compStart[i] + pixelInfo.compBit[i];
+        }
+
+        if (m_fp16ExportNorm)
+        {
+            // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
+            // or if it's not FP and <=16 bits
+            if (((pixelInfo.compBit[i] > 11) || (pixelInfo.numType[i] >= ADDR_USCALED))
+                && (pixelInfo.numType[i] !=ADDR_U4FLOATC))
+            {
+                properties.exportNorm = FALSE;
+            }
+        }
+        else
+        {
+            // Clear ADDR_EXPORT_NORM if can't be represented as 11-bit or smaller [-1..+1] format
+            if (pixelInfo.compBit[i] > 11 || pixelInfo.numType[i] >= ADDR_USCALED)
+            {
+                properties.exportNorm = FALSE;
+            }
+        }
+
+        // Mark if there are any floating point components
+        if ( (pixelInfo.numType[i] == ADDR_U4FLOATC) ||
+             (pixelInfo.numType[i] >= ADDR_S8FLOAT) )
+        {
+            properties.floatComp = TRUE;
+        }
+    }
+
+    // Convert the four input floats to integer values
+    for (i = 0; i < 4; i++)
+    {
+        Flt32sToInt32s(comps[i], pixelInfo.compBit[i], pixelInfo.numType[i], &values[i]);
+    }
+
+    // Then pack the four integer components, in the proper order
+    Int32sToPixel(4, values, &pixelInfo.compBit[0], &pixelInfo.compStart[0],
+                  properties, resultBits, pPixel);
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::GetCompType
+*
+*   @brief
+*       Fill in the number type of each of the four components, based on the surface
+*       format, the requested number type and the component widths already in pInfo
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::GetCompType(
+    AddrColorFormat         format,     ///< [in] surface format
+    AddrSurfaceNumber       numType,    ///< [in] number type
+    ADDR_PIXEL_FORMATINFO*  pInfo)      ///< [in][out] per component info out
+{
+    // Formats that dictate their own number types are dealt with first
+    switch (format)
+    {
+        // Pure floating point formats override the requested number format
+        case ADDR_COLOR_16_FLOAT:
+        case ADDR_COLOR_16_16_FLOAT:
+        case ADDR_COLOR_16_16_16_16_FLOAT:
+        case ADDR_COLOR_32_FLOAT:
+        case ADDR_COLOR_32_32_FLOAT:
+        case ADDR_COLOR_32_32_32_32_FLOAT:
+        case ADDR_COLOR_10_11_11_FLOAT:
+        case ADDR_COLOR_11_11_10_FLOAT:
+            numType = ADDR_NUMBER_FLOAT;
+            break;
+
+        // Depth formats: the type follows from the component width alone
+        case ADDR_COLOR_8_24:
+        case ADDR_COLOR_24_8:
+            for (UINT_32 comp = 0; comp < 4; comp++)
+            {
+                switch (pInfo->compBit[comp])
+                {
+                    case 8:
+                        pInfo->numType[comp] = ADDR_UINT_BITS;
+                        break;
+                    case 24:
+                        pInfo->numType[comp] = ADDR_UNORM_R6XX;
+                        break;
+                    default:
+                        pInfo->numType[comp] = ADDR_NO_NUMBER;
+                        break;
+                }
+            }
+            return;     // fully handled, nothing generic left to do
+
+        // Float depth formats: again decided by the component width
+        case ADDR_COLOR_8_24_FLOAT:
+        case ADDR_COLOR_24_8_FLOAT:
+        case ADDR_COLOR_X24_8_32_FLOAT:
+            for (UINT_32 comp = 0; comp < 4; comp++)
+            {
+                switch (pInfo->compBit[comp])
+                {
+                    case 8:
+                        pInfo->numType[comp] = ADDR_UINT_BITS;
+                        break;
+                    case 24:
+                        pInfo->numType[comp] = ADDR_U4FLOATC;
+                        break;
+                    case 32:
+                        pInfo->numType[comp] = ADDR_S8FLOAT32;
+                        break;
+                    default:
+                        pInfo->numType[comp] = ADDR_NO_NUMBER;
+                        break;
+                }
+            }
+            return;     // fully handled, nothing generic left to do
+
+        default:
+            break;
+    }
+
+    // Generic path: derive each component's type from numType and its width
+    const BOOL_32 isInteger = (numType == ADDR_NUMBER_UINT) || (numType == ADDR_NUMBER_SINT);
+
+    for (UINT_32 comp = 0; comp < 4; comp++)
+    {
+        const UINT_32 width = pInfo->compBit[comp];
+
+        // Absent components get a default value type instead of a real number type
+        if (width == 0)
+        {
+            if (comp < 3)
+            {
+                pInfo->numType[comp] = ADDR_ZERO;       // Default is zero for RGB
+            }
+            else if (isInteger)
+            {
+                pInfo->numType[comp] = ADDR_EPSILON;    // Alpha INT_32 bits default is 0x01
+            }
+            else
+            {
+                pInfo->numType[comp] = ADDR_ONE;        // Alpha normal default is float 1.0
+            }
+            continue;
+        }
+
+        // One-bit components can only be UINT or UNORM; wider ones use the requested type
+        AddrSurfaceNumber effective = numType;
+
+        if (width == 1)
+        {
+            effective = isInteger ? ADDR_NUMBER_UINT : ADDR_NUMBER_UNORM;
+        }
+
+        // Map the surface number type onto the per-component number type
+        switch (effective)
+        {
+            case ADDR_NUMBER_SRGB:
+                // Only the first three components are gamma encoded
+                if (comp < 3)
+                {
+                    pInfo->numType[comp] = ADDR_GAMMA8_R6XX;
+                }
+                else
+                {
+                    pInfo->numType[comp] = ADDR_UNORM_R6XX;
+                }
+                break;
+            case ADDR_NUMBER_UNORM:
+                pInfo->numType[comp] = ADDR_UNORM_R6XX;
+                break;
+            case ADDR_NUMBER_SNORM:
+                pInfo->numType[comp] = ADDR_SNORM_R6XX;
+                break;
+            case ADDR_NUMBER_USCALED:
+                pInfo->numType[comp] = ADDR_USCALED;  // @@ Do we need separate Pele routine?
+                break;
+            case ADDR_NUMBER_SSCALED:
+                pInfo->numType[comp] = ADDR_SSCALED;  // @@ Do we need separate Pele routine?
+                break;
+            case ADDR_NUMBER_FLOAT:
+                // The float flavour depends on the component width
+                if (width == 32)
+                {
+                    pInfo->numType[comp] = ADDR_S8FLOAT32;
+                }
+                else if (width == 16)
+                {
+                    pInfo->numType[comp] = ADDR_S5FLOAT;
+                }
+                else if (width >= 10)
+                {
+                    pInfo->numType[comp] = ADDR_U5FLOAT;
+                }
+                else
+                {
+                    // Narrower float components are not expected; numType is left as is
+                    ADDR_ASSERT_ALWAYS();
+                }
+                break;
+            case ADDR_NUMBER_SINT:
+                pInfo->numType[comp] = ADDR_SINT_BITS;
+                break;
+            case ADDR_NUMBER_UINT:
+                pInfo->numType[comp] = ADDR_UINT_BITS;
+                break;
+
+            default:
+                ADDR_ASSERT(!"Invalid number type");
+                pInfo->numType[comp] = ADDR_NO_NUMBER;
+                break;
+        }
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::GetCompSwap
+*
+*   @brief
+*       Reorder the per-component start/width entries of pInfo according to the swap
+*       mode and the number of components (pInfo->comps)
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::GetCompSwap(
+    AddrSurfaceSwap         swap,   ///< [in] swap mode
+    ADDR_PIXEL_FORMATINFO*  pInfo)  ///< [in/out] output per component info
+{
+    // Dispatch on the swap mode first, then on how many components the format has.
+    // Swap modes and component counts that are not listed leave pInfo unchanged.
+    switch (swap)
+    {
+        case ADDR_SWAP_ALT:
+            switch (pInfo->comps)
+            {
+                case 4:
+                    SwapComps( 0, 2, pInfo );   // BGRA
+                    break;
+                case 3:
+                    SwapComps( 2, 3, pInfo );   // RGA
+                    break;
+                case 2:
+                    SwapComps( 1, 3, pInfo );   // RA
+                    break;
+                case 1:
+                    SwapComps( 0, 1, pInfo );   // G
+                    break;
+                default:
+                    break;
+            }
+            break;
+
+        case ADDR_SWAP_STD_REV:
+            switch (pInfo->comps)
+            {
+                case 4:
+                    SwapComps( 0, 3, pInfo );
+                    SwapComps( 1, 2, pInfo );   // ABGR
+                    break;
+                case 3:
+                    SwapComps( 0, 2, pInfo );   // BGR
+                    break;
+                case 2:
+                    SwapComps( 0, 1, pInfo );   // GR
+                    break;
+                case 1:
+                    SwapComps( 0, 2, pInfo );   // B
+                    break;
+                default:
+                    break;
+            }
+            break;
+
+        case ADDR_SWAP_ALT_REV:
+            switch (pInfo->comps)
+            {
+                case 4:
+                    SwapComps( 0, 3, pInfo );
+                    SwapComps( 0, 2, pInfo );
+                    SwapComps( 0, 1, pInfo );   // ARGB
+                    break;
+                case 3:
+                    SwapComps( 0, 3, pInfo );
+                    SwapComps( 0, 2, pInfo );   // AGR
+                    break;
+                case 2:
+                    SwapComps( 0, 1, pInfo );
+                    SwapComps( 1, 3, pInfo );   // AR
+                    break;
+                case 1:
+                    SwapComps( 0, 3, pInfo );   // A
+                    break;
+                default:
+                    break;
+            }
+            break;
+
+        default:
+            break;  // standard order (RGBA / RGB / RG / R): nothing to swap
+    }
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::SwapComps
+*
+*   @brief
+*       Exchange the start bit and the bit width of two components; the numType
+*       entries are not touched
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::SwapComps(
+    UINT_32                 c0,     ///< [in] component index 0
+    UINT_32                 c1,     ///< [in] component index 1
+    ADDR_PIXEL_FORMATINFO*  pInfo)  ///< [in/out] output per component info
+{
+    // Save component c0 before it is overwritten
+    const UINT_32 start0 = pInfo->compStart[c0];
+    const UINT_32 bits0  = pInfo->compBit[c0];
+
+    pInfo->compStart[c0] = pInfo->compStart[c1];
+    pInfo->compBit[c0]   = pInfo->compBit[c1];
+
+    pInfo->compStart[c1] = start0;
+    pInfo->compBit[c1]   = bits0;
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::PixGetColorCompInfo
+*
+*   @brief
+*       Get per component info for color surface: component bit widths from the format,
+*       then number types (GetCompType), then the component swap (GetCompSwap)
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::PixGetColorCompInfo(
+    AddrColorFormat         format, ///< [in] surface format, read from register
+    AddrSurfaceNumber       number, ///< [in] pixel number type
+    AddrSurfaceSwap         swap,   ///< [in] component swap mode
+    ADDR_PIXEL_FORMATINFO*  pInfo   ///< [out] output per component info
+    ) const
+{
+    // 1. Get component bits
+    //    The widths passed to GetCompBits are listed in the reverse of the order used in
+    //    the format name (e.g. ADDR_COLOR_1_5_5_5 -> 5, 5, 5, 1).
+    switch (format)
+    {
+        case ADDR_COLOR_8:
+            GetCompBits(8, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_1_5_5_5:
+            GetCompBits(5, 5, 5, 1, pInfo);
+            break;
+        case ADDR_COLOR_5_6_5:
+            // 5 + 6 + 5 = 16 bits; the first width must be 5, not 8
+            GetCompBits(5, 6, 5, 0, pInfo);
+            break;
+        case ADDR_COLOR_6_5_5:
+            GetCompBits(5, 5, 6, 0, pInfo);
+            break;
+        case ADDR_COLOR_8_8:
+            GetCompBits(8, 8, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_4_4_4_4:
+            GetCompBits(4, 4, 4, 4, pInfo);
+            break;
+        case ADDR_COLOR_16:
+            GetCompBits(16, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_8_8_8_8:
+            GetCompBits(8, 8, 8, 8, pInfo);
+            break;
+        case ADDR_COLOR_2_10_10_10:
+            GetCompBits(10, 10, 10, 2, pInfo);
+            break;
+        case ADDR_COLOR_10_11_11:
+            GetCompBits(11, 11, 10, 0, pInfo);
+            break;
+        case ADDR_COLOR_11_11_10:
+            GetCompBits(10, 11, 11, 0, pInfo);
+            break;
+        case ADDR_COLOR_16_16:
+            GetCompBits(16, 16, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_16_16_16_16:
+            GetCompBits(16, 16, 16, 16, pInfo);
+            break;
+        case ADDR_COLOR_16_FLOAT:
+            GetCompBits(16, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_16_16_FLOAT:
+            GetCompBits(16, 16, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_32_FLOAT:
+            GetCompBits(32, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_32_32_FLOAT:
+            GetCompBits(32, 32, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_16_16_16_16_FLOAT:
+            GetCompBits(16, 16, 16, 16, pInfo);
+            break;
+        case ADDR_COLOR_32_32_32_32_FLOAT:
+            GetCompBits(32, 32, 32, 32, pInfo);
+            break;
+
+        case ADDR_COLOR_32:
+            GetCompBits(32, 0, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_32_32:
+            GetCompBits(32, 32, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_32_32_32_32:
+            GetCompBits(32, 32, 32, 32, pInfo);
+            break;
+        case ADDR_COLOR_10_10_10_2:
+            GetCompBits(2, 10, 10, 10, pInfo);
+            break;
+        case ADDR_COLOR_10_11_11_FLOAT:
+            GetCompBits(11, 11, 10, 0, pInfo);
+            break;
+        case ADDR_COLOR_11_11_10_FLOAT:
+            GetCompBits(10, 11, 11, 0, pInfo);
+            break;
+        case ADDR_COLOR_5_5_5_1:
+            GetCompBits(1, 5, 5, 5, pInfo);
+            break;
+        case ADDR_COLOR_3_3_2:
+            GetCompBits(2, 3, 3, 0, pInfo);
+            break;
+        case ADDR_COLOR_4_4:
+            GetCompBits(4, 4, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_8_24:
+        case ADDR_COLOR_8_24_FLOAT:  // same bit count, fall through
+            GetCompBits(24, 8, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_24_8:
+        case ADDR_COLOR_24_8_FLOAT:  // same bit count, fall through
+            GetCompBits(8, 24, 0, 0, pInfo);
+            break;
+        case ADDR_COLOR_X24_8_32_FLOAT:
+            GetCompBits(32, 8, 0, 0, pInfo);
+            break;
+
+        case ADDR_COLOR_INVALID:
+            GetCompBits(0, 0, 0, 0, pInfo);
+            break;
+        default:
+            // Unknown format: assert and treat it like ADDR_COLOR_INVALID
+            ADDR_ASSERT(0);
+            GetCompBits(0, 0, 0, 0, pInfo);
+            break;
+    }
+
+    // 2. Get component number type
+
+    GetCompType(format, number, pInfo);
+
+    // 3. Swap components if needed
+
+    GetCompSwap(swap, pInfo);
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::PixGetDepthCompInfo
+*
+*   @brief
+*       Get per component info for depth surface: component bit widths and the number
+*       types of the first two components; components 2 and 3 are always ADDR_NO_NUMBER
+*
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID AddrElemLib::PixGetDepthCompInfo(
+    AddrDepthFormat         format,     ///< [in] surface format, read from register
+    ADDR_PIXEL_FORMATINFO*  pInfo       ///< [out] output per component bits and type
+    ) const
+{
+    // With the R800 depth-planar type, two of the float formats are remapped first
+    if (m_depthPlanarType == ADDR_DEPTH_PLANAR_R800)
+    {
+        if (format == ADDR_DEPTH_8_24_FLOAT)
+        {
+            format = ADDR_DEPTH_X24_8_32_FLOAT; // Use this format to represent R800's D24FS8
+        }
+
+        if (format == ADDR_DEPTH_X8_24_FLOAT)
+        {
+            format = ADDR_DEPTH_32_FLOAT;
+        }
+    }
+
+    // For every format: set the bit widths first, then the two number types
+    switch (format)
+    {
+        case ADDR_DEPTH_16:
+            GetCompBits(16, 0, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_UNORM_R6XX;
+            pInfo->numType[1] = ADDR_ZERO;
+            break;
+        case ADDR_DEPTH_8_24:
+            GetCompBits(24, 8, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_UNORM_R6XXDB;
+            pInfo->numType[1] = ADDR_UINT_BITS;
+            break;
+        case ADDR_DEPTH_8_24_FLOAT:
+            GetCompBits(24, 8, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_U4FLOATC;
+            pInfo->numType[1] = ADDR_UINT_BITS;
+            break;
+        case ADDR_DEPTH_X8_24:
+            GetCompBits(24, 0, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_UNORM_R6XXDB;
+            pInfo->numType[1] = ADDR_ZERO;
+            break;
+        case ADDR_DEPTH_X8_24_FLOAT:
+            GetCompBits(24, 0, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_U4FLOATC;
+            pInfo->numType[1] = ADDR_ZERO;
+            break;
+        case ADDR_DEPTH_32_FLOAT:
+            GetCompBits(32, 0, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_S8FLOAT32;
+            pInfo->numType[1] = ADDR_ZERO;
+            break;
+        case ADDR_DEPTH_X24_8_32_FLOAT:
+            GetCompBits(32, 8, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_S8FLOAT32;
+            pInfo->numType[1] = ADDR_UINT_BITS;
+            break;
+        case ADDR_DEPTH_INVALID:
+            GetCompBits(0, 0, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_NO_NUMBER;
+            pInfo->numType[1] = ADDR_NO_NUMBER;
+            break;
+        default:
+            // Unknown format: assert, then treat it like ADDR_DEPTH_INVALID
+            ADDR_ASSERT(0);
+            GetCompBits(0, 0, 0, 0, pInfo);
+            pInfo->numType[0] = ADDR_NO_NUMBER;
+            pInfo->numType[1] = ADDR_NO_NUMBER;
+            break;
+    }
+
+    pInfo->numType[2] = ADDR_NO_NUMBER;
+    pInfo->numType[3] = ADDR_NO_NUMBER;
+}
+
+/**
+***************************************************************************************************
+*   AddrElemLib::PixGetExportNorm
+*
+*   @brief
+*       Check if fp16 export norm can be enabled.
+*
+*   @return
+*       TRUE if this can be enabled, i.e. no component rules it out.
+*
+***************************************************************************************************
+*/
+BOOL_32 AddrElemLib::PixGetExportNorm(
+    AddrColorFormat     colorFmt,   ///< [in] surface format, read from register
+    AddrSurfaceNumber   numberFmt,  ///< [in] pixel number type
+    AddrSurfaceSwap     swap        ///< [in] components swap type
+    ) const
+{
+    ADDR_PIXEL_FORMATINFO info;
+
+    PixGetColorCompInfo(colorFmt, numberFmt, swap, &info);
+
+    for (UINT_32 comp = 0; comp < 4; comp++)
+    {
+        // Components of at most 11 bits whose number type is not above ADDR_USCALED are fine
+        if ((info.compBit[comp] <= 11) && (info.numType[comp] <= ADDR_USCALED))
+        {
+            continue;
+        }
+
+        // Without fp16 export norm there are no exceptions to the rule above
+        if (!m_fp16ExportNorm)
+        {
+            return FALSE;
+        }
+
+        // With fp16 export norm these float types are still acceptable
+        const BOOL_32 allowedFloat = (info.numType[comp] == ADDR_U4FLOATC) ||
+                                     (info.numType[comp] == ADDR_S5FLOAT)  ||
+                                     (info.numType[comp] == ADDR_S5FLOATM) ||
+                                     (info.numType[comp] == ADDR_U5FLOAT)  ||
+                                     (info.numType[comp] == ADDR_U3FLOATM);
+
+        if (!allowedFloat)
+        {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+/**
+***************************************************************************************************
+* AddrElemLib::AdjustSurfaceInfo
+*
+* @brief
+* Adjust bpp/base pitch/width/height according to elemMode and expandX/Y
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::AdjustSurfaceInfo(
+    AddrElemMode    elemMode,       ///< [in] element mode
+    UINT_32         expandX,        ///< [in] decompression expansion factor in X
+    UINT_32         expandY,        ///< [in] decompression expansion factor in Y
+    UINT_32*        pBpp,           ///< [in/out] bpp
+    UINT_32*        pBasePitch,     ///< [in/out] base pitch
+    UINT_32*        pWidth,         ///< [in/out] width
+    UINT_32*        pHeight)        ///< [in/out] height
+{
+    // Only set while translating bpp, so it stays FALSE when pBpp is NULL.
+    BOOL_32 isBcn = FALSE;
+
+    ADDR_ASSERT(pBpp != NULL);
+    ADDR_ASSERT(pWidth != NULL && pHeight != NULL && pBasePitch != NULL);
+
+    if (pBpp != NULL)
+    {
+        const UINT_32 elemBpp     = *pBpp;
+        UINT_32       adjustedBpp = elemBpp; // pass-through unless a case below overrides it
+
+        switch (elemMode)
+        {
+            case ADDR_EXPANDED:
+                adjustedBpp = elemBpp / expandX / expandY;
+                break;
+            case ADDR_PACKED_STD: // Different bit order
+            case ADDR_PACKED_REV:
+                adjustedBpp = elemBpp * expandX * expandY;
+                break;
+            case ADDR_PACKED_BC1:
+            case ADDR_PACKED_BC4:
+                isBcn       = TRUE;
+                adjustedBpp = 64;
+                break;
+            case ADDR_PACKED_BC2:
+            case ADDR_PACKED_BC3:
+            case ADDR_PACKED_BC5:
+                isBcn       = TRUE;
+                adjustedBpp = 128;
+                break;
+            case ADDR_PACKED_GBGR: // 32-bit packed ==> 2 32-bit result
+            case ADDR_PACKED_BGRG:
+            case ADDR_ROUND_BY_HALF:
+            case ADDR_ROUND_TRUNCATE:
+            case ADDR_ROUND_DITHER:
+            case ADDR_UNCOMPRESSED:
+                // bpp is kept as is
+                break;
+            default:
+                // Unknown element mode: keep bpp and flag it
+                ADDR_ASSERT_ALWAYS();
+                break;
+        }
+
+        *pBpp = adjustedBpp;
+    }
+
+    if ((pWidth == NULL) || (pHeight == NULL) || (pBasePitch == NULL))
+    {
+        return;
+    }
+
+    if ((expandX <= 1) && (expandY <= 1))
+    {
+        // No expansion in either direction: pitch/width/height are left untouched
+        return;
+    }
+
+    UINT_32 adjPitch  = *pBasePitch;
+    UINT_32 adjWidth  = *pWidth;
+    UINT_32 adjHeight = *pHeight;
+
+    if (elemMode == ADDR_EXPANDED)
+    {
+        adjPitch  *= expandX;
+        adjWidth  *= expandX;
+        adjHeight *= expandY;
+    }
+    else if (isBcn && (m_pAddrLib->GetAddrChipFamily() == ADDR_CHIP_FAMILY_R8XX))
+    {
+        // Evergreen family workaround.
+        // For BCn we now pad it to POW2 at the beginning so it is safe to
+        // divide by 4 directly
+        adjPitch  = adjPitch / expandX;
+        adjWidth  = adjWidth / expandX;
+        adjHeight = adjHeight / expandY;
+#if DEBUG
+        adjWidth  = (adjWidth == 0) ? 1 : adjWidth;
+        adjHeight = (adjHeight == 0) ? 1 : adjHeight;
+
+        if ((*pWidth > PowTwoAlign(adjWidth, 8) * expandX) ||
+            (*pHeight > PowTwoAlign(adjHeight, 8) * expandY)) // 8 is 1D tiling alignment
+        {
+            // if this assertion is hit we may have issues if app samples
+            // rightmost/bottommost pixels
+            ADDR_ASSERT_ALWAYS();
+        }
+#endif
+    }
+    else // Not BCn format we still keep old way (FMT_1? No real test yet)
+    {
+        // Round up so a partially covered element still counts
+        adjPitch  = (adjPitch + expandX - 1) / expandX;
+        adjWidth  = (adjWidth + expandX - 1) / expandX;
+        adjHeight = (adjHeight + expandY - 1) / expandY;
+    }
+
+    *pBasePitch = adjPitch; // 0 is legal value for base pitch.
+    *pWidth     = (adjWidth == 0) ? 1 : adjWidth;
+    *pHeight    = (adjHeight == 0) ? 1 : adjHeight;
+}
+
+/**
+***************************************************************************************************
+* AddrElemLib::RestoreSurfaceInfo
+*
+* @brief
+* Reverse operation of AdjustSurfaceInfo
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::RestoreSurfaceInfo(
+    AddrElemMode    elemMode,       ///< [in] element mode
+    UINT_32         expandX,        ///< [in] decompression expansion factor in X
+    UINT_32         expandY,        ///< [in] decompression expansion factor in Y
+    UINT_32*        pBpp,           ///< [in/out] bpp
+    UINT_32*        pWidth,         ///< [in/out] width
+    UINT_32*        pHeight)        ///< [in/out] height
+{
+    ADDR_ASSERT(pBpp != NULL);
+    ADDR_ASSERT(pWidth != NULL && pHeight != NULL);
+
+    if (pBpp != NULL)
+    {
+        const UINT_32 packedBpp   = *pBpp;
+        UINT_32       restoredBpp = packedBpp; // pass-through unless a case below overrides it
+
+        switch (elemMode)
+        {
+            case ADDR_EXPANDED:
+                restoredBpp = packedBpp * expandX * expandY;
+                break;
+            case ADDR_PACKED_STD: // Different bit order
+            case ADDR_PACKED_REV:
+                restoredBpp = packedBpp / expandX / expandY;
+                break;
+            case ADDR_PACKED_BC1:
+            case ADDR_PACKED_BC4:
+                restoredBpp = 64;
+                break;
+            case ADDR_PACKED_BC2:
+            case ADDR_PACKED_BC3:
+            case ADDR_PACKED_BC5:
+                restoredBpp = 128;
+                break;
+            case ADDR_PACKED_GBGR: // 32-bit packed ==> 2 32-bit result
+            case ADDR_PACKED_BGRG:
+            case ADDR_ROUND_BY_HALF:
+            case ADDR_ROUND_TRUNCATE:
+            case ADDR_ROUND_DITHER:
+            case ADDR_UNCOMPRESSED:
+                // bpp is kept as is
+                break;
+            default:
+                // Unknown element mode: keep bpp and flag it
+                ADDR_ASSERT_ALWAYS();
+                break;
+        }
+
+        *pBpp = restoredBpp;
+    }
+
+    if ((pWidth == NULL) || (pHeight == NULL))
+    {
+        return;
+    }
+
+    UINT_32 restoredWidth  = *pWidth;
+    UINT_32 restoredHeight = *pHeight;
+
+    if ((expandX > 1) || (expandY > 1))
+    {
+        if (elemMode == ADDR_EXPANDED)
+        {
+            restoredWidth  /= expandX;
+            restoredHeight /= expandY;
+        }
+        else
+        {
+            restoredWidth  *= expandX;
+            restoredHeight *= expandY;
+        }
+    }
+
+    // Dimensions are written back even without expansion, and never as 0
+    if (restoredWidth == 0)
+    {
+        restoredWidth = 1;
+    }
+    if (restoredHeight == 0)
+    {
+        restoredHeight = 1;
+    }
+
+    *pWidth  = restoredWidth;
+    *pHeight = restoredHeight;
+}
+
+/**
+***************************************************************************************************
+* AddrElemLib::GetBitsPerPixel
+*
+* @brief
+* Compute the total bits per element according to a format
+* code. For compressed formats, this is not the same as
+* the number of bits per decompressed element.
+*
+* @return
+* Bits per pixel
+***************************************************************************************************
+*/
+UINT_32 AddrElemLib::GetBitsPerPixel(
+    AddrFormat      format,         ///< [in] surface format code
+    AddrElemMode*   pElemMode,      ///< [out] element mode
+    UINT_32*        pExpandX,       ///< [out] decompression expansion factor in X
+    UINT_32*        pExpandY,       ///< [out] decompression expansion factor in Y
+    UINT_32*        pUnusedBits)    ///< [out] bits unused
+{
+    UINT_32 bpp;
+    UINT_32 expandX = 1;
+    UINT_32 expandY = 1;
+    UINT_32 bitUnused = 0;
+    AddrElemMode elemMode = ADDR_UNCOMPRESSED; // default value
+
+    switch (format)
+    {
+        case ADDR_FMT_8:
+            bpp = 8;
+            break;
+        case ADDR_FMT_1_5_5_5:
+        case ADDR_FMT_5_6_5:
+        case ADDR_FMT_6_5_5:
+        case ADDR_FMT_8_8:
+        case ADDR_FMT_4_4_4_4:
+        case ADDR_FMT_16:
+        case ADDR_FMT_16_FLOAT:
+            bpp = 16;
+            break;
+        case ADDR_FMT_GB_GR: // treat as FMT_8_8
+            elemMode = ADDR_PACKED_GBGR;
+            bpp = 16;
+            break;
+        case ADDR_FMT_BG_RG: // treat as FMT_8_8
+            elemMode = ADDR_PACKED_BGRG;
+            bpp = 16;
+            break;
+        case ADDR_FMT_8_8_8_8:
+        case ADDR_FMT_2_10_10_10:
+        case ADDR_FMT_10_11_11:
+        case ADDR_FMT_11_11_10:
+        case ADDR_FMT_16_16:
+        case ADDR_FMT_16_16_FLOAT:
+        case ADDR_FMT_32:
+        case ADDR_FMT_32_FLOAT:
+        case ADDR_FMT_24_8:
+        case ADDR_FMT_24_8_FLOAT:
+            bpp = 32;
+            break;
+        case ADDR_FMT_16_16_16_16:
+        case ADDR_FMT_16_16_16_16_FLOAT:
+        case ADDR_FMT_32_32:
+        case ADDR_FMT_32_32_FLOAT:
+        case ADDR_FMT_CTX1:
+            bpp = 64;
+            break;
+        case ADDR_FMT_32_32_32_32:
+        case ADDR_FMT_32_32_32_32_FLOAT:
+            bpp = 128;
+            break;
+        case ADDR_FMT_INVALID:
+            bpp = 0;
+            break;
+        case ADDR_FMT_1_REVERSED:
+            elemMode = ADDR_PACKED_REV;
+            expandX = 8;
+            bpp = 1;
+            break;
+        case ADDR_FMT_1:
+            elemMode = ADDR_PACKED_STD;
+            expandX = 8;
+            bpp = 1;
+            break;
+        case ADDR_FMT_4_4:
+        case ADDR_FMT_3_3_2:
+            bpp = 8;
+            break;
+        case ADDR_FMT_5_5_5_1:
+            bpp = 16;
+            break;
+        case ADDR_FMT_32_AS_8:
+        case ADDR_FMT_32_AS_8_8:
+        case ADDR_FMT_8_24:
+        case ADDR_FMT_8_24_FLOAT:
+        case ADDR_FMT_10_10_10_2:
+        case ADDR_FMT_10_11_11_FLOAT:
+        case ADDR_FMT_11_11_10_FLOAT:
+        case ADDR_FMT_5_9_9_9_SHAREDEXP:
+            bpp = 32;
+            break;
+        case ADDR_FMT_X24_8_32_FLOAT:
+            bpp = 64;
+            bitUnused = 24;
+            break;
+        case ADDR_FMT_8_8_8:
+            elemMode = ADDR_EXPANDED;
+            bpp = 24;//@@ 8;      // read 3 elements per pixel
+            expandX = 3;
+            break;
+        case ADDR_FMT_16_16_16:
+        case ADDR_FMT_16_16_16_FLOAT:
+            elemMode = ADDR_EXPANDED;
+            bpp = 48;//@@ 16;      // read 3 elements per pixel
+            expandX = 3;
+            break;
+        case ADDR_FMT_32_32_32_FLOAT:
+        case ADDR_FMT_32_32_32:
+            elemMode = ADDR_EXPANDED;
+            expandX = 3;
+            bpp = 96;//@@ 32;      // read 3 elements per pixel
+            break;
+        case ADDR_FMT_BC1:
+            elemMode = ADDR_PACKED_BC1;
+            expandX = 4;
+            expandY = 4;
+            bpp = 64;
+            break;
+        case ADDR_FMT_BC4:
+            elemMode = ADDR_PACKED_BC4;
+            expandX = 4;
+            expandY = 4;
+            bpp = 64;
+            break;
+        case ADDR_FMT_BC2:
+            elemMode = ADDR_PACKED_BC2;
+            expandX = 4;
+            expandY = 4;
+            bpp = 128;
+            break;
+        case ADDR_FMT_BC3:
+            elemMode = ADDR_PACKED_BC3;
+            expandX = 4;
+            expandY = 4;
+            bpp = 128;
+            break;
+        case ADDR_FMT_BC5:
+        case ADDR_FMT_BC6: // reuse ADDR_PACKED_BC5
+        case ADDR_FMT_BC7: // reuse ADDR_PACKED_BC5
+            elemMode = ADDR_PACKED_BC5;
+            expandX = 4;
+            expandY = 4;
+            bpp = 128;
+            break;
+        default:
+            // Unknown format code: report 0 bpp and flag it
+            // @@ or should this be an error?
+            bpp = 0;
+            ADDR_ASSERT_ALWAYS();
+            break;
+    }
+
+    // Every out-parameter goes through SafeAssign; pExpandX/pExpandY/pUnusedBits may be
+    // omitted by callers (they default to NULL in the class declaration).
+    SafeAssign(pExpandX, expandX);
+    SafeAssign(pExpandY, expandY);
+    SafeAssign(pUnusedBits, bitUnused);
+    // The cast needs an explicit target type to compile. UINT_32* matches the pointer
+    // type used by the other SafeAssign calls above.
+    // NOTE(review): assumes AddrElemMode is stored as a 32-bit value - confirm.
+    SafeAssign(reinterpret_cast<UINT_32*>(pElemMode), elemMode);
+
+    return bpp;
+}
+
+/**
+***************************************************************************************************
+* AddrElemLib::GetCompBits
+*
+* @brief
+* Set each component's bit size and bit start. And set element mode and number type
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::GetCompBits(
+    UINT_32 c0,                     ///< [in] bits of component 0
+    UINT_32 c1,                     ///< [in] bits of component 1
+    UINT_32 c2,                     ///< [in] bits of component 2
+    UINT_32 c3,                     ///< [in] bits of component 3
+    ADDR_PIXEL_FORMATINFO* pInfo,   ///< [out] per component info out
+    AddrElemMode elemMode)          ///< [in] element mode
+{
+    const UINT_32 widths[4] = { c0, c1, c2, c3 };
+    UINT_32 bitCursor = 0;  // running sum of the widths of all earlier components
+    UINT_32 present   = 0;  // components with a non-zero width
+
+    pInfo->elemMode = elemMode;
+
+    // The component count is still needed since component swap may depend on it
+    for (UINT_32 idx = 0; idx < 4; idx++)
+    {
+        pInfo->compBit[idx] = widths[idx];
+
+        if (widths[idx] == 0)
+        {
+            pInfo->compStart[idx] = 0;              // all null components start at bit 0
+            pInfo->numType[idx]   = ADDR_NO_NUMBER; // and have no number type
+        }
+        else
+        {
+            pInfo->compStart[idx] = bitCursor;
+            present++;
+        }
+
+        bitCursor += widths[idx];
+    }
+
+    pInfo->comps = present;
+}
+
+/**
+***************************************************************************************************
+* AddrElemLib::SetClearComps
+*
+* @brief
+* Set the clear color (or clear depth/stencil) for a surface
+*
+* @note
+* If clearColor is zero, a default clear value is used in place of comps[4].
+* If float32 is set, full precision is used, else the mantissa is reduced to 12-bits
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrElemLib::SetClearComps(
+    ADDR_FLT_32 comps[4],   ///< [in/out] components
+    BOOL_32 clearColor,     ///< [in] TRUE if clear color is set (CLEAR_COLOR)
+    BOOL_32 float32)        ///< [in] TRUE if float32 component (BLEND_FLOAT32)
+{
+    if (clearColor == FALSE)
+    {
+        // Clear color disabled: fall back to the default (0, 0, 0, 1)
+        comps[0].f = 0.0;
+        comps[1].f = 0.0;
+        comps[2].f = 0.0;
+        comps[3].f = 1.0;
+        return;
+    }
+
+    if (float32)
+    {
+        // Full precision: the supplied clear value is used unchanged
+        return;
+    }
+
+    for (INT_32 idx = 0; idx < 4; idx++)
+    {
+        if ((comps[idx].u & 0x7FFFFFFF) > 0x7F800000)
+        {
+            // Magnitude bits above the all-ones-exponent pattern: a NaN, replaced by
+            // the standard NaN value
+            comps[idx].u = 0xFFC00000;
+        }
+        else
+        {
+            // Reduce the mantissa precision by clearing its low 12 bits
+            comps[idx].u = comps[idx].u & 0xFFFFF000;
+        }
+    }
+}
+
+/**
+***************************************************************************************************
+* AddrElemLib::IsBlockCompressed
+*
+* @brief
+* TRUE if this is block compressed format
+*
+* @note
+*
+* @return
+* BOOL_32
+***************************************************************************************************
+*/
+BOOL_32 AddrElemLib::IsBlockCompressed(
+    AddrFormat format)  ///< [in] Format
+{
+    // Range test: relies on the BC format codes being contiguous from BC1 to BC7
+    if (format < ADDR_FMT_BC1)
+    {
+        return false;
+    }
+
+    return format <= ADDR_FMT_BC7;
+}
+
+
+/**
+***************************************************************************************************
+*   AddrElemLib::IsCompressed
+*
+*   @brief
+*       TRUE if this is block compressed format
+*
+*   @note
+*       The result is exactly that of IsBlockCompressed(). The former extra comparisons
+*       against ADDR_FMT_BC1 and ADDR_FMT_BC7 were already covered by its BC1..BC7 range
+*       test and have been dropped without changing the result.
+*       NOTE(review): this comment used to say "or 1 bit format", but no 1-bit format
+*       was ever tested here - confirm whether ADDR_FMT_1 / ADDR_FMT_1_REVERSED were
+*       meant to be included before changing behavior.
+*
+*   @return
+*       BOOL_32
+***************************************************************************************************
+*/
+BOOL_32 AddrElemLib::IsCompressed(
+    AddrFormat format)  ///< [in] Format
+{
+    return IsBlockCompressed(format);
+}
+
+/**
+***************************************************************************************************
+* AddrElemLib::IsExpand3x
+*
+* @brief
+* TRUE if this is 3x expand format
+*
+* @note
+*
+* @return
+* BOOL_32
+***************************************************************************************************
+*/
+BOOL_32 AddrElemLib::IsExpand3x(
+    AddrFormat format)  ///< [in] Format
+{
+    // The three-component formats; GetBitsPerPixel reports expandX = 3 for each of them
+    switch (format)
+    {
+        case ADDR_FMT_8_8_8:
+        case ADDR_FMT_16_16_16:
+        case ADDR_FMT_16_16_16_FLOAT:
+        case ADDR_FMT_32_32_32:
+        case ADDR_FMT_32_32_32_FLOAT:
+            return TRUE;
+        default:
+            return FALSE;
+    }
+}
+
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.h b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.h
new file mode 100644
index 00000000000..c302b3b1788
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrelemlib.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrelemlib.h
+* @brief Contains the class for element/pixel related functions
+***************************************************************************************************
+*/
+
+#ifndef __ELEM_LIB_H__
+#define __ELEM_LIB_H__
+
+#include "addrinterface.h"
+#include "addrobject.h"
+#include "addrcommon.h"
+
+class AddrLib;
+
+// Property flags describing the components of a pixel format.
+// The anonymous bitfield struct and `value` share storage, so `value` reads or
+// writes all of the flags at once as a single UINT_32.
+union ADDR_COMPONENT_FLAGS
+{
+ struct
+ {
+ UINT_32 byteAligned : 1; ///< all components are byte aligned
+ UINT_32 exportNorm : 1; ///< components support R6xx NORM compression
+ UINT_32 floatComp : 1; ///< there is at least one floating point component
+ };
+
+ UINT_32 value;
+};
+
+// Copy from legacy lib's AddrNumberType
+// The enumerator ORDER is significant: AddrElemLib::PixGetExportNorm compares number
+// types against ADDR_USCALED with '>', so do not reorder or insert values casually.
+enum AddrNumberType
+{
+ // The following number types have the range [-1..1]
+ ADDR_NO_NUMBER, // This component doesn't exist and has no default value
+ ADDR_EPSILON, // Force component value to integer 0x00000001
+ ADDR_ZERO, // Force component value to integer 0x00000000
+ ADDR_ONE, // Force component value to floating point 1.0
+ // Above values don't have any bits per component (keep ADDR_ONE the last of these)
+
+ ADDR_UNORM, // Unsigned normalized (repeating fraction) full precision
+ ADDR_SNORM, // Signed normalized (repeating fraction) full precision
+ ADDR_GAMMA, // Gamma-corrected, full precision
+
+ ADDR_UNORM_R5XXRB, // Unsigned normalized (repeating fraction) for r5xx RB
+ ADDR_SNORM_R5XXRB, // Signed normalized (repeating fraction) for r5xx RB
+ ADDR_GAMMA_R5XXRB, // Gamma-corrected for r5xx RB (note: unnormalized value)
+ ADDR_UNORM_R5XXBC, // Unsigned normalized (repeating fraction) for r5xx BC
+ ADDR_SNORM_R5XXBC, // Signed normalized (repeating fraction) for r5xx BC
+ ADDR_GAMMA_R5XXBC, // Gamma-corrected for r5xx BC (note: unnormalized value)
+
+ ADDR_UNORM_R6XX, // Unsigned normalized (repeating fraction) for R6xx
+ ADDR_UNORM_R6XXDB, // Unorms for 24-bit depth: one value differs from ADDR_UNORM_R6XX
+ ADDR_SNORM_R6XX, // Signed normalized (repeating fraction) for R6xx
+ ADDR_GAMMA8_R6XX, // Gamma-corrected for r6xx
+ ADDR_GAMMA8_R7XX_TP, // Gamma-corrected for r7xx TP 12bit unorm 8.4.
+
+ ADDR_U4FLOATC, // Unsigned float: 4-bit exponent, bias=15, no NaN, clamp [0..1]
+ ADDR_GAMMA_4SEG, // Gamma-corrected, four segment approximation
+ ADDR_U0FIXED, // Unsigned 0.N-bit fixed point
+
+ // The following number types have large ranges (LEAVE ADDR_USCALED first or fix Finish routine)
+ ADDR_USCALED, // Unsigned integer converted to/from floating point
+ ADDR_SSCALED, // Signed integer converted to/from floating point
+ ADDR_USCALED_R5XXRB, // Unsigned integer to/from floating point for r5xx RB
+ ADDR_SSCALED_R5XXRB, // Signed integer to/from floating point for r5xx RB
+ ADDR_UINT_BITS, // Keep in unsigned integer form, clamped to specified range
+ ADDR_SINT_BITS, // Keep in signed integer form, clamped to specified range
+ ADDR_UINTBITS, // @@ remove Keep in unsigned integer form, use modulus to reduce bits
+ ADDR_SINTBITS, // @@ remove Keep in signed integer form, use modulus to reduce bits
+
+ // The following number types and ADDR_U4FLOATC have exponents
+ // (LEAVE ADDR_S8FLOAT first or fix Finish routine)
+ ADDR_S8FLOAT, // Signed floating point with 8-bit exponent, bias=127
+ ADDR_S8FLOAT32, // 32-bit IEEE float, passes through NaN values
+ ADDR_S5FLOAT, // Signed floating point with 5-bit exponent, bias=15
+ ADDR_S5FLOATM, // Signed floating point with 5-bit exponent, bias=15, no NaN/Inf
+ ADDR_U5FLOAT, // Floating point with 5-bit exponent, bias=15 (NOTE(review): was described as "Signed", but the U prefix suggests unsigned - confirm)
+ ADDR_U3FLOATM, // Unsigned floating point with 3-bit exponent, bias=3
+
+ ADDR_S5FIXED, // Signed 5.N-bit fixed point, with rounding
+
+ ADDR_END_NUMBER // Used for range comparisons
+};
+
+// Copy from legacy lib's AddrElement
+// Describes how a data element relates to the pixels/texels it stores.
+enum AddrElemMode
+{
+ // These formats allow both packing and unpacking
+ ADDR_ROUND_BY_HALF, // add 1/2 and truncate when packing this element
+ ADDR_ROUND_TRUNCATE, // truncate toward 0 for sign/mag, else toward neg
+ ADDR_ROUND_DITHER, // Pack by dithering -- requires (x,y) position
+
+ // These formats only allow unpacking, no packing
+ ADDR_UNCOMPRESSED, // Elements are not compressed: one data element per pixel/texel
+ ADDR_EXPANDED, // Elements are split up and stored in multiple data elements
+ ADDR_PACKED_STD, // Elements are compressed into ExpandX by ExpandY data elements
+ ADDR_PACKED_REV, // Like ADDR_PACKED_STD, but X order of pixels is reversed
+ ADDR_PACKED_GBGR, // Elements are compressed 4:2:2 in G1B_G0R order (high to low)
+ ADDR_PACKED_BGRG, // Elements are compressed 4:2:2 in BG1_RG0 order (high to low)
+ ADDR_PACKED_BC1, // Each data element is uncompressed to a 4x4 pixel/texel array
+ ADDR_PACKED_BC2, // Each data element is uncompressed to a 4x4 pixel/texel array
+ ADDR_PACKED_BC3, // Each data element is uncompressed to a 4x4 pixel/texel array
+ ADDR_PACKED_BC4, // Each data element is uncompressed to a 4x4 pixel/texel array
+ ADDR_PACKED_BC5, // Each data element is uncompressed to a 4x4 pixel/texel array
+
+ // These formats provide various kinds of compression
+ ADDR_ZPLANE_R5XX, // Compressed Zplane using r5xx architecture format
+ ADDR_ZPLANE_R6XX, // Compressed Zplane using r6xx architecture format
+ //@@ Fill in the compression modes
+
+ ADDR_END_ELEMENT // Used for range comparisons
+};
+
+// How depth (z) and stencil data are laid out relative to each other.
+enum AddrDepthPlanarType
+{
+ ADDR_DEPTH_PLANAR_NONE = 0, // No planar z/stencil
+ ADDR_DEPTH_PLANAR_R600 = 1, // R600 z and stencil planes are stored within a tile
+ ADDR_DEPTH_PLANAR_R800 = 2, // R800 has separate z and stencil planes
+};
+
+/**
+***************************************************************************************************
+* ADDR_PIXEL_FORMATINFO
+*
+* @brief
+* Per component info
+*
+***************************************************************************************************
+*/
+struct ADDR_PIXEL_FORMATINFO
+{
+ UINT_32 compBit[4]; ///< Bit width of each component (0 for a component that is absent)
+ AddrNumberType numType[4]; ///< Number type of each component
+ UINT_32 compStart[4]; ///< Start bit of each component (0 for an absent component)
+ AddrElemMode elemMode; ///< Element mode of the format
+ UINT_32 comps; ///< Number of components
+};
+
+/**
+***************************************************************************************************
+* @brief This class contains asic independent element related attributes and operations
+***************************************************************************************************
+*/
+class AddrElemLib : public AddrObject
+{
+protected:
+ /// Protected constructor: instances are obtained through Create()
+ AddrElemLib(AddrLib* const pAddrLib);
+
+public:
+
+ /// Makes this class virtual
+ virtual ~AddrElemLib();
+
+ /// Factory; AddrLib::Create treats a NULL result as a creation failure
+ static AddrElemLib *Create(
+ const AddrLib* const pAddrLib);
+
+ /// Checks if fp16 export norm can be enabled for a color format.
+ /// The implementation is only for R6xx/R7xx. It is currently NOT virtual; make it
+ /// virtual if R8xx ever needs a different implementation.
+ BOOL_32 PixGetExportNorm(
+ AddrColorFormat colorFmt,
+ AddrSurfaceNumber numberFmt, AddrSurfaceSwap swap) const;
+
+ /// The methods below are asic independent. Some of them are static, others are
+ /// plain (non-virtual) members.
+ /// Remove static if we need different operation in hwl.
+
+ VOID Flt32ToDepthPixel(
+ AddrDepthFormat format, const ADDR_FLT_32 comps[2], UINT_8 *pPixel) const;
+
+ VOID Flt32ToColorPixel(
+ AddrColorFormat format, AddrSurfaceNumber surfNum, AddrSurfaceSwap surfSwap,
+ const ADDR_FLT_32 comps[4], UINT_8 *pPixel) const;
+
+ static VOID Flt32sToInt32s(
+ ADDR_FLT_32 value, UINT_32 bits, AddrNumberType numberType, UINT_32* pResult);
+
+ static VOID Int32sToPixel(
+ UINT_32 numComps, UINT_32* pComps, UINT_32* pCompBits, UINT_32* pCompStart,
+ ADDR_COMPONENT_FLAGS properties, UINT_32 resultBits, UINT_8* pPixel);
+
+ VOID PixGetColorCompInfo(
+ AddrColorFormat format, AddrSurfaceNumber number, AddrSurfaceSwap swap,
+ ADDR_PIXEL_FORMATINFO* pInfo) const;
+
+ VOID PixGetDepthCompInfo(
+ AddrDepthFormat format, ADDR_PIXEL_FORMATINFO* pInfo) const;
+
+ /// Returns the bits per element of a format; the optional out-parameters receive
+ /// the expansion factors and the number of unused bits
+ UINT_32 GetBitsPerPixel(
+ AddrFormat format, AddrElemMode* pElemMode,
+ UINT_32* pExpandX = NULL, UINT_32* pExpandY = NULL, UINT_32* pBitsUnused = NULL);
+
+ static VOID SetClearComps(
+ ADDR_FLT_32 comps[4], BOOL_32 clearColor, BOOL_32 float32);
+
+ /// Adjusts bpp/base pitch/width/height according to elemMode and expandX/Y
+ VOID AdjustSurfaceInfo(
+ AddrElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
+ UINT_32* pBpp, UINT_32* pBasePitch, UINT_32* pWidth, UINT_32* pHeight);
+
+ /// Reverse operation of AdjustSurfaceInfo (there is no base pitch argument here)
+ VOID RestoreSurfaceInfo(
+ AddrElemMode elemMode, UINT_32 expandX, UINT_32 expandY,
+ UINT_32* pBpp, UINT_32* pWidth, UINT_32* pHeight);
+
+ /// Checks if depth and stencil are planar inside a tile
+ BOOL_32 IsDepthStencilTilePlanar()
+ {
+ return (m_depthPlanarType == ADDR_DEPTH_PLANAR_R600) ? TRUE : FALSE;
+ }
+
+ /// Sets m_configFlags, copied from AddrLib
+ VOID SetConfigFlags(ADDR_CONFIG_FLAGS flags)
+ {
+ m_configFlags = flags;
+ }
+
+ /// Format classification helpers
+ static BOOL_32 IsCompressed(AddrFormat format);
+ static BOOL_32 IsBlockCompressed(AddrFormat format);
+ static BOOL_32 IsExpand3x(AddrFormat format);
+
+protected:
+
+ /// Fills per-component bit sizes and start bits; elemMode defaults to ADDR_ROUND_BY_HALF
+ static VOID GetCompBits(
+ UINT_32 c0, UINT_32 c1, UINT_32 c2, UINT_32 c3,
+ ADDR_PIXEL_FORMATINFO* pInfo,
+ AddrElemMode elemMode = ADDR_ROUND_BY_HALF);
+
+ static VOID GetCompType(
+ AddrColorFormat format, AddrSurfaceNumber numType,
+ ADDR_PIXEL_FORMATINFO* pInfo);
+
+ static VOID GetCompSwap(
+ AddrSurfaceSwap swap, ADDR_PIXEL_FORMATINFO* pInfo);
+
+ static VOID SwapComps(
+ UINT_32 c0, UINT_32 c1, ADDR_PIXEL_FORMATINFO* pInfo);
+
+private:
+
+ UINT_32 m_fp16ExportNorm; ///< If allow FP16 to be reported as EXPORT_NORM
+ AddrDepthPlanarType m_depthPlanarType; ///< Depth/stencil layout, read by IsDepthStencilTilePlanar()
+
+ ADDR_CONFIG_FLAGS m_configFlags; ///< Copy of AddrLib's configFlags
+ AddrLib* const m_pAddrLib; ///< Pointer to parent addrlib instance
+};
+
+#endif
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.cpp b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.cpp
new file mode 100644
index 00000000000..1df693e5be5
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.cpp
@@ -0,0 +1,4023 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrlib.cpp
+* @brief Contains the implementation for the AddrLib base class.
+***************************************************************************************************
+*/
+
+#include "addrinterface.h"
+#include "addrlib.h"
+#include "addrcommon.h"
+
+#if defined(__APPLE__)
+
+// Shift-and-subtract division of a 64-bit value by a 32-bit divisor.
+// Despite the "div" in the name, the value returned is the REMAINDER (n % base),
+// truncated to UINT_32. The quotient is accumulated in `res` but never leaves the
+// function: `n` is passed by value, so the final `n = res` store is not visible to
+// the caller.
+// `base` must be non-zero: with base == 0 the `high >= base` test is always true and
+// the following `high /= base` divides by zero.
+UINT_32 div64_32(UINT_64 n, UINT_32 base)
+{
+ UINT_64 rem = n;
+ UINT_64 b = base;
+ UINT_64 res, d = 1;
+ UINT_32 high = rem >> 32;
+
+ res = 0;
+ // Reduce the upper 32 bits first: afterwards the high word of rem is below base
+ if (high >= base)
+ {
+ high /= base;
+ res = (UINT_64) high << 32;
+ rem -= (UINT_64) (high*base) << 32;
+ }
+
+ // Double the divisor b (and its quotient weight d) until b reaches rem or the
+ // top bit of b is set
+ while ((INT_64)b > 0 && b < rem)
+ {
+ b = b+b;
+ d = d+d;
+ }
+
+ // Walk back down, subtracting b wherever it still fits; stops once d shifts to 0
+ do
+ {
+ if (rem >= b)
+ {
+ rem -= b;
+ res += d;
+ }
+ b >>= 1;
+ d >>= 1;
+ } while (d);
+
+ n = res; // no effect for the caller: n is a by-value parameter
+ return rem;
+}
+
+// C-linkage definition of __umoddi3 that forwards to div64_32 and returns the
+// remainder it computes. Only compiled when __APPLE__ is defined.
+// NOTE(review): presumably supplies the compiler runtime's 64-bit unsigned modulo
+// helper - confirm that this (UINT_64, UINT_32) -> UINT_32 signature matches what the
+// toolchain expects.
+extern "C"
+UINT_32 __umoddi3(UINT_64 n, UINT_32 base)
+{
+ return div64_32(n, base);
+}
+
+#endif // __APPLE__
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Static Const Member
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Per-tile-mode flag table: one row per tile mode, in the order named by the trailing
+// comment on each row (ADDR_TM_COUNT rows in total), eight fields per row.
+// NOTE(review): the column legend "T L 1 2 3 P Pr B" is not spelled out here. The
+// first column holds 1/4/8 and is 4 for the *_THICK and 8 for the *_XTHICK modes, so
+// it presumably is the tile thickness; confirm the remaining columns against the
+// AddrTileModeFlags declaration.
+const AddrTileModeFlags AddrLib::m_modeFlags[ADDR_TM_COUNT] =
+{// T L 1 2 3 P Pr B
+ {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_GENERAL
+ {1, 1, 0, 0, 0, 0, 0, 0}, // ADDR_TM_LINEAR_ALIGNED
+ {1, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THIN1
+ {4, 0, 1, 0, 0, 0, 0, 0}, // ADDR_TM_1D_TILED_THICK
+ {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN1
+ {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN2
+ {1, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THIN4
+ {4, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_THICK
+ {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN1
+ {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN2
+ {1, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THIN4
+ {4, 0, 0, 1, 0, 0, 0, 1}, // ADDR_TM_2B_TILED_THICK
+ {1, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THIN1
+ {4, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_THICK
+ {1, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THIN1
+ {4, 0, 0, 1, 1, 0, 0, 1}, // ADDR_TM_3B_TILED_THICK
+ {8, 0, 0, 1, 0, 0, 0, 0}, // ADDR_TM_2D_TILED_XTHICK
+ {8, 0, 0, 1, 1, 0, 0, 0}, // ADDR_TM_3D_TILED_XTHICK
+ {1, 0, 0, 0, 0, 0, 0, 0}, // ADDR_TM_POWER_SAVE
+ {1, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THIN1
+ {1, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THIN1
+ {1, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THIN1
+ {4, 0, 0, 1, 0, 1, 1, 0}, // ADDR_TM_PRT_TILED_THICK
+ {4, 0, 0, 1, 0, 1, 0, 0}, // ADDR_TM_PRT_2D_TILED_THICK
+ {4, 0, 0, 1, 1, 1, 0, 0}, // ADDR_TM_PRT_3D_TILED_THICK
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Constructor/Destructor
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrLib::AddrLib
+*
+* @brief
+* Constructor for the AddrLib class
+*
+***************************************************************************************************
+*/
+AddrLib::AddrLib() :
+ m_class(BASE_ADDRLIB),
+ m_chipFamily(ADDR_CHIP_FAMILY_IVLD), // placeholder; Create() sets the family via SetAddrChipFamily()
+ m_chipRevision(0),
+ m_version(ADDRLIB_VERSION),
+ m_pipes(0),
+ m_banks(0),
+ m_pipeInterleaveBytes(0),
+ m_rowSize(0),
+ m_minPitchAlignPixels(1), // Create() overrides this via SetMinPitchAlignPixels()
+ m_maxSamples(8),
+ m_pElemLib(NULL) // attached later by Create(); the destructor checks for NULL
+{
+ // Start with every config flag cleared; Create() copies the create flags in afterwards
+ m_configFlags.value = 0;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::AddrLib
+*
+* @brief
+* Constructor for the AddrLib class with hClient as parameter
+*
+***************************************************************************************************
+*/
+AddrLib::AddrLib(const AddrClient* pClient) :
+ AddrObject(pClient), // hand the client (handle + callbacks) to AddrObject
+ m_class(BASE_ADDRLIB),
+ m_chipFamily(ADDR_CHIP_FAMILY_IVLD), // placeholder; Create() sets the family via SetAddrChipFamily()
+ m_chipRevision(0),
+ m_version(ADDRLIB_VERSION),
+ m_pipes(0),
+ m_banks(0),
+ m_pipeInterleaveBytes(0),
+ m_rowSize(0),
+ m_minPitchAlignPixels(1), // Create() overrides this via SetMinPitchAlignPixels()
+ m_maxSamples(8),
+ m_pElemLib(NULL) // attached later by Create(); the destructor checks for NULL
+{
+ // Start with every config flag cleared; Create() copies the create flags in afterwards
+ m_configFlags.value = 0;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::~AddrLib
+*
+* @brief
+* Destructor for the AddrLib class
+*
+***************************************************************************************************
+*/
+AddrLib::~AddrLib()
+{
+    // Nothing to release when no element library was ever attached
+    if (m_pElemLib == NULL)
+    {
+        return;
+    }
+
+    delete m_pElemLib;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Initialization/Helper
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrLib::Create
+*
+* @brief
+* Creates and initializes AddrLib object.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::Create(
+ const ADDR_CREATE_INPUT* pCreateIn, ///< [in] creation parameters: flags, callbacks, chip IDs
+ ADDR_CREATE_OUTPUT* pCreateOut) ///< [out] hLib receives the new library, or NULL on failure
+{
+ AddrLib* pLib = NULL;
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+ // Size validation is opt-in: it only runs when the client set createFlags.fillSizeFields.
+ if (pCreateIn->createFlags.fillSizeFields == TRUE)
+ {
+ if ((pCreateIn->size != sizeof(ADDR_CREATE_INPUT)) ||
+ (pCreateOut->size != sizeof(ADDR_CREATE_OUTPUT)))
+ {
+ returnCode = ADDR_PARAMSIZEMISMATCH;
+ }
+ }
+ // Both memory callbacks are required; if either is NULL no library object is created.
+ if ((returnCode == ADDR_OK) &&
+ (pCreateIn->callbacks.allocSysMem != NULL) &&
+ (pCreateIn->callbacks.freeSysMem != NULL))
+ {
+ AddrClient client = {
+ pCreateIn->hClient,
+ pCreateIn->callbacks
+ };
+ // Pick the hardware layer from chipEngine first, then from chipFamily.
+ switch (pCreateIn->chipEngine)
+ {
+ case CIASICIDGFXENGINE_SOUTHERNISLAND:
+ switch (pCreateIn->chipFamily)
+ {
+ case FAMILY_SI:
+ pLib = AddrSIHwlInit(&client);
+ break;
+ case FAMILY_VI:
+ case FAMILY_CZ: // VI based fusion(carrizo)
+ case FAMILY_CI:
+ case FAMILY_KV: // CI based fusion
+ pLib = AddrCIHwlInit(&client);
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS(); // unhandled family: pLib stays NULL
+ break;
+ }
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS(); // unhandled engine: pLib stays NULL
+ break;
+ }
+ }
+ // Everything below configures the library object that was just created.
+ if ((pLib != NULL))
+ {
+ BOOL_32 initValid;
+
+ // Pass createFlags to configFlags first since these flags may be overwritten
+ pLib->m_configFlags.noCubeMipSlicesPad = pCreateIn->createFlags.noCubeMipSlicesPad;
+ pLib->m_configFlags.fillSizeFields = pCreateIn->createFlags.fillSizeFields;
+ pLib->m_configFlags.useTileIndex = pCreateIn->createFlags.useTileIndex;
+ pLib->m_configFlags.useCombinedSwizzle = pCreateIn->createFlags.useCombinedSwizzle;
+ pLib->m_configFlags.checkLast2DLevel = pCreateIn->createFlags.checkLast2DLevel;
+ pLib->m_configFlags.useHtileSliceAlign = pCreateIn->createFlags.useHtileSliceAlign;
+ pLib->m_configFlags.degradeBaseLevel = pCreateIn->createFlags.degradeBaseLevel;
+ pLib->m_configFlags.allowLargeThickTile = pCreateIn->createFlags.allowLargeThickTile;
+
+ pLib->SetAddrChipFamily(pCreateIn->chipFamily, pCreateIn->chipRevision);
+
+ pLib->SetMinPitchAlignPixels(pCreateIn->minPitchAlignPixels);
+
+ // Global parameters initialized and remaining configFlags bits are set as well
+ initValid = pLib->HwlInitGlobalParams(pCreateIn);
+
+ if (initValid)
+ {
+ pLib->m_pElemLib = AddrElemLib::Create(pLib);
+ }
+ else
+ {
+ pLib->m_pElemLib = NULL; // Don't go on allocating element lib
+ returnCode = ADDR_INVALIDGBREGVALUES;
+ }
+ // A missing element library (either reason above) discards the whole library object.
+ if (pLib->m_pElemLib == NULL)
+ {
+ delete pLib;
+ pLib = NULL;
+ ADDR_ASSERT_ALWAYS();
+ }
+ else
+ {
+ pLib->m_pElemLib->SetConfigFlags(pLib->m_configFlags);
+ }
+ }
+ // hLib is written on every path; it is NULL whenever creation failed.
+ pCreateOut->hLib = pLib;
+
+ if ((pLib == NULL) &&
+ (returnCode == ADDR_OK))
+ {
+ // Unknown failures, we return the general error code
+ returnCode = ADDR_ERROR;
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::SetAddrChipFamily
+*
+* @brief
+* Convert familyID defined in atiid.h to AddrChipFamily and set m_chipFamily/m_chipRevision
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrLib::SetAddrChipFamily(
+    UINT_32 uChipFamily,        ///< [in] chip family ID (see atiid.h)
+    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
+{
+    // The hardware layer translates the raw IDs into an AddrChipFamily value.
+    const AddrChipFamily family = HwlConvertChipFamily(uChipFamily, uChipRevision);
+
+    // Debug-check that the translation did not produce the invalid-family marker.
+    ADDR_ASSERT(family != ADDR_CHIP_FAMILY_IVLD);
+    // The revision is stored exactly as supplied; only the family is translated.
+    m_chipRevision = uChipRevision;
+    m_chipFamily   = family;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::SetMinPitchAlignPixels
+*
+* @brief
+* Set m_minPitchAlignPixels with input param
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrLib::SetMinPitchAlignPixels(
+    UINT_32 minPitchAlignPixels)    ///< [in] minimum pitch alignment in pixels
+{
+    m_minPitchAlignPixels = (minPitchAlignPixels != 0) ? minPitchAlignPixels : 1; // 0 falls back to 1
+}
+
+/**
+***************************************************************************************************
+* AddrLib::GetAddrLib
+*
+* @brief
+* Get AddrLib pointer
+*
+* @return
+* An AddrLib class pointer
+***************************************************************************************************
+*/
+AddrLib * AddrLib::GetAddrLib(
+    ADDR_HANDLE hLib)     ///< [in] library handle, as stored in ADDR_CREATE_OUTPUT::hLib by Create
+{
+    return static_cast<AddrLib*>(hLib);   // Create stores the AddrLib pointer itself in the handle
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Surface Methods
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeSurfaceInfo
+*
+* @brief
+* Interface function stub of AddrComputeSurfaceInfo.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+ // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
+ if (GetFillSizeFieldsFlags() == TRUE)
+ {
+ if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT)) ||
+ (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT)))
+ {
+ returnCode = ADDR_PARAMSIZEMISMATCH;
+ }
+ }
+ // NOTE: either check below replaces an earlier ADDR_PARAMSIZEMISMATCH with ADDR_INVALIDPARAMS.
+ // We suggest client do sanity check but a check here is also good
+ if (pIn->bpp > 128)
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+
+ // Thick modes don't support multisample
+ if (ComputeSurfaceThickness(pIn->tileMode) > 1 && pIn->numSamples > 1)
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ // Get a local copy of input structure and only reference pIn for unadjusted values
+ ADDR_COMPUTE_SURFACE_INFO_INPUT localIn = *pIn;
+ ADDR_TILEINFO tileInfoNull = {0};
+
+ if (UseTileInfo())
+ {
+ // If the original input has a valid ADDR_TILEINFO pointer then copy its contents.
+ // Otherwise the default 0's in tileInfoNull are used.
+ if (pIn->pTileInfo)
+ {
+ tileInfoNull = *pIn->pTileInfo;
+ }
+ localIn.pTileInfo = &tileInfoNull;
+ }
+ // A sample count of 0 is treated as 1 from here on.
+ localIn.numSamples = pIn->numSamples == 0 ? 1 : pIn->numSamples;
+
+ // Do mipmap check first
+ // If format is BCn, pre-pad dimension to power-of-two according to HWL
+ ComputeMipLevel(&localIn);
+
+ if (m_configFlags.checkLast2DLevel)
+ {
+ // Save this level's original height in pixels
+ pOut->height = pIn->height;
+ }
+ // expandX/expandY default to 1; elemMode is only assigned when a valid format is given.
+ UINT_32 expandX = 1;
+ UINT_32 expandY = 1;
+ AddrElemMode elemMode;
+
+ // Save outputs that may not go through HWL
+ pOut->pixelBits = localIn.bpp;
+ pOut->numSamples = localIn.numSamples;
+ pOut->last2DLevel = FALSE;
+
+#if !ALT_TEST
+ if (localIn.numSamples > 1)
+ {
+ ADDR_ASSERT(localIn.mipLevel == 0);
+ }
+#endif
+
+ if (localIn.format != ADDR_FMT_INVALID) // Set format to INVALID will skip this conversion
+ {
+ // Get compression/expansion factors and element mode
+ // (which indicates compression/expansion)
+ localIn.bpp = GetElemLib()->GetBitsPerPixel(localIn.format,
+ &elemMode,
+ &expandX,
+ &expandY);
+
+ // Special flag for 96 bit surface. 96 (or 48 if we support) bit surface's width is
+ // pre-multiplied by 3 and bpp is divided by 3. So pitch alignment for linear-
+ // aligned does not meet 64-pixel in real. We keep special handling in hwl since hw
+ // restrictions are different.
+ // Also Mip 1+ needs an element pitch of 32 bits so we do not need this workaround
+ // but we use this flag to skip RestoreSurfaceInfo below
+
+ if ((elemMode == ADDR_EXPANDED) &&
+ (expandX > 1))
+ {
+ ADDR_ASSERT(localIn.tileMode == ADDR_TM_LINEAR_ALIGNED || localIn.height == 1);
+ }
+
+ GetElemLib()->AdjustSurfaceInfo(elemMode,
+ expandX,
+ expandY,
+ &localIn.bpp,
+ &localIn.basePitch,
+ &localIn.width,
+ &localIn.height);
+
+ // Overwrite these parameters if we have a valid format
+ }
+ else if (localIn.bpp != 0)
+ {
+ localIn.width = (localIn.width != 0) ? localIn.width : 1;
+ localIn.height = (localIn.height != 0) ? localIn.height : 1;
+ }
+ else // Rule out some invalid parameters
+ {
+ ADDR_ASSERT_ALWAYS();
+
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+
+ // Check mipmap after surface expansion
+ if (returnCode == ADDR_OK)
+ {
+ returnCode = PostComputeMipLevel(&localIn, pOut);
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ if (UseTileIndex(localIn.tileIndex))
+ {
+ // Make sure pTileInfo is not NULL
+ ADDR_ASSERT(localIn.pTileInfo);
+
+ UINT_32 numSamples = GetNumFragments(localIn.numSamples, localIn.numFrags);
+
+ INT_32 macroModeIndex = TileIndexNoMacroIndex;
+
+ if (localIn.tileIndex != TileIndexLinearGeneral)
+ {
+ // Try finding a macroModeIndex
+ macroModeIndex = HwlComputeMacroModeIndex(localIn.tileIndex,
+ localIn.flags,
+ localIn.bpp,
+ numSamples,
+ localIn.pTileInfo,
+ &localIn.tileMode,
+ &localIn.tileType);
+ }
+
+ // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
+ if (macroModeIndex == TileIndexNoMacroIndex)
+ {
+ returnCode = HwlSetupTileCfg(localIn.tileIndex, macroModeIndex,
+ localIn.pTileInfo,
+ &localIn.tileMode, &localIn.tileType);
+ }
+ // If macroModeIndex is invalid, then assert this is not macro tiled
+ else if (macroModeIndex == TileIndexInvalid)
+ {
+ ADDR_ASSERT(!IsMacroTiled(localIn.tileMode));
+ }
+ }
+ }
+ // Give the hardware layer, then base-level degradation, a chance to change the tile mode.
+ if (returnCode == ADDR_OK)
+ {
+ AddrTileMode tileMode = localIn.tileMode;
+ AddrTileType tileType = localIn.tileType;
+
+ // HWL layer may override tile mode if necessary
+ if (HwlOverrideTileMode(&localIn, &tileMode, &tileType))
+ {
+ localIn.tileMode = tileMode;
+ localIn.tileType = tileType;
+ }
+ // Degrade base level if applicable
+ if (DegradeBaseLevel(&localIn, &tileMode))
+ {
+ localIn.tileMode = tileMode;
+ }
+ }
+
+ // Call main function to compute surface info
+ if (returnCode == ADDR_OK)
+ {
+ returnCode = HwlComputeSurfaceInfo(&localIn, pOut);
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ // Since bpp might be changed we just pass it through
+ pOut->bpp = localIn.bpp;
+
+ // Also original width/height/bpp
+ pOut->pixelPitch = pOut->pitch;
+ pOut->pixelHeight = pOut->height;
+
+#if DEBUG
+ if (localIn.flags.display)
+ {
+ ADDR_ASSERT((pOut->pitchAlign % 32) == 0);
+ }
+#endif //DEBUG
+
+ if (localIn.format != ADDR_FMT_INVALID)
+ {
+ //
+ // 96 bits surface of level 1+ requires element pitch of 32 bits instead
+ // In hwl function we skip multiplication of 3 then we should skip division of 3
+ // We keep pitch that represents 32 bit element instead of 96 bits since we
+ // will get an odd number if divided by 3.
+ //
+ if (!((expandX == 3) && (localIn.mipLevel > 0)))
+ {
+
+ GetElemLib()->RestoreSurfaceInfo(elemMode,
+ expandX,
+ expandY,
+ &localIn.bpp,
+ &pOut->pixelPitch,
+ &pOut->pixelHeight);
+ }
+ }
+ // Stereo info is only computed when the client supplied a pStereoInfo buffer.
+ if (localIn.flags.qbStereo)
+ {
+ if (pOut->pStereoInfo)
+ {
+ ComputeQbStereoInfo(pOut);
+ }
+ }
+
+ if (localIn.flags.volume) // For volume sliceSize equals to all z-slices
+ {
+ pOut->sliceSize = pOut->surfSize;
+ }
+ else // For array: sliceSize is likely to have slice-padding (the last one)
+ {
+ pOut->sliceSize = pOut->surfSize / pOut->depth;
+
+ // array or cubemap
+ if (pIn->numSlices > 1)
+ {
+ // If this is the last slice then add the padding size to this slice
+ if (pIn->slice == (pIn->numSlices - 1))
+ {
+ pOut->sliceSize += pOut->sliceSize * (pOut->depth - pIn->numSlices);
+ }
+ else if (m_configFlags.checkLast2DLevel)
+ {
+ // Reset last2DLevel flag if this is not the last array slice
+ pOut->last2DLevel = FALSE;
+ }
+ }
+ }
+ // Derived maxima: pitch/8 - 1, height/8 - 1 and pitch*height/64 - 1.
+ pOut->pitchTileMax = pOut->pitch / 8 - 1;
+ pOut->heightTileMax = pOut->height / 8 - 1;
+ pOut->sliceTileMax = pOut->pitch * pOut->height / 64 - 1;
+ }
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeSurfaceAddrFromCoord
+*
+* @brief
+* Interface function stub of AddrComputeSurfaceAddrFromCoord.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeSurfaceAddrFromCoord(
+    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] request; read-only for this function
+    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] filled in by the hardware layer, plus prtBlockIndex
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
+    if (GetFillSizeFieldsFlags() == TRUE)
+    {
+        if ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT)) ||
+            (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT)))
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;
+        ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT input;
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Use temp tile info for calculation
+            input.pTileInfo = &tileInfoNull;
+
+            const ADDR_SURFACE_FLAGS flags = {{0}};
+            UINT_32 numSamples = GetNumFragments(pIn->numSamples, pIn->numFrags);
+
+            // Try finding a macroModeIndex
+            INT_32 macroModeIndex = HwlComputeMacroModeIndex(input.tileIndex,
+                                                             flags,
+                                                             input.bpp,
+                                                             numSamples,
+                                                             input.pTileInfo,
+                                                             &input.tileMode,
+                                                             &input.tileType);
+
+            // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
+            if (macroModeIndex == TileIndexNoMacroIndex)
+            {
+                returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex,
+                                             input.pTileInfo, &input.tileMode, &input.tileType);
+            }
+            // If macroModeIndex is invalid, then assert this is not macro tiled
+            else if (macroModeIndex == TileIndexInvalid)
+            {
+                ADDR_ASSERT(!IsMacroTiled(input.tileMode));
+            }
+
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            returnCode = HwlComputeSurfaceAddrFromCoord(pIn, pOut);
+            // The block index is the computed address divided by 64 * 1024.
+            if (returnCode == ADDR_OK)
+            {
+                pOut->prtBlockIndex = static_cast<UINT_32>(pOut->addr / (64 * 1024));
+            }
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeSurfaceCoordFromAddr
+*
+* @brief
+* Interface function stub of ComputeSurfaceCoordFromAddr.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeSurfaceCoordFromAddr(
+    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,    ///< [in] request; read-only for this function
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut    ///< [out] filled in by HwlComputeSurfaceCoordFromAddr
+    ) const
+{
+    // Flow: optional size validation, optional tile-index resolution, then the
+    // hardware-layer computation. The first failing step decides the return code.
+
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Scratch objects, written only when the request is resolved through a tile index.
+    ADDR_TILEINFO                            tileInfoScratch;
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT input;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Resolve the index into a private copy; the caller's input stays untouched.
+        input           = *pIn;
+        input.pTileInfo = &tileInfoScratch;
+
+        const ADDR_SURFACE_FLAGS noFlags   = {{0}};
+        const UINT_32            fragments = GetNumFragments(pIn->numSamples, pIn->numFrags);
+
+        // Ask the hardware layer for a macro mode index matching this tile index.
+        const INT_32 macroIdx = HwlComputeMacroModeIndex(input.tileIndex,
+                                                         noFlags,
+                                                         input.bpp,
+                                                         fragments,
+                                                         input.pTileInfo,
+                                                         &input.tileMode,
+                                                         &input.tileType);
+
+        if (macroIdx == TileIndexNoMacroIndex)
+        {
+            // No macro mode index is needed: read the tile configuration directly.
+            const ADDR_E_RETURNCODE cfgStatus = HwlSetupTileCfg(input.tileIndex,
+                                                                macroIdx,
+                                                                input.pTileInfo,
+                                                                &input.tileMode,
+                                                                &input.tileType);
+            if (cfgStatus != ADDR_OK)
+            {
+                return cfgStatus;
+            }
+        }
+        else if (macroIdx == TileIndexInvalid)
+        {
+            // Without a valid macro mode index the mode must not be macro tiled.
+            ADDR_ASSERT(!IsMacroTiled(input.tileMode));
+        }
+
+        // Hand the resolved copy to the hardware layer instead of the original.
+        pIn = &input;
+    }
+
+    return HwlComputeSurfaceCoordFromAddr(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeSliceTileSwizzle
+*
+* @brief
+* Interface function stub of ComputeSliceTileSwizzle.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeSliceTileSwizzle(
+    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,    ///< [in] request; read-only for this function
+    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut    ///< [out] filled in by HwlComputeSliceTileSwizzle
+    ) const
+{
+    // Flow: optional size validation, optional tile-index resolution, then the
+    // hardware-layer computation. The first failing step decides the return code.
+
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_SLICESWIZZLE_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Scratch objects, written only when the request is resolved through a tile index.
+    ADDR_TILEINFO                   scratchTileInfo;
+    ADDR_COMPUTE_SLICESWIZZLE_INPUT indexedIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Resolve the index into a private copy; the caller's input stays untouched.
+        indexedIn           = *pIn;
+        indexedIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE cfgStatus = HwlSetupTileCfg(indexedIn.tileIndex,
+                                                            indexedIn.macroModeIndex,
+                                                            indexedIn.pTileInfo,
+                                                            &indexedIn.tileMode);
+        if (cfgStatus != ADDR_OK)
+        {
+            return cfgStatus;   // the tile configuration lookup failed
+        }
+
+        // Hand the resolved copy to the hardware layer instead of the original.
+        pIn = &indexedIn;
+    }
+
+    return HwlComputeSliceTileSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ExtractBankPipeSwizzle
+*
+* @brief
+* Interface function stub of AddrExtractBankPipeSwizzle.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ExtractBankPipeSwizzle(
+    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] request; read-only for this function
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] filled in by HwlExtractBankPipeSwizzle
+    ) const
+{
+    // Flow: optional size validation, optional tile-index resolution, then the
+    // hardware-layer computation. The first failing step decides the return code.
+
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT)) ||
+         (pOut->size != sizeof(ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Scratch objects, written only when the request is resolved through a tile index.
+    ADDR_TILEINFO                       scratchTileInfo;
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT indexedIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Resolve the index into a private copy; the caller's input stays untouched.
+        indexedIn           = *pIn;
+        indexedIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE cfgStatus = HwlSetupTileCfg(indexedIn.tileIndex,
+                                                            indexedIn.macroModeIndex,
+                                                            indexedIn.pTileInfo);
+        if (cfgStatus != ADDR_OK)
+        {
+            return cfgStatus;   // the tile configuration lookup failed
+        }
+
+        // Hand the resolved copy to the hardware layer instead of the original.
+        pIn = &indexedIn;
+    }
+
+    return HwlExtractBankPipeSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::CombineBankPipeSwizzle
+*
+* @brief
+* Interface function stub of AddrCombineBankPipeSwizzle.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::CombineBankPipeSwizzle(
+    const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] bank/pipe swizzle, tile info or index, base address
+    ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] tileSwizzle receives the combined value
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
+    if (GetFillSizeFieldsFlags() == TRUE)
+    {
+        if ((pIn->size != sizeof(ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT)) ||   // compare against the types
+            (pOut->size != sizeof(ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT)))   // this call actually takes
+        {
+            returnCode = ADDR_PARAMSIZEMISMATCH;
+        }
+    }
+
+    if (returnCode == ADDR_OK)
+    {
+        ADDR_TILEINFO tileInfoNull;
+        ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT input;
+
+        if (UseTileIndex(pIn->tileIndex))
+        {
+            input = *pIn;
+            // Use temp tile info for calculation
+            input.pTileInfo = &tileInfoNull;
+
+            returnCode = HwlSetupTileCfg(input.tileIndex, input.macroModeIndex, input.pTileInfo);
+            // Change the input structure
+            pIn = &input;
+        }
+
+        if (returnCode == ADDR_OK)
+        {
+            returnCode = HwlCombineBankPipeSwizzle(pIn->bankSwizzle,
+                                                   pIn->pipeSwizzle,
+                                                   pIn->pTileInfo,
+                                                   pIn->baseAddr,
+                                                   &pOut->tileSwizzle);
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeBaseSwizzle
+*
+* @brief
+* Interface function stub of AddrComputeBaseSwizzle.
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeBaseSwizzle(
+    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,    ///< [in] request; read-only for this function
+    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut    ///< [out] tileSwizzle is written on success
+    ) const
+{
+    // Flow: size validation, tile-index resolution, then the swizzle itself.
+
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Scratch objects, written only when the request is resolved through a tile index.
+    ADDR_TILEINFO                   scratchTileInfo;
+    ADDR_COMPUTE_BASE_SWIZZLE_INPUT indexedIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Resolve the index into a private copy; the caller's input stays untouched.
+        indexedIn           = *pIn;
+        indexedIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE cfgStatus = HwlSetupTileCfg(indexedIn.tileIndex,
+                                                            indexedIn.macroModeIndex,
+                                                            indexedIn.pTileInfo);
+        if (cfgStatus != ADDR_OK)
+        {
+            return cfgStatus;   // the tile configuration lookup failed
+        }
+
+        // Hand the resolved copy to the hardware layer instead of the original.
+        pIn = &indexedIn;
+    }
+
+    // Modes that are not macro tiled get a swizzle of 0 without a hardware-layer call.
+    if (!IsMacroTiled(pIn->tileMode))
+    {
+        pOut->tileSwizzle = 0;
+        return ADDR_OK;
+    }
+
+    return HwlComputeBaseSwizzle(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeFmaskInfo
+*
+* @brief
+* Interface function stub of ComputeFmaskInfo.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeFmaskInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure
+ )
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+ // Structure sizes are only validated when GetFillSizeFieldsFlags() reports TRUE.
+ if (GetFillSizeFieldsFlags() == TRUE)
+ {
+ if ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_INFO_INPUT)) ||
+ (pOut->size != sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT)))
+ {
+ returnCode = ADDR_PARAMSIZEMISMATCH;
+ }
+ }
+ // NOTE: the check below replaces an earlier ADDR_PARAMSIZEMISMATCH with ADDR_INVALIDPARAMS.
+ // No thick MSAA
+ if (ComputeSurfaceThickness(pIn->tileMode) > 1)
+ {
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ ADDR_TILEINFO tileInfoNull;
+ ADDR_COMPUTE_FMASK_INFO_INPUT input;
+
+ if (UseTileIndex(pIn->tileIndex))
+ {
+ input = *pIn;
+
+ if (pOut->pTileInfo)
+ {
+ // The client supplied an output tile info buffer: resolve straight into it
+ input.pTileInfo = pOut->pTileInfo;
+ }
+ else
+ {
+ input.pTileInfo = &tileInfoNull; // otherwise use temp tile info for calculation
+ }
+
+ ADDR_SURFACE_FLAGS flags = {{0}};
+ flags.fmask = 1;
+
+ // Try finding a macroModeIndex
+ INT_32 macroModeIndex = HwlComputeMacroModeIndex(pIn->tileIndex,
+ flags,
+ HwlComputeFmaskBits(pIn, NULL),
+ pIn->numSamples,
+ input.pTileInfo,
+ &input.tileMode);
+
+ // If macroModeIndex is not needed, then call HwlSetupTileCfg to get tile info
+ if (macroModeIndex == TileIndexNoMacroIndex)
+ {
+ returnCode = HwlSetupTileCfg(input.tileIndex, macroModeIndex,
+ input.pTileInfo, &input.tileMode);
+ }
+ // Unlike the surface entry points, an invalid macro mode index always asserts here.
+ ADDR_ASSERT(macroModeIndex != TileIndexInvalid);
+
+ // Change the input structure
+ pIn = &input;
+ }
+
+ if (returnCode == ADDR_OK)
+ {
+ if (pIn->numSamples > 1)
+ {
+ returnCode = HwlComputeFmaskInfo(pIn, pOut);
+ }
+ else
+ {
+ memset(pOut, 0, sizeof(ADDR_COMPUTE_FMASK_INFO_OUTPUT));
+ // The whole output (its size and pTileInfo fields included) was zeroed just above.
+ returnCode = ADDR_INVALIDPARAMS;
+ }
+ }
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeFmaskAddrFromCoord
+*
+* @brief
+* Interface function stub of ComputeFmaskAddrFromCoord.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeFmaskAddrFromCoord(
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] request; passed on unchanged
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] filled in by HwlComputeFmaskAddrFromCoord
+    ) const
+{
+    ADDR_E_RETURNCODE status = ADDR_OK;
+
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT))))
+    {
+        status = ADDR_PARAMSIZEMISMATCH;
+    }
+    else
+    {
+        // More than one sample is required: debug builds assert on it, and the
+        // request is rejected with ADDR_INVALIDPARAMS either way.
+        ADDR_ASSERT(pIn->numSamples > 1);
+
+        if (pIn->numSamples <= 1)
+        {
+            status = ADDR_INVALIDPARAMS;
+        }
+        else
+        {
+            status = HwlComputeFmaskAddrFromCoord(pIn, pOut);
+        }
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeFmaskCoordFromAddr
+*
+* @brief
+* Interface function stub of ComputeFmaskCoordFromAddr.
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeFmaskCoordFromAddr(
+    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] request; passed on unchanged
+    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] filled in by HwlComputeFmaskCoordFromAddr
+    ) const
+{
+    ADDR_E_RETURNCODE status = ADDR_OK;
+
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT))))
+    {
+        status = ADDR_PARAMSIZEMISMATCH;
+    }
+    else
+    {
+        // More than one sample is required: debug builds assert on it, and the
+        // request is rejected with ADDR_INVALIDPARAMS either way.
+        ADDR_ASSERT(pIn->numSamples > 1);
+
+        if (pIn->numSamples <= 1)
+        {
+            status = ADDR_INVALIDPARAMS;
+        }
+        else
+        {
+            status = HwlComputeFmaskCoordFromAddr(pIn, pOut);
+        }
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ConvertTileInfoToHW
+*
+* @brief
+* Convert tile info from real value to HW register value in HW layer
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ConvertTileInfoToHW(
+    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,     ///< [in] request; read-only for this function
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT*      pOut     ///< [out] filled in by HwlConvertTileInfoToHW
+    ) const
+{
+    // Flow: size validation, tile-index resolution (forward only), then the conversion.
+
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_INPUT)) ||
+         (pOut->size != sizeof(ADDR_CONVERT_TILEINFOTOHW_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Scratch objects, written only when the request is resolved through a tile index.
+    ADDR_TILEINFO                   scratchTileInfo;
+    ADDR_CONVERT_TILEINFOTOHW_INPUT indexedIn;
+
+    // Indices are ignored when pIn->reverse is TRUE, so only the forward direction resolves one.
+    if ((pIn->reverse == FALSE) && UseTileIndex(pIn->tileIndex))
+    {
+        indexedIn           = *pIn;
+        indexedIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE cfgStatus = HwlSetupTileCfg(indexedIn.tileIndex,
+                                                            indexedIn.macroModeIndex,
+                                                            indexedIn.pTileInfo);
+        if (cfgStatus != ADDR_OK)
+        {
+            return cfgStatus;   // the tile configuration lookup failed
+        }
+
+        // Hand the resolved copy to the hardware layer instead of the original.
+        pIn = &indexedIn;
+    }
+
+    return HwlConvertTileInfoToHW(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ConvertTileIndex
+*
+* @brief
+* Convert tile index to tile mode/type/info
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ConvertTileIndex(
+    const ADDR_CONVERT_TILEINDEX_INPUT* pIn,    ///< [in] tile index, macro mode index and tileInfoHw switch
+    ADDR_CONVERT_TILEINDEX_OUTPUT*      pOut    ///< [out] tileMode, tileType and *pTileInfo are written
+    ) const
+{
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX_INPUT)) ||
+         (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Step 1: let the hardware layer expand the index pair directly into the
+    // caller's output fields.
+    ADDR_E_RETURNCODE status = HwlSetupTileCfg(pIn->tileIndex,
+                                               pIn->macroModeIndex,
+                                               pOut->pTileInfo,
+                                               &pOut->tileMode,
+                                               &pOut->tileType);
+
+    // Step 2 (optional): the client asked for the tile info in hardware form.
+    if ((status == ADDR_OK) && pIn->tileInfoHw)
+    {
+        // Input and output refer to the same tile info, so the conversion happens in place.
+        ADDR_CONVERT_TILEINFOTOHW_INPUT  toHwIn  = {0};
+        ADDR_CONVERT_TILEINFOTOHW_OUTPUT toHwOut = {0};
+
+        toHwIn.tileIndex  = -1;
+        toHwIn.pTileInfo  = pOut->pTileInfo;
+        toHwOut.pTileInfo = pOut->pTileInfo;
+
+        status = HwlConvertTileInfoToHW(&toHwIn, &toHwOut);
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ConvertTileIndex1
+*
+* @brief
+* Convert tile index to tile mode/type/info
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ConvertTileIndex1(
+    const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,   ///< [in] tile index, bpp, sample count and tileInfoHw switch
+    ADDR_CONVERT_TILEINDEX_OUTPUT*       pOut   ///< [out] tileMode, tileType and *pTileInfo are written
+    ) const
+{
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_CONVERT_TILEINDEX1_INPUT)) ||
+         (pOut->size != sizeof(ADDR_CONVERT_TILEINDEX_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    const ADDR_SURFACE_FLAGS noFlags = {{0}};
+    // Only the output arguments are consumed; the returned macro mode index is not used.
+    HwlComputeMacroModeIndex(pIn->tileIndex,
+                             noFlags,
+                             pIn->bpp,
+                             pIn->numSamples,
+                             pOut->pTileInfo,
+                             &pOut->tileMode,
+                             &pOut->tileType);
+
+    if (!pIn->tileInfoHw)
+    {
+        return ADDR_OK;     // no hardware-form conversion was requested
+    }
+
+    // Input and output refer to the same tile info, so the conversion happens in place.
+    ADDR_CONVERT_TILEINFOTOHW_INPUT  toHwIn  = {0};
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT toHwOut = {0};
+
+    toHwIn.tileIndex  = -1;
+    toHwIn.pTileInfo  = pOut->pTileInfo;
+    toHwOut.pTileInfo = pOut->pTileInfo;
+
+    return HwlConvertTileInfoToHW(&toHwIn, &toHwOut);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::GetTileIndex
+*
+* @brief
+* Get tile index from tile mode/type/info
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::GetTileIndex(
+    const ADDR_GET_TILEINDEX_INPUT* pIn,    ///< [in] request; passed on unchanged
+    ADDR_GET_TILEINDEX_OUTPUT*      pOut    ///< [out] filled in by HwlGetTileIndex
+    ) const
+{
+    // This wrapper adds nothing but the optional structure size validation;
+    // the lookup itself is implemented by the hardware layer.
+    const bool validateSizes = (GetFillSizeFieldsFlags() == TRUE);
+
+    if (validateSizes &&
+        ((pIn->size != sizeof(ADDR_GET_TILEINDEX_INPUT)) ||
+         (pOut->size != sizeof(ADDR_GET_TILEINDEX_OUTPUT))))
+    {
+        // A mismatch is reported without calling into the hardware layer.
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Sizes are fine (or were not checked): the hardware layer's return code
+    // is passed through unchanged.
+    const ADDR_E_RETURNCODE status = HwlGetTileIndex(pIn, pOut);
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeSurfaceThickness
+*
+* @brief
+* Compute surface thickness
+*
+* @return
+* Surface thickness
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputeSurfaceThickness(AddrTileMode tileMode) // [in] tile mode to look up
+{
+    const UINT_32 thickness = m_modeFlags[tileMode].thickness;  // table lookup; tileMode is not range-checked
+    return thickness;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// CMASK/HTILE
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeHtileInfo
+*
+* @brief
+* Interface function stub of AddrComputeHtileInfo
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeHtileInfo(
+    const ADDR_COMPUTE_HTILE_INFO_INPUT*    pIn,    ///< [in] request; read-only for this function
+    ADDR_COMPUTE_HTILE_INFO_OUTPUT*         pOut    ///< [out] HTILE layout values are written here
+    ) const
+{
+    // Only block dimensions of exactly 8 are distinguished; anything else counts as "not 8".
+    const BOOL_32 blockIs8Wide = (pIn->blockWidth == 8) ? TRUE : FALSE;
+    const BOOL_32 blockIs8High = (pIn->blockHeight == 8) ? TRUE : FALSE;
+
+    // With size checking enabled, both structures must report their exact size.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // Scratch objects, written only when the request is resolved through a tile index.
+    ADDR_TILEINFO                 scratchTileInfo;
+    ADDR_COMPUTE_HTILE_INFO_INPUT indexedIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Resolve the index into a private copy; the caller's input stays untouched.
+        indexedIn           = *pIn;
+        indexedIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE cfgStatus = HwlSetupTileCfg(indexedIn.tileIndex,
+                                                            indexedIn.macroModeIndex,
+                                                            indexedIn.pTileInfo);
+        if (cfgStatus != ADDR_OK)
+        {
+            return cfgStatus;   // the tile configuration lookup failed
+        }
+
+        // Use the resolved copy for the computation instead of the original.
+        pIn = &indexedIn;
+    }
+
+    // The multi-argument overload returns the value stored as bpp and writes the rest via pointers.
+    pOut->bpp = ComputeHtileInfo(pIn->flags,
+                                 pIn->pitch,
+                                 pIn->height,
+                                 pIn->numSlices,
+                                 pIn->isLinear,
+                                 blockIs8Wide,
+                                 blockIs8High,
+                                 pIn->pTileInfo,
+                                 &pOut->pitch,
+                                 &pOut->height,
+                                 &pOut->htileBytes,
+                                 &pOut->macroWidth,
+                                 &pOut->macroHeight,
+                                 &pOut->sliceSize,
+                                 &pOut->baseAlign);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskInfo
+*
+*   @brief
+*       Interface function stub of AddrComputeCmaskInfo. Checks the structure sizes when
+*       GetFillSizeFieldsFlags() is TRUE, passes a temporary tile info to HwlSetupTileCfg when
+*       a tile index is used, then calls the internal ComputeCmaskInfo overload.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeCmaskInfo(
+    const ADDR_COMPUTE_CMASK_INFO_INPUT*    pIn,    ///< [in] input structure
+    ADDR_COMPUTE_CMASK_INFO_OUTPUT*         pOut    ///< [out] output structure
+    ) const
+{
+    // Bail out on a structure size mismatch when size checking is enabled
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_INFO_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_CMASK_INFO_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_TILEINFO                 scratchTileInfo;
+    ADDR_COMPUTE_CMASK_INFO_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at scratch storage
+        localIn           = *pIn;
+        localIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE setupResult =
+            HwlSetupTileCfg(localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);
+
+        if (setupResult != ADDR_OK)
+        {
+            return setupResult;
+        }
+
+        // From here on the parameters are read from the local copy
+        pIn = &localIn;
+    }
+
+    // The internal overload's return code is the result of this call
+    return ComputeCmaskInfo(pIn->flags,
+                            pIn->pitch,
+                            pIn->height,
+                            pIn->numSlices,
+                            pIn->isLinear,
+                            pIn->pTileInfo,
+                            &pOut->pitch,
+                            &pOut->height,
+                            &pOut->cmaskBytes,
+                            &pOut->macroWidth,
+                            &pOut->macroHeight,
+                            &pOut->sliceSize,
+                            &pOut->baseAlign,
+                            &pOut->blockMax);
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeDccInfo
+*
+*   @brief
+*       Interface function to compute DCC key info. Checks the structure sizes when
+*       GetFillSizeFieldsFlags() is TRUE and, when a tile index is used, lets HwlSetupTileCfg
+*       work on the tile info and tile mode of a local copy of the input.
+*
+*   @return
+*       ADDR_PARAMSIZEMISMATCH on a size mismatch, the HwlSetupTileCfg error if the tile
+*       configuration setup fails, otherwise the return code of HwlComputeDccInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeDccInfo(
+    const ADDR_COMPUTE_DCCINFO_INPUT*    pIn,    ///< [in] input structure
+    ADDR_COMPUTE_DCCINFO_OUTPUT*         pOut    ///< [out] output structure
+    ) const
+{
+    // Bail out on a structure size mismatch when size checking is enabled
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_DCCINFO_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_COMPUTE_DCCINFO_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        localIn = *pIn;
+
+        const ADDR_E_RETURNCODE setupResult = HwlSetupTileCfg(localIn.tileIndex,
+                                                              localIn.macroModeIndex,
+                                                              &localIn.tileInfo,
+                                                              &localIn.tileMode);
+
+        if (setupResult != ADDR_OK)
+        {
+            return setupResult;
+        }
+
+        // From here on the parameters are read from the local copy
+        pIn = &localIn;
+    }
+
+    return HwlComputeDccInfo(pIn, pOut);
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeHtileAddrFromCoord
+*
+*   @brief
+*       Interface function stub of AddrComputeHtileAddrFromCoord. Checks the structure sizes
+*       when GetFillSizeFieldsFlags() is TRUE, passes a temporary tile info to HwlSetupTileCfg
+*       when a tile index is used, then calls HwlComputeXmaskAddrFromCoord with factor 1.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeHtileAddrFromCoord(
+    const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    const BOOL_32 blockWidthIs8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
+    const BOOL_32 blockHeightIs8 = (pIn->blockHeight == 8) ? TRUE : FALSE;
+
+    // Bail out on a structure size mismatch when size checking is enabled
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_TILEINFO                          scratchTileInfo;
+    ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at scratch storage
+        localIn           = *pIn;
+        localIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE setupResult =
+            HwlSetupTileCfg(localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);
+
+        if (setupResult != ADDR_OK)
+        {
+            return setupResult;
+        }
+
+        // From here on the parameters are read from the local copy
+        pIn = &localIn;
+    }
+
+    // Factor 1 selects the htile path of the shared xmask routine
+    const UINT_32 htileFactor = 1;
+
+    pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch,
+                                              pIn->height,
+                                              pIn->x,
+                                              pIn->y,
+                                              pIn->slice,
+                                              pIn->numSlices,
+                                              htileFactor,
+                                              pIn->isLinear,
+                                              blockWidthIs8,
+                                              blockHeightIs8,
+                                              pIn->pTileInfo,
+                                              &pOut->bitPosition);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeHtileCoordFromAddr
+*
+*   @brief
+*       Interface function stub of AddrComputeHtileCoordFromAddr. Checks the structure sizes
+*       when GetFillSizeFieldsFlags() is TRUE, passes a temporary tile info to HwlSetupTileCfg
+*       when a tile index is used, then calls HwlComputeXmaskCoordFromAddr with factor 1.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeHtileCoordFromAddr(
+    const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    const BOOL_32 blockWidthIs8  = (pIn->blockWidth == 8) ? TRUE : FALSE;
+    const BOOL_32 blockHeightIs8 = (pIn->blockHeight == 8) ? TRUE : FALSE;
+
+    // Bail out on a structure size mismatch when size checking is enabled
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_TILEINFO                          scratchTileInfo;
+    ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at scratch storage
+        localIn           = *pIn;
+        localIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE setupResult =
+            HwlSetupTileCfg(localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);
+
+        if (setupResult != ADDR_OK)
+        {
+            return setupResult;
+        }
+
+        // From here on the parameters are read from the local copy
+        pIn = &localIn;
+    }
+
+    // Factor 1 selects the htile path of the shared xmask routine
+    const UINT_32 htileFactor = 1;
+
+    HwlComputeXmaskCoordFromAddr(pIn->addr,
+                                 pIn->bitPosition,
+                                 pIn->pitch,
+                                 pIn->height,
+                                 pIn->numSlices,
+                                 htileFactor,
+                                 pIn->isLinear,
+                                 blockWidthIs8,
+                                 blockHeightIs8,
+                                 pIn->pTileInfo,
+                                 &pOut->x,
+                                 &pOut->y,
+                                 &pOut->slice);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskAddrFromCoord
+*
+*   @brief
+*       Interface function stub of AddrComputeCmaskAddrFromCoord. Checks the structure sizes
+*       when GetFillSizeFieldsFlags() is TRUE, passes a temporary tile info to HwlSetupTileCfg
+*       when a tile index is used, then dispatches on flags.tcCompatible to either
+*       HwlComputeCmaskAddrFromCoord or HwlComputeXmaskAddrFromCoord (factor 2).
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeCmaskAddrFromCoord(
+    const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    // Bail out on a structure size mismatch when size checking is enabled
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_TILEINFO                          scratchTileInfo;
+    ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at scratch storage
+        localIn           = *pIn;
+        localIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE setupResult =
+            HwlSetupTileCfg(localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);
+
+        if (setupResult != ADDR_OK)
+        {
+            return setupResult;
+        }
+
+        // From here on the parameters are read from the local copy
+        pIn = &localIn;
+    }
+
+    if (pIn->flags.tcCompatible == TRUE)
+    {
+        // The tcCompatible case is handled entirely by the Hwl implementation
+        return HwlComputeCmaskAddrFromCoord(pIn, pOut);
+    }
+
+    // Factor 2 selects the cmask path of the shared xmask routine
+    const UINT_32 cmaskFactor = 2;
+
+    pOut->addr = HwlComputeXmaskAddrFromCoord(pIn->pitch,
+                                              pIn->height,
+                                              pIn->x,
+                                              pIn->y,
+                                              pIn->slice,
+                                              pIn->numSlices,
+                                              cmaskFactor,
+                                              pIn->isLinear,
+                                              FALSE, //this is cmask, isWidth8 is not needed
+                                              FALSE, //this is cmask, isHeight8 is not needed
+                                              pIn->pTileInfo,
+                                              &pOut->bitPosition);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskCoordFromAddr
+*
+*   @brief
+*       Interface function stub of AddrComputeCmaskCoordFromAddr. Checks the structure sizes
+*       when GetFillSizeFieldsFlags() is TRUE, passes a temporary tile info to HwlSetupTileCfg
+*       when a tile index is used, then calls HwlComputeXmaskCoordFromAddr with factor 2.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeCmaskCoordFromAddr(
+    const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    // Bail out on a structure size mismatch when size checking is enabled
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT)) ||
+         (pOut->size != sizeof(ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    ADDR_TILEINFO                          scratchTileInfo;
+    ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT localIn;
+
+    if (UseTileIndex(pIn->tileIndex))
+    {
+        // Work on a private copy whose tile info points at scratch storage
+        localIn           = *pIn;
+        localIn.pTileInfo = &scratchTileInfo;
+
+        const ADDR_E_RETURNCODE setupResult =
+            HwlSetupTileCfg(localIn.tileIndex, localIn.macroModeIndex, localIn.pTileInfo);
+
+        if (setupResult != ADDR_OK)
+        {
+            return setupResult;
+        }
+
+        // From here on the parameters are read from the local copy
+        pIn = &localIn;
+    }
+
+    // Factor 2 selects the cmask path of the shared xmask routine
+    const UINT_32 cmaskFactor = 2;
+
+    HwlComputeXmaskCoordFromAddr(pIn->addr,
+                                 pIn->bitPosition,
+                                 pIn->pitch,
+                                 pIn->height,
+                                 pIn->numSlices,
+                                 cmaskFactor,
+                                 pIn->isLinear,
+                                 FALSE,
+                                 FALSE,
+                                 pIn->pTileInfo,
+                                 &pOut->x,
+                                 &pOut->y,
+                                 &pOut->slice);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeTileDataWidthAndHeight
+*
+*   @brief
+*       Compute the squared cache shape for per-tile data (CMASK and HTILE)
+*
+*   @return
+*       N/A
+*
+*   @note
+*       MacroWidth and macroHeight are measured in pixels
+***************************************************************************************************
+*/
+VOID AddrLib::ComputeTileDataWidthAndHeight(
+    UINT_32         bpp,            ///< [in] bits per pixel
+    UINT_32         cacheBits,      ///< [in] bits of cache
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] Tile info
+    UINT_32*        pMacroWidth,    ///< [out] macro tile width
+    UINT_32*        pMacroHeight    ///< [out] macro tile height
+    ) const
+{
+    // Both extents are tracked in units of 8 and scaled by 8 when written out
+    UINT_32       widthUnits  = cacheBits / bpp;
+    UINT_32       heightUnits = 1;
+    const UINT_32 numPipes    = HwlGetPipes(pTileInfo);
+
+    // Trade width for height until the macro-tile is close to square.
+    // Stop as soon as the width is odd, since it can no longer be halved exactly.
+    for (;;)
+    {
+        const BOOL_32 widthIsOdd   = ((widthUnits & 1) != 0) ? TRUE : FALSE;
+        const BOOL_32 squareEnough = (widthUnits <= heightUnits * 2 * numPipes) ? TRUE : FALSE;
+
+        if (squareEnough || widthIsOdd)
+        {
+            break;
+        }
+
+        widthUnits  >>= 1;
+        heightUnits <<= 1;
+    }
+
+    *pMacroWidth  = 8 * widthUnits;
+    *pMacroHeight = 8 * heightUnits * numPipes;
+
+    // Note: The iterative computation above is equivalent to the following
+    //
+    //int log2_height = ((log2(cacheBits)-log2(bpp)-log2(pipes))/2);
+    //int macroHeight = pow2( 3+log2(pipes)+log2_height );
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlComputeTileDataWidthAndHeightLinear
+*
+*   @brief
+*       Compute the squared cache shape for per-tile data (CMASK and HTILE) for linear layout
+*
+*   @return
+*       N/A
+*
+*   @note
+*       MacroWidth and macroHeight are measured in pixels
+***************************************************************************************************
+*/
+VOID AddrLib::HwlComputeTileDataWidthAndHeightLinear(
+    UINT_32*        pMacroWidth,     ///< [out] macro tile width
+    UINT_32*        pMacroHeight,    ///< [out] macro tile height
+    UINT_32         bpp,             ///< [in] bits per pixel
+    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info (not referenced by this implementation)
+    ) const
+{
+    // Cmask does not support linear layout prior to SI
+    ADDR_ASSERT(bpp != 4);
+
+    // Width is aligned to 512-bit memory accesses
+    const UINT_32 accessBits = 512;
+
+    *pMacroWidth  = (8 * accessBits) / bpp;
+
+    // Height is aligned to the number of pipes
+    *pMacroHeight = m_pipes * 8;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeHtileInfo
+*
+*   @brief
+*       Compute htile pitch, height and size in bytes
+*
+*   @return
+*       Htile bpp i.e. How many bits for an 8x8 tile
+*       Also returns by output parameters:
+*       *Htile pitch, height, total size in bytes, macro-tile dimensions and slice size*
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputeHtileInfo(
+    ADDR_HTILE_FLAGS flags,             ///< [in] htile flags
+    UINT_32          pitchIn,           ///< [in] pitch input
+    UINT_32          heightIn,          ///< [in] height input
+    UINT_32          numSlices,         ///< [in] number of slices (0 is treated as 1)
+    BOOL_32          isLinear,          ///< [in] if it is linear mode
+    BOOL_32          isWidth8,          ///< [in] if htile block width is 8
+    BOOL_32          isHeight8,         ///< [in] if htile block height is 8
+    ADDR_TILEINFO*   pTileInfo,         ///< [in] Tile info
+    UINT_32*         pPitchOut,         ///< [out] pitch output
+    UINT_32*         pHeightOut,        ///< [out] height output
+    UINT_64*         pHtileBytes,       ///< [out] htile bytes as returned by HwlComputeHtileBytes
+    UINT_32*         pMacroWidth,       ///< [out] macro-tile width in pixels (optional)
+    UINT_32*         pMacroHeight,      ///< [out] macro-tile height in pixels (optional)
+    UINT_64*         pSliceSize,        ///< [out] slice size in bytes (optional)
+    UINT_32*         pBaseAlign         ///< [out] base alignment (optional)
+    ) const
+{
+
+    UINT_32 macroWidth;
+    UINT_32 macroHeight;
+    UINT_32 baseAlign;
+    UINT_64 surfBytes;
+    UINT_64 sliceBytes;
+
+    numSlices = Max(1u, numSlices);
+
+    const UINT_32 bpp = HwlComputeHtileBpp(isWidth8, isHeight8);
+    const UINT_32 cacheBits = HtileCacheBits;
+
+    // Pick the macro-tile shape for the requested layout
+    if (isLinear)
+    {
+        HwlComputeTileDataWidthAndHeightLinear(&macroWidth,
+                                               &macroHeight,
+                                               bpp,
+                                               pTileInfo);
+    }
+    else
+    {
+        ComputeTileDataWidthAndHeight(bpp,
+                                      cacheBits,
+                                      pTileInfo,
+                                      &macroWidth,
+                                      &macroHeight);
+    }
+
+    // Pad the surface dimensions up to whole macro tiles
+    *pPitchOut = PowTwoAlign(pitchIn, macroWidth);
+    *pHeightOut = PowTwoAlign(heightIn, macroHeight);
+
+    baseAlign = HwlComputeHtileBaseAlign(flags.tcCompatible, isLinear, pTileInfo);
+
+    surfBytes = HwlComputeHtileBytes(*pPitchOut,
+                                     *pHeightOut,
+                                     bpp,
+                                     isLinear,
+                                     numSlices,
+                                     &sliceBytes,
+                                     baseAlign);
+
+    *pHtileBytes = surfBytes;
+
+    //
+    // Use SafeAssign since they are optional
+    //
+    SafeAssign(pMacroWidth, macroWidth);
+
+    SafeAssign(pMacroHeight, macroHeight);
+
+    SafeAssign(pSliceSize, sliceBytes);
+
+    SafeAssign(pBaseAlign, baseAlign);
+
+    return bpp;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskBaseAlign
+*
+*   @brief
+*       Compute cmask base alignment: pipe interleave bytes times the pipe count, further
+*       multiplied by the bank count when the tcCompatible flag is set
+*
+*   @return
+*       Cmask base alignment
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputeCmaskBaseAlign(
+    ADDR_CMASK_FLAGS flags,        ///< [in] Cmask flags
+    ADDR_TILEINFO*   pTileInfo     ///< [in] Tile info
+    ) const
+{
+    const UINT_32 pipeAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo);
+
+    if (!flags.tcCompatible)
+    {
+        return pipeAlign;
+    }
+
+    // Tile info is expected here; without it the pipe-only alignment is kept
+    ADDR_ASSERT(pTileInfo != NULL);
+
+    return (pTileInfo != NULL) ? (pipeAlign * pTileInfo->banks) : pipeAlign;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskBytes
+*
+*   @brief
+*       Compute cmask size in bytes
+*
+*   @return
+*       Cmask size in bytes
+***************************************************************************************************
+*/
+UINT_64 AddrLib::ComputeCmaskBytes(
+    UINT_32 pitch,        ///< [in] pitch
+    UINT_32 height,       ///< [in] height
+    UINT_32 numSlices     ///< [in] number of slices
+    ) const
+{
+    // Widen pitch to 64 bits first so the whole product is evaluated in 64-bit arithmetic
+    return BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * numSlices * CmaskElemBits) /
+        MicroTilePixels;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeCmaskInfo
+*
+*   @brief
+*       Compute cmask pitch, height, size in bytes and block max
+*
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS when the computed block max exceeds
+*       HwlGetMaxCmaskBlockMax() (the clamped value is still written to pBlockMax).
+*       Also by output parameters: Cmask pitch, height, total size in bytes,
+*       macro-tile dimensions, slice size, base alignment and block max
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputeCmaskInfo(
+    ADDR_CMASK_FLAGS flags,            ///< [in] cmask flags
+    UINT_32          pitchIn,           ///< [in] pitch input
+    UINT_32          heightIn,          ///< [in] height input
+    UINT_32          numSlices,         ///< [in] number of slices (0 is treated as 1)
+    BOOL_32          isLinear,          ///< [in] is linear mode
+    ADDR_TILEINFO*   pTileInfo,         ///< [in] Tile info
+    UINT_32*         pPitchOut,         ///< [out] pitch output
+    UINT_32*         pHeightOut,        ///< [out] height output
+    UINT_64*         pCmaskBytes,       ///< [out] total cmask bytes (slice size * numSlices)
+    UINT_32*         pMacroWidth,       ///< [out] macro-tile width in pixels (optional)
+    UINT_32*         pMacroHeight,      ///< [out] macro-tile height in pixels (optional)
+    UINT_64*         pSliceSize,        ///< [out] slice size in bytes (optional)
+    UINT_32*         pBaseAlign,        ///< [out] base alignment (optional)
+    UINT_32*         pBlockMax          ///< [out] block max == slice / 128 / 128 - 1 (optional)
+    ) const
+{
+    UINT_32 macroWidth;
+    UINT_32 macroHeight;
+    UINT_32 baseAlign;
+    UINT_64 surfBytes;
+    UINT_64 sliceBytes;
+
+    numSlices = Max(1u, numSlices);
+
+    const UINT_32 bpp = CmaskElemBits;
+    const UINT_32 cacheBits = CmaskCacheBits;
+
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    // Pick the macro-tile shape for the requested layout
+    if (isLinear)
+    {
+        HwlComputeTileDataWidthAndHeightLinear(&macroWidth,
+                                               &macroHeight,
+                                               bpp,
+                                               pTileInfo);
+    }
+    else
+    {
+        ComputeTileDataWidthAndHeight(bpp,
+                                      cacheBits,
+                                      pTileInfo,
+                                      &macroWidth,
+                                      &macroHeight);
+    }
+
+    // Round the dimensions up to whole macro tiles (mask arithmetic, as in the original code)
+    *pPitchOut = (pitchIn + macroWidth - 1) & ~(macroWidth - 1);
+    *pHeightOut = (heightIn + macroHeight - 1) & ~(macroHeight - 1);
+
+
+    sliceBytes = ComputeCmaskBytes(*pPitchOut,
+                                   *pHeightOut,
+                                   1);
+
+    baseAlign = ComputeCmaskBaseAlign(flags, pTileInfo);
+
+    // Grow the height one macro tile at a time until the slice size is a multiple of baseAlign
+    while (sliceBytes % baseAlign)
+    {
+        *pHeightOut += macroHeight;
+
+        sliceBytes = ComputeCmaskBytes(*pPitchOut,
+                                       *pHeightOut,
+                                       1);
+    }
+
+    surfBytes = sliceBytes * numSlices;
+
+    *pCmaskBytes = surfBytes;
+
+    //
+    // Use SafeAssign since they are optional
+    //
+    SafeAssign(pMacroWidth, macroWidth);
+
+    SafeAssign(pMacroHeight, macroHeight);
+
+    SafeAssign(pBaseAlign, baseAlign);
+
+    SafeAssign(pSliceSize, sliceBytes);
+
+    UINT_32 slice = (*pPitchOut) * (*pHeightOut);
+    UINT_32 blockMax = slice / 128 / 128 - 1;
+
+#if DEBUG
+    if (slice % (64*256) != 0)
+    {
+        ADDR_ASSERT_ALWAYS();
+    }
+#endif //DEBUG
+
+    UINT_32 maxBlockMax = HwlGetMaxCmaskBlockMax();
+
+    // Clamp to the hardware limit and report the overflow to the caller
+    if (blockMax > maxBlockMax)
+    {
+        blockMax = maxBlockMax;
+        returnCode = ADDR_INVALIDPARAMS;
+    }
+
+    SafeAssign(pBlockMax, blockMax);
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeXmaskCoordYFromPipe
+*
+*   @brief
+*       Compute the Y coord from pipe number for cmask/htile
+*
+*   @return
+*       Y coordinate
+*
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputeXmaskCoordYFromPipe(
+    UINT_32         pipe,       ///< [in] pipe number
+    UINT_32         x           ///< [in] x coordinate
+    ) const
+{
+    // SI has its implementation
+    const UINT_32 pipeCount = m_pipes;
+
+    UINT_32 coordY = 0;
+
+    //
+    // Convert pipe + x to y coordinate.
+    //
+    if (pipeCount == 2)
+    {
+        // 2 pipes:
+        //   p0 = x0 ^ y0   =>   y0 = p0 ^ x0
+        coordY = (pipe ^ x) & 0x1;
+    }
+    else if (pipeCount == 4)
+    {
+        // 4 pipes:
+        //   p0 = x1 ^ y0   =>   y0 = p0 ^ x1
+        //   p1 = x0 ^ y1   =>   y1 = p1 ^ x0
+        const UINT_32 lowBit  = (pipe & 0x1) ^ ((x >> 1) & 0x1);
+        const UINT_32 highBit = ((pipe >> 1) & 0x1) ^ (x & 0x1);
+
+        coordY = lowBit | (highBit << 1);
+    }
+    else if (pipeCount == 8)
+    {
+        // 8 pipes: r600 and r800 have different method
+        coordY = HwlComputeXmaskCoordYFrom8Pipe(pipe, x);
+    }
+    // 1 pipe (p0 = 0) and any other pipe count leave the result at 0
+
+    return coordY;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlComputeXmaskCoordFromAddr
+*
+*   @brief
+*       Compute the coord from an address of a cmask/htile
+*
+*   @return
+*       N/A
+*
+*   @note
+*       This method is reused by htile, so rename to Xmask
+***************************************************************************************************
+*/
+VOID AddrLib::HwlComputeXmaskCoordFromAddr(
+    UINT_64         addr,           ///< [in] address
+    UINT_32         bitPosition,    ///< [in] bitPosition in a byte
+    UINT_32         pitch,          ///< [in] pitch
+    UINT_32         height,         ///< [in] height
+    UINT_32         numSlices,      ///< [in] number of slices
+    UINT_32         factor,         ///< [in] factor that indicates cmask(2) or htile(1)
+    BOOL_32         isLinear,       ///< [in] linear or tiled HTILE layout
+    BOOL_32         isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
+    BOOL_32         isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] Tile info
+    UINT_32*        pX,             ///< [out] x coord
+    UINT_32*        pY,             ///< [out] y coord
+    UINT_32*        pSlice          ///< [out] slice index
+    ) const
+{
+    UINT_32 pipe;
+    UINT_32 numPipes;
+    UINT_32 numPipeBits;
+    UINT_32 macroTilePitch;
+    UINT_32 macroTileHeight;
+
+    UINT_64 bitAddr;
+
+    UINT_32 microTileCoordY;
+
+    UINT_32 elemBits;
+
+    UINT_32 pitchAligned = pitch;
+    UINT_32 heightAligned = height;
+    UINT_64 totalBytes;
+
+    UINT_64 elemOffset;
+
+    UINT_64 macroIndex;
+    UINT_32 microIndex;
+
+    UINT_64 macroNumber;
+    UINT_32 microNumber;
+
+    UINT_32 macroX;
+    UINT_32 macroY;
+    UINT_32 macroZ;
+
+    UINT_32 microX;
+    UINT_32 microY;
+
+    UINT_32 tilesPerMacro;
+    UINT_32 macrosPerPitch;
+    UINT_32 macrosPerSlice;
+
+    //
+    // Extract pipe.
+    //
+    numPipes = HwlGetPipes(pTileInfo);
+    pipe = ComputePipeFromAddr(addr, numPipes);
+
+    //
+    // Compute the number of group and pipe bits.
+    //
+    numPipeBits = Log2(numPipes);
+
+    UINT_32 groupBits = 8 * m_pipeInterleaveBytes;
+    UINT_32 pipes = numPipes;
+
+
+    //
+    // Compute the micro tile size, in bits. And macro tile pitch and height.
+    //
+    if (factor == 2) //CMASK
+    {
+        ADDR_CMASK_FLAGS flags = {{0}};
+
+        elemBits = CmaskElemBits;
+
+        ComputeCmaskInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         pTileInfo,
+                         &pitchAligned,
+                         &heightAligned,
+                         &totalBytes,
+                         &macroTilePitch,
+                         &macroTileHeight);
+    }
+    else  //HTILE
+    {
+        ADDR_HTILE_FLAGS flags = {{0}};
+
+        // Any factor other than 2 is handled as htile and normalized to 1
+        if (factor != 1)
+        {
+            factor = 1;
+        }
+
+        elemBits = HwlComputeHtileBpp(isWidth8, isHeight8);
+
+        ComputeHtileInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         isWidth8,
+                         isHeight8,
+                         pTileInfo,
+                         &pitchAligned,
+                         &heightAligned,
+                         &totalBytes,
+                         &macroTilePitch,
+                         &macroTileHeight);
+    }
+
+    // Should use aligned dims
+    //
+    pitch = pitchAligned;
+    height = heightAligned;
+
+
+    //
+    // Convert byte address to bit address.
+    //
+    bitAddr = BYTES_TO_BITS(addr) + bitPosition;
+
+
+    //
+    // Remove pipe bits from address.
+    //
+
+    bitAddr = (bitAddr % groupBits) + ((bitAddr/groupBits/pipes)*groupBits);
+
+
+    elemOffset = bitAddr / elemBits;
+
+    tilesPerMacro = (macroTilePitch/factor) * macroTileHeight / MicroTilePixels >> numPipeBits;
+
+    macrosPerPitch = pitch / (macroTilePitch/factor);
+    macrosPerSlice = macrosPerPitch * height / macroTileHeight;
+
+    //
+    // Split the element offset into a macro tile number and a micro tile number.
+    // The 64-bit intermediates are narrowed explicitly to the 32-bit locals.
+    //
+    macroIndex = elemOffset / factor / tilesPerMacro;
+    microIndex = static_cast<UINT_32>(elemOffset % (tilesPerMacro * factor));
+
+    macroNumber = macroIndex * factor + microIndex % factor;
+    microNumber = microIndex / factor;
+
+    macroX = static_cast<UINT_32>((macroNumber % macrosPerPitch));
+    macroY = static_cast<UINT_32>((macroNumber % macrosPerSlice) / macrosPerPitch);
+    macroZ = static_cast<UINT_32>((macroNumber / macrosPerSlice));
+
+
+    microX = microNumber % (macroTilePitch / factor / MicroTileWidth);
+    microY = (microNumber / (macroTilePitch / factor / MicroTileHeight));
+
+    *pX = macroX * (macroTilePitch/factor) + microX * MicroTileWidth;
+    *pY = macroY * macroTileHeight + (microY * MicroTileHeight << numPipeBits);
+    *pSlice = macroZ;
+
+    microTileCoordY = ComputeXmaskCoordYFromPipe(pipe,
+                                                 *pX/MicroTileWidth);
+
+
+    //
+    // Assemble final coordinates.
+    //
+    *pY += microTileCoordY * MicroTileHeight;
+
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::HwlComputeXmaskAddrFromCoord
+*
+*   @brief
+*       Compute the address from the coordinates of a cmask/htile element (prior to si)
+*
+*   @return
+*       Address in bytes
+*
+***************************************************************************************************
+*/
+UINT_64 AddrLib::HwlComputeXmaskAddrFromCoord(
+    UINT_32        pitch,          ///< [in] pitch
+    UINT_32        height,         ///< [in] height
+    UINT_32        x,              ///< [in] x coord
+    UINT_32        y,              ///< [in] y coord
+    UINT_32        slice,          ///< [in] slice/depth index
+    UINT_32        numSlices,      ///< [in] number of slices
+    UINT_32        factor,         ///< [in] factor that indicates cmask(2) or htile(1)
+    BOOL_32        isLinear,       ///< [in] linear or tiled HTILE layout
+    BOOL_32        isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
+    BOOL_32        isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
+    ADDR_TILEINFO* pTileInfo,      ///< [in] Tile info
+    UINT_32*       pBitPosition    ///< [out] bit position inside a byte
+    ) const
+{
+    UINT_64 addr;
+    UINT_32 numGroupBits;
+    UINT_32 numPipeBits;
+    UINT_32 newPitch = 0;
+    UINT_32 newHeight = 0;
+    UINT_64 sliceBytes = 0;
+    UINT_64 totalBytes = 0;
+    UINT_64 sliceOffset;
+    UINT_32 pipe;
+    UINT_32 macroTileWidth;
+    UINT_32 macroTileHeight;
+    UINT_32 macroTilesPerRow;
+    UINT_32 macroTileBytes;
+    UINT_32 macroTileIndexX;
+    UINT_32 macroTileIndexY;
+    UINT_64 macroTileOffset;
+    UINT_32 pixelBytesPerRow;
+    UINT_32 pixelOffsetX;
+    UINT_32 pixelOffsetY;
+    UINT_32 pixelOffset;
+    UINT_64 totalOffset;
+    UINT_64 offsetLo;
+    UINT_64 offsetHi;
+    UINT_64 groupMask;
+
+
+    UINT_32 elemBits = 0;
+
+    UINT_32 numPipes = m_pipes; // This function is accessed prior to si only
+
+    if (factor == 2) //CMASK
+    {
+        elemBits = CmaskElemBits;
+
+        // For asics before SI, cmask is always tiled
+        isLinear = FALSE;
+    }
+    else //HTILE
+    {
+        // Any factor other than 2 is handled as htile and normalized to 1
+        if (factor != 1) // Fix compile warning
+        {
+            factor = 1;
+        }
+
+        elemBits = HwlComputeHtileBpp(isWidth8, isHeight8);
+    }
+
+    //
+    // Compute the number of group bits and pipe bits.
+    //
+    numGroupBits = Log2(m_pipeInterleaveBytes);
+    numPipeBits  = Log2(numPipes);
+
+    //
+    // Compute macro tile dimensions.
+    //
+    if (factor == 2) // CMASK
+    {
+        ADDR_CMASK_FLAGS flags = {{0}};
+
+        ComputeCmaskInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &macroTileWidth,
+                         &macroTileHeight);
+
+        // The slice size is derived from the total here rather than requested as an output
+        sliceBytes = totalBytes / numSlices;
+    }
+    else // HTILE
+    {
+        ADDR_HTILE_FLAGS flags = {{0}};
+
+        ComputeHtileInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         isWidth8,
+                         isHeight8,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &macroTileWidth,
+                         &macroTileHeight,
+                         &sliceBytes);
+    }
+
+    sliceOffset = slice * sliceBytes;
+
+    //
+    // Get the pipe.  Note that neither slice rotation nor pipe swizzling apply for CMASK.
+    //
+    pipe = ComputePipeFromCoord(x,
+                                y,
+                                0,
+                                ADDR_TM_2D_TILED_THIN1,
+                                0,
+                                FALSE,
+                                pTileInfo);
+
+    //
+    // Compute the number of macro tiles per row.
+    //
+    macroTilesPerRow = newPitch / macroTileWidth;
+
+    //
+    // Compute the number of bytes per macro tile.
+    //
+    macroTileBytes = BITS_TO_BYTES((macroTileWidth * macroTileHeight * elemBits) / MicroTilePixels);
+
+    //
+    // Compute the offset to the macro tile containing the specified coordinate.
+    //
+    macroTileIndexX = x / macroTileWidth;
+    macroTileIndexY = y / macroTileHeight;
+    macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes;
+
+    //
+    // Compute the pixel offset within the macro tile.
+    //
+    pixelBytesPerRow = BITS_TO_BYTES(macroTileWidth * elemBits) / MicroTileWidth;
+
+    //
+    // The nibbles are interleaved (see below), so the part of the offset relative to the x
+    // coordinate repeats halfway across the row. (Not for HTILE)
+    //
+    if (factor == 2)
+    {
+        pixelOffsetX = (x % (macroTileWidth / 2)) / MicroTileWidth;
+    }
+    else
+    {
+        pixelOffsetX = (x % (macroTileWidth)) / MicroTileWidth * BITS_TO_BYTES(elemBits);
+    }
+
+    //
+    // Compute the y offset within the macro tile.
+    //
+    pixelOffsetY = (((y % macroTileHeight) / MicroTileHeight) / numPipes) * pixelBytesPerRow;
+
+    pixelOffset = pixelOffsetX + pixelOffsetY;
+
+    //
+    // Combine the slice offset and macro tile offset with the pixel offset, accounting for the
+    // pipe bits in the middle of the address.
+    //
+    totalOffset = ((sliceOffset + macroTileOffset) >> numPipeBits) + pixelOffset;
+
+    //
+    // Split the offset to put some bits below the pipe bits and some above.
+    //
+    groupMask = (1 << numGroupBits) - 1;
+    offsetLo  = totalOffset & groupMask;
+    offsetHi  = (totalOffset & ~groupMask) << numPipeBits;
+
+    //
+    // Assemble the address from its components.
+    //
+    addr  = offsetLo;
+    addr |= offsetHi;
+    // This is to remove warning with /analyze option
+    UINT_32 pipeBits = pipe << numGroupBits;
+    addr |= pipeBits;
+
+    //
+    // Compute the bit position.  The lower nibble is used when the x coordinate within the macro
+    // tile is less than half of the macro tile width, and the upper nibble is used when the x
+    // coordinate within the macro tile is greater than or equal to half the macro tile width.
+    // (With factor 1, i.e. htile, the comparison below always selects bit position 0.)
+    //
+    *pBitPosition = ((x % macroTileWidth) < (macroTileWidth / factor)) ? 0 : 4;
+
+    return addr;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Surface Addressing Shared
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceAddrFromCoordLinear
+*
+*   @brief
+*       Compute address from coord for linear surface
+*
+*   @return
+*       Address in bytes
+*
+***************************************************************************************************
+*/
+UINT_64 AddrLib::ComputeSurfaceAddrFromCoordLinear(
+    UINT_32  x,              ///< [in] x coord
+    UINT_32  y,              ///< [in] y coord
+    UINT_32  slice,          ///< [in] slice/depth index
+    UINT_32  sample,         ///< [in] sample index
+    UINT_32  bpp,            ///< [in] bits per pixel
+    UINT_32  pitch,          ///< [in] pitch
+    UINT_32  height,         ///< [in] height
+    UINT_32  numSlices,      ///< [in] number of slices
+    UINT_32* pBitPosition    ///< [out] bit position inside a byte
+    ) const
+{
+    // Widen to 64 bits before multiplying so the element counts are not truncated to 32 bits
+    const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height;
+
+    UINT_64 sliceOffset = (slice + sample * numSlices)* sliceSize;
+    UINT_64 rowOffset   = static_cast<UINT_64>(y) * pitch;
+    UINT_64 pixOffset   = x;
+
+    // Element offset scaled by bpp gives a bit address
+    UINT_64 addr = (sliceOffset + rowOffset + pixOffset) * bpp;
+
+    // Split the bit address into a bit position within the byte and a byte address
+    *pBitPosition = static_cast<UINT_32>(addr % 8);
+    addr /= 8;
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   AddrLib::ComputeSurfaceCoordFromAddrLinear
+*
+*   @brief
+*       Compute the coord from an address of a linear surface
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID AddrLib::ComputeSurfaceCoordFromAddrLinear(
+    UINT_64  addr,           ///< [in] address
+    UINT_32  bitPosition,    ///< [in] bitPosition in a byte
+    UINT_32  bpp,            ///< [in] bits per pixel
+    UINT_32  pitch,          ///< [in] pitch
+    UINT_32  height,         ///< [in] height
+    UINT_32  numSlices,      ///< [in] number of slices
+    UINT_32* pX,             ///< [out] x coord
+    UINT_32* pY,             ///< [out] y coord
+    UINT_32* pSlice,         ///< [out] slice/depth index
+    UINT_32* pSample         ///< [out] sample index
+    ) const
+{
+    // Widen to 64 bits before multiplying so the slice size is not truncated to 32 bits
+    const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height;
+
+    // Bit address divided by bpp gives the element index
+    const UINT_64 linearOffset = (BYTES_TO_BITS(addr) + bitPosition) / bpp;
+
+    // Decompose the element index; each 64-bit result is narrowed to the 32-bit output
+    *pX = static_cast<UINT_32>((linearOffset % sliceSize) % pitch);
+    *pY = static_cast<UINT_32>((linearOffset % sliceSize) / pitch % height);
+    *pSlice  = static_cast<UINT_32>((linearOffset / sliceSize) % numSlices);
+    *pSample = static_cast<UINT_32>((linearOffset / sliceSize) / numSlices);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeSurfaceCoordFromAddrMicroTiled
+*
+* @brief
+* Compute the coord from an address of a micro tiled surface
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrLib::ComputeSurfaceCoordFromAddrMicroTiled(
+    UINT_64      addr,              ///< [in] address
+    UINT_32      bitPosition,       ///< [in] bitPosition in a byte
+    UINT_32      bpp,               ///< [in] bits per pixel
+    UINT_32      pitch,             ///< [in] pitch
+    UINT_32      height,            ///< [in] height
+    UINT_32      numSamples,        ///< [in] number of samples
+    AddrTileMode tileMode,          ///< [in] tile mode
+    UINT_32      tileBase,          ///< [in] base offset within a tile
+    UINT_32      compBits,          ///< [in] component bits actually needed(for planar surface)
+    UINT_32*     pX,                ///< [out] x coord
+    UINT_32*     pY,                ///< [out] y coord
+    UINT_32*     pSlice,            ///< [out] slice/depth index
+    UINT_32*     pSample,           ///< [out] sample index,
+    AddrTileType microTileType,     ///< [in] micro tiling order
+    BOOL_32      isDepthSampleOrder ///< [in] TRUE if in depth sample order
+    ) const
+{
+    UINT_64 bitAddr;
+    UINT_32 microTileThickness;
+    UINT_32 microTileBits;
+    UINT_64 sliceBits;
+    UINT_64 rowBits;
+    UINT_32 sliceIndex;
+    UINT_32 microTileCoordX;
+    UINT_32 microTileCoordY;
+    UINT_32 pixelOffset;
+    UINT_32 pixelCoordX = 0;
+    UINT_32 pixelCoordY = 0;
+    UINT_32 pixelCoordZ = 0;
+    UINT_32 pixelCoordS = 0;
+
+    //
+    // Convert byte address to bit address.
+    //
+    bitAddr = BYTES_TO_BITS(addr) + bitPosition;
+
+    //
+    // Compute the micro tile size, in bits. Only the 1D thick mode uses a
+    // thickness other than 1 here.
+    //
+    switch (tileMode)
+    {
+        case ADDR_TM_1D_TILED_THICK:
+            microTileThickness = ThickTileThickness;
+            break;
+        default:
+            microTileThickness = 1;
+            break;
+    }
+
+    microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples;
+
+    //
+    // Compute number of bits per slice and number of bits per row of micro tiles.
+    // pitch is widened first so the slice size is evaluated in 64-bit arithmetic.
+    //
+    sliceBits = static_cast<UINT_64>(pitch) * height * microTileThickness * bpp * numSamples;
+
+    rowBits   = (pitch / MicroTileWidth) * microTileBits;
+
+    //
+    // Extract the slice index, then keep only the offset inside that slice.
+    //
+    sliceIndex = static_cast<UINT_32>(bitAddr / sliceBits);
+    bitAddr -= sliceIndex * sliceBits;
+
+    //
+    // Extract the y coordinate of the micro tile, then keep only the offset
+    // inside that row of micro tiles.
+    //
+    microTileCoordY = static_cast<UINT_32>(bitAddr / rowBits) * MicroTileHeight;
+    bitAddr -= (microTileCoordY / MicroTileHeight) * rowBits;
+
+    //
+    // Extract the x coordinate of the micro tile.
+    //
+    microTileCoordX = static_cast<UINT_32>(bitAddr / microTileBits) * MicroTileWidth;
+
+    //
+    // Compute the pixel offset within the micro tile.
+    //
+    pixelOffset = static_cast<UINT_32>(bitAddr % microTileBits);
+
+    //
+    // Extract pixel coordinates from the offset.
+    //
+    HwlComputePixelCoordFromOffset(pixelOffset,
+                                   bpp,
+                                   numSamples,
+                                   tileMode,
+                                   tileBase,
+                                   compBits,
+                                   &pixelCoordX,
+                                   &pixelCoordY,
+                                   &pixelCoordZ,
+                                   &pixelCoordS,
+                                   microTileType,
+                                   isDepthSampleOrder);
+
+    //
+    // Assemble final coordinates.
+    //
+    *pX      = microTileCoordX + pixelCoordX;
+    *pY      = microTileCoordY + pixelCoordY;
+    *pSlice  = (sliceIndex * microTileThickness) + pixelCoordZ;
+    *pSample = pixelCoordS;
+
+    // For thick micro tiles the reported sample index is forced to 0.
+    if (microTileThickness > 1)
+    {
+        *pSample = 0;
+    }
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputePipeFromAddr
+*
+* @brief
+* Compute the pipe number from an address
+*
+* @return
+* Pipe number
+*
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputePipeFromAddr(
+    UINT_64 addr,       ///< [in] address
+    UINT_32 numPipes    ///< [in] number of pipes
+    ) const
+{
+    UINT_32 pipe;
+
+    UINT_32 groupBytes = m_pipeInterleaveBytes; //just different terms
+
+    // R600
+    // The LSBs of the address are arranged as follows:
+    //   bank | pipe | group
+    //
+    // To get the pipe number, shift off the group bits and mask the pipe bits.
+    //
+
+    // R800
+    // The LSBs of the address are arranged as follows:
+    //   bank | bankInterleave | pipe | pipeInterleave
+    //
+    // To get the pipe number, shift off the pipe interleave bits and mask the pipe bits.
+    //
+
+    // NOTE(review): masking with (numPipes - 1) presumably relies on numPipes
+    // being a power of two - confirm against callers.
+    pipe = static_cast<UINT_32>(addr >> Log2(groupBytes)) & (numPipes - 1);
+
+    return pipe;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputePixelIndexWithinMicroTile
+*
+* @brief
+* Compute the pixel index inside a micro tile of surface
+*
+* @return
+* Pixel index
+*
+***************************************************************************************************
+*/
+UINT_32 AddrLib::ComputePixelIndexWithinMicroTile(
+ UINT_32 x, ///< [in] x coord
+ UINT_32 y, ///< [in] y coord
+ UINT_32 z, ///< [in] slice/depth index
+ UINT_32 bpp, ///< [in] bits per pixel
+ AddrTileMode tileMode, ///< [in] tile mode
+ AddrTileType microTileType ///< [in] pixel order in display/non-display mode
+ ) const
+{
+ // Each pixelBitN below becomes bit N of the returned pixel number. All of
+ // them start at 0, so any bit a branch does not assign stays clear.
+ UINT_32 pixelBit0 = 0;
+ UINT_32 pixelBit1 = 0;
+ UINT_32 pixelBit2 = 0;
+ UINT_32 pixelBit3 = 0;
+ UINT_32 pixelBit4 = 0;
+ UINT_32 pixelBit5 = 0;
+ UINT_32 pixelBit6 = 0;
+ UINT_32 pixelBit7 = 0;
+ UINT_32 pixelBit8 = 0;
+ UINT_32 pixelNumber;
+
+ // Only the three low bits of each coordinate are used.
+ // NOTE(review): _BIT(v, n) presumably extracts bit n of v as 0 or 1 - confirm
+ // against its definition.
+ UINT_32 x0 = _BIT(x, 0);
+ UINT_32 x1 = _BIT(x, 1);
+ UINT_32 x2 = _BIT(x, 2);
+ UINT_32 y0 = _BIT(y, 0);
+ UINT_32 y1 = _BIT(y, 1);
+ UINT_32 y2 = _BIT(y, 2);
+ UINT_32 z0 = _BIT(z, 0);
+ UINT_32 z1 = _BIT(z, 1);
+ UINT_32 z2 = _BIT(z, 2);
+
+ UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+ // Compute the pixel number within the micro tile.
+
+ if (microTileType != ADDR_THICK)
+ {
+ // Non-thick orders: bits 0-5 are a permutation of x0..x2 / y0..y2.
+ // A micro tile type matched by none of the branches below leaves
+ // bits 0-5 at zero.
+ if (microTileType == ADDR_DISPLAYABLE)
+ {
+ // Displayable order depends on the element size.
+ switch (bpp)
+ {
+ case 8:
+ pixelBit0 = x0;
+ pixelBit1 = x1;
+ pixelBit2 = x2;
+ pixelBit3 = y1;
+ pixelBit4 = y0;
+ pixelBit5 = y2;
+ break;
+ case 16:
+ pixelBit0 = x0;
+ pixelBit1 = x1;
+ pixelBit2 = x2;
+ pixelBit3 = y0;
+ pixelBit4 = y1;
+ pixelBit5 = y2;
+ break;
+ case 32:
+ pixelBit0 = x0;
+ pixelBit1 = x1;
+ pixelBit2 = y0;
+ pixelBit3 = x2;
+ pixelBit4 = y1;
+ pixelBit5 = y2;
+ break;
+ case 64:
+ pixelBit0 = x0;
+ pixelBit1 = y0;
+ pixelBit2 = x1;
+ pixelBit3 = x2;
+ pixelBit4 = y1;
+ pixelBit5 = y2;
+ break;
+ case 128:
+ pixelBit0 = y0;
+ pixelBit1 = x0;
+ pixelBit2 = x1;
+ pixelBit3 = x2;
+ pixelBit4 = y1;
+ pixelBit5 = y2;
+ break;
+ default:
+ // Any other bpp trips the assert; bits 0-5 stay zero.
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+ }
+ else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER)
+ {
+ // Same x/y bit interleave for every bpp.
+ pixelBit0 = x0;
+ pixelBit1 = y0;
+ pixelBit2 = x1;
+ pixelBit3 = y1;
+ pixelBit4 = x2;
+ pixelBit5 = y2;
+ }
+ else if (microTileType == ADDR_ROTATED)
+ {
+ ADDR_ASSERT(thickness == 1);
+
+ // Rotated order depends on the element size; there is no 128 bpp
+ // case, so that value falls into the asserting default.
+ switch (bpp)
+ {
+ case 8:
+ pixelBit0 = y0;
+ pixelBit1 = y1;
+ pixelBit2 = y2;
+ pixelBit3 = x1;
+ pixelBit4 = x0;
+ pixelBit5 = x2;
+ break;
+ case 16:
+ pixelBit0 = y0;
+ pixelBit1 = y1;
+ pixelBit2 = y2;
+ pixelBit3 = x0;
+ pixelBit4 = x1;
+ pixelBit5 = x2;
+ break;
+ case 32:
+ pixelBit0 = y0;
+ pixelBit1 = y1;
+ pixelBit2 = x0;
+ pixelBit3 = y2;
+ pixelBit4 = x1;
+ pixelBit5 = x2;
+ break;
+ case 64:
+ pixelBit0 = y0;
+ pixelBit1 = x0;
+ pixelBit2 = y1;
+ pixelBit3 = x1;
+ pixelBit4 = x2;
+ pixelBit5 = y2;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+ }
+
+ // Non-thick order on a tile mode with thickness > 1: the two low z bits
+ // are appended above the six x/y bits.
+ if (thickness > 1)
+ {
+ pixelBit6 = z0;
+ pixelBit7 = z1;
+ }
+ }
+ else // ADDR_THICK
+ {
+ ADDR_ASSERT(thickness > 1);
+
+ // Thick order: bits 0-5 interleave x0,x1 / y0,y1 / z0,z1, with the
+ // exact permutation chosen by element size.
+ switch (bpp)
+ {
+ case 8:
+ case 16:
+ pixelBit0 = x0;
+ pixelBit1 = y0;
+ pixelBit2 = x1;
+ pixelBit3 = y1;
+ pixelBit4 = z0;
+ pixelBit5 = z1;
+ break;
+ case 32:
+ pixelBit0 = x0;
+ pixelBit1 = y0;
+ pixelBit2 = x1;
+ pixelBit3 = z0;
+ pixelBit4 = y1;
+ pixelBit5 = z1;
+ break;
+ case 64:
+ case 128:
+ pixelBit0 = y0;
+ pixelBit1 = x0;
+ pixelBit2 = z0;
+ pixelBit3 = x1;
+ pixelBit4 = y1;
+ pixelBit5 = z1;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+
+ // The high x and y bits always occupy bits 6 and 7 in thick order,
+ // including after an unsupported bpp hit the assert above.
+ pixelBit6 = x2;
+ pixelBit7 = y2;
+ }
+
+ // A thickness of 8 adds the third z bit as bit 8, for either order.
+ if (thickness == 8)
+ {
+ pixelBit8 = z2;
+ }
+
+ // Pack the nine selected bits into the pixel number.
+ pixelNumber = ((pixelBit0 ) |
+ (pixelBit1 << 1) |
+ (pixelBit2 << 2) |
+ (pixelBit3 << 3) |
+ (pixelBit4 << 4) |
+ (pixelBit5 << 5) |
+ (pixelBit6 << 6) |
+ (pixelBit7 << 7) |
+ (pixelBit8 << 8));
+
+ return pixelNumber;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::AdjustPitchAlignment
+*
+* @brief
+* Adjusts pitch alignment for flipping surface
+*
+* @return
+* N/A
+*
+***************************************************************************************************
+*/
+VOID AddrLib::AdjustPitchAlignment(
+    ADDR_SURFACE_FLAGS flags,       ///< [in] Surface flags
+    UINT_32*           pPitchAlign  ///< [out] Pointer to pitch alignment
+    ) const
+{
+    // Only display and overlay surfaces get their pitch alignment adjusted.
+    if (!flags.display && !flags.overlay)
+    {
+        return;
+    }
+
+    // Display engine hardwires lower 5 bit of GRPH_PITCH to ZERO which means 32 pixel alignment
+    // Maybe it will be fixed in future but let's make it general for now.
+    UINT_32 alignedPitch = PowTwoAlign(*pPitchAlign, 32);
+
+    // Display surfaces additionally honour the configured minimum alignment.
+    if (flags.display)
+    {
+        alignedPitch = Max(m_minPitchAlignPixels, alignedPitch);
+    }
+
+    *pPitchAlign = alignedPitch;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::PadDimensions
+*
+* @brief
+* Helper function to pad dimensions
+*
+* @return
+* N/A
+*
+***************************************************************************************************
+*/
+VOID AddrLib::PadDimensions(
+    AddrTileMode       tileMode,    ///< [in] tile mode
+    UINT_32            bpp,         ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS flags,       ///< [in] surface flags
+    UINT_32            numSamples,  ///< [in] number of samples
+    ADDR_TILEINFO*     pTileInfo,   ///< [in/out] bank structure.
+    UINT_32            padDims,     ///< [in] Dimensions to pad valid value 1,2,3
+    UINT_32            mipLevel,    ///< [in] MipLevel
+    UINT_32*           pPitch,      ///< [in/out] pitch in pixels
+    UINT_32            pitchAlign,  ///< [in] pitch alignment
+    UINT_32*           pHeight,     ///< [in/out] height in pixels
+    UINT_32            heightAlign, ///< [in] height alignment
+    UINT_32*           pSlices,     ///< [in/out] number of slices
+    UINT_32            sliceAlign   ///< [in] number of slice alignment
+    ) const
+{
+    const UINT_32 tileThickness = ComputeSurfaceThickness(tileMode);
+
+    ADDR_ASSERT(padDims <= 3);
+
+    // Cubemap sub-levels override the requested padding: all three dimensions
+    // when the client passes several faces at once (treated as a 3D texture),
+    // otherwise only pitch and height.
+    if ((mipLevel > 0) && flags.cube)
+    {
+        padDims = (*pSlices > 1) ? 3 : 2;
+    }
+
+    // A request of 0 is treated as "pad everything".
+    if (padDims == 0)
+    {
+        padDims = 3;
+    }
+
+    // Pitch is always padded. Non-power-of-two alignments (r600 linear mode
+    // does not align bpp to pow2) take the generic round-up path.
+    if (IsPow2(pitchAlign))
+    {
+        *pPitch = PowTwoAlign(*pPitch, pitchAlign);
+    }
+    else
+    {
+        *pPitch = ((*pPitch + pitchAlign - 1) / pitchAlign) * pitchAlign;
+    }
+
+    // Height is padded for 2 or 3 dimensions.
+    if (padDims > 1)
+    {
+        *pHeight = PowTwoAlign(*pHeight, heightAlign);
+    }
+
+    // Slices are considered for 3 dimensions or for any thick tile mode.
+    const BOOL_32 slicesEligible = (padDims > 2) || (tileThickness > 1);
+
+    // For a single cubemap face the slices are not padded; when they are, the
+    // slice count becomes the next power of two.
+    if (slicesEligible &&
+        flags.cube &&
+        (!m_configFlags.noCubeMipSlicesPad || flags.cubeAsArray))
+    {
+        *pSlices = NextPow2(*pSlices);
+    }
+
+    // Thick modes (3D textures, arrays, thick cubemaps) align the slice count.
+    if (tileThickness > 1)
+    {
+        *pSlices = PowTwoAlign(*pSlices, sliceAlign);
+    }
+
+    // Hand the padded values to the hardware-specific hook.
+    HwlPadDimensions(tileMode,
+                     bpp,
+                     flags,
+                     numSamples,
+                     pTileInfo,
+                     padDims,
+                     mipLevel,
+                     pPitch,
+                     pitchAlign,
+                     pHeight,
+                     heightAlign,
+                     pSlices,
+                     sliceAlign);
+}
+
+
+/**
+***************************************************************************************************
+* AddrLib::HwlPreHandleBaseLvl3xPitch
+*
+* @brief
+* Pre-handler of 3x pitch (96 bit) adjustment
+*
+* @return
+* Expected pitch
+***************************************************************************************************
+*/
+UINT_32 AddrLib::HwlPreHandleBaseLvl3xPitch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,      ///< [in] input
+    UINT_32                                expPitch  ///< [in] pitch
+    ) const
+{
+    ADDR_ASSERT(pIn->width == expPitch);
+
+    // Only the base level of a linear-aligned, 3x-expanded format is adjusted;
+    // everything else keeps the pitch it came in with.
+    if (!AddrElemLib::IsExpand3x(pIn->format) ||
+        pIn->mipLevel != 0 ||
+        pIn->tileMode != ADDR_TM_LINEAR_ALIGNED)
+    {
+        return expPitch;
+    }
+
+    // The pitch was pre-multiplied by 3: recover the original one and round it
+    // up to a power of two so the mip level sizes come out right.
+    return NextPow2(expPitch / 3);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::HwlPostHandleBaseLvl3xPitch
+*
+* @brief
+* Post-handler of 3x pitch adjustment
+*
+* @return
+* Expected pitch
+***************************************************************************************************
+*/
+UINT_32 AddrLib::HwlPostHandleBaseLvl3xPitch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,      ///< [in] input
+    UINT_32                                expPitch  ///< [in] pitch
+    ) const
+{
+    // Sub levels of a 96-bit surface use an element pitch of 32 bits, so for
+    // them the pitch is returned in 32-bit pixels without multiplying by 3.
+    if (!AddrElemLib::IsExpand3x(pIn->format) ||
+        pIn->mipLevel != 0 ||
+        pIn->tileMode != ADDR_TM_LINEAR_ALIGNED)
+    {
+        return expPitch;
+    }
+
+    // Base level of a linear-aligned 3x-expanded format: apply the factor of 3.
+    return expPitch * 3;
+}
+
+
+/**
+***************************************************************************************************
+* AddrLib::IsMacroTiled
+*
+* @brief
+* Check if the tile mode is macro tiled
+*
+* @return
+* TRUE if it is macro tiled (2D/2B/3D/3B)
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsMacroTiled(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    // Read the macro-tiling flag from the per-tile-mode flag table.
+    const BOOL_32 macroTiled = m_modeFlags[tileMode].isMacro;
+
+    return macroTiled;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::IsMacro3dTiled
+*
+* @brief
+* Check if the tile mode is 3D macro tiled
+*
+* @return
+* TRUE if it is 3D macro tiled
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsMacro3dTiled(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    // Read the 3D macro-tiling flag from the per-tile-mode flag table.
+    const BOOL_32 macro3dTiled = m_modeFlags[tileMode].isMacro3d;
+
+    return macro3dTiled;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::IsMicroTiled
+*
+* @brief
+* Check if the tile mode is micro tiled
+*
+* @return
+* TRUE if micro tiled
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsMicroTiled(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    // Read the micro-tiling flag from the per-tile-mode flag table.
+    const BOOL_32 microTiled = m_modeFlags[tileMode].isMicro;
+
+    return microTiled;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::IsLinear
+*
+* @brief
+* Check if the tile mode is linear
+*
+* @return
+* TRUE if linear
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsLinear(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    // Read the linear flag from the per-tile-mode flag table.
+    const BOOL_32 linear = m_modeFlags[tileMode].isLinear;
+
+    return linear;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::IsPrtNoRotationTileMode
+*
+* @brief
+* Return TRUE if it is prt tile without rotation
+* @note
+* This function is only used by CI
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsPrtNoRotationTileMode(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    // Read the "PRT without rotation" flag from the per-tile-mode flag table.
+    const BOOL_32 prtNoRotation = m_modeFlags[tileMode].isPrtNoRotation;
+
+    return prtNoRotation;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::IsPrtTileMode
+*
+* @brief
+* Return TRUE if it is prt tile
+* @note
+* This function is only used by CI
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::IsPrtTileMode(
+    AddrTileMode tileMode)  ///< [in] tile mode
+{
+    // Read the PRT flag from the per-tile-mode flag table.
+    const BOOL_32 prt = m_modeFlags[tileMode].isPrt;
+
+    return prt;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::Bits2Number
+*
+* @brief
+* Concatenate an array of binary bits into a number
+*
+* @return
+* The number combined with the array of bits
+***************************************************************************************************
+*/
+UINT_32 AddrLib::Bits2Number(
+    UINT_32 bitNum,     ///< [in] how many bits
+    ...)                ///< [in] variable bit values, starting from the MSB
+{
+    UINT_32 number = 0;
+    va_list bits_ptr;
+
+    va_start(bits_ptr, bitNum);
+
+    for (UINT_32 i = 0; i < bitNum; i++)
+    {
+        // Make room first, then append the next bit. Shifting before the OR
+        // (rather than after it, followed by a final right shift) keeps the
+        // first argument intact when bitNum is 32; for fewer bits the result
+        // is the same as before.
+        number <<= 1;
+        number |= va_arg(bits_ptr, UINT_32);
+    }
+
+    va_end(bits_ptr);
+
+    return number;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeMipLevel
+*
+* @brief
+* Compute mipmap level width/height/slices
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrLib::ComputeMipLevel(
+    ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in/out] Input structure
+    ) const
+{
+    // DXTn's level 0 must be multiple of 4
+    // But there are exceptions:
+    // 1. Internal surface creation in hostblt/vsblt/etc...
+    // 2. Runtime doesn't reject ATI1/ATI2 whose width/height are not multiple of 4
+    // so the base level of a block-compressed format is rounded up here.
+    if (AddrElemLib::IsBlockCompressed(pIn->format) && (pIn->mipLevel == 0))
+    {
+        pIn->width  = PowTwoAlign(pIn->width, 4);
+        pIn->height = PowTwoAlign(pIn->height, 4);
+    }
+
+    // The rest of the computation is delegated to the hardware-specific hook.
+    HwlComputeMipLevel(pIn);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::DegradeBaseLevel
+*
+* @brief
+* Check if base level's tile mode can be degraded
+* @return
+* TRUE if degraded, also returns degraded tile mode (unchanged if not degraded)
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::DegradeBaseLevel(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,       ///< [in] Input structure for surface info
+    AddrTileMode*                          pTileMode  ///< [out] Degraded tile mode
+    ) const
+{
+    const AddrTileMode requestedMode = pIn->tileMode;
+    const UINT_32      thickness     = ComputeSurfaceThickness(requestedMode);
+
+    // Global switch: nothing is ever degraded when it is off.
+    if (!m_configFlags.degradeBaseLevel)
+    {
+        return FALSE;
+    }
+
+    // Per-surface conditions: opted in, base level, single sample, macro tiled.
+    if (!(pIn->flags.degrade4Space &&
+          pIn->mipLevel == 0 &&
+          pIn->numSamples == 1 &&
+          IsMacroTiled(requestedMode)))
+    {
+        return FALSE;
+    }
+
+    // The hardware layer wants the requested mode degraded: pick the 1D mode
+    // that matches its thickness.
+    if (HwlDegradeBaseLevel(pIn))
+    {
+        *pTileMode = (thickness == 1) ? ADDR_TM_1D_TILED_THIN1 : ADDR_TM_1D_TILED_THICK;
+        return TRUE;
+    }
+
+    if (thickness > 1)
+    {
+        // As in the following HwlComputeSurfaceInfo, thick modes may be degraded to
+        // thinner modes, we should re-evaluate whether the corresponding thinner modes
+        // need to be degraded. If so, we choose 1D thick mode instead.
+        const AddrTileMode thinnerMode = DegradeLargeThickTile(requestedMode, pIn->bpp);
+
+        if (thinnerMode != requestedMode)
+        {
+            ADDR_COMPUTE_SURFACE_INFO_INPUT thinnerInput = *pIn;
+            thinnerInput.tileMode = thinnerMode;
+
+            if (HwlDegradeBaseLevel(&thinnerInput))
+            {
+                *pTileMode = ADDR_TM_1D_TILED_THICK;
+                return TRUE;
+            }
+        }
+    }
+
+    // Not degraded: *pTileMode is left untouched.
+    return FALSE;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::DegradeLargeThickTile
+*
+* @brief
+* Check if the thickness needs to be reduced if a tile is too large
+* @return
+* The degraded tile mode (unchanged if not degraded)
+***************************************************************************************************
+*/
+AddrTileMode AddrLib::DegradeLargeThickTile(
+    AddrTileMode tileMode,
+    UINT_32      bpp) const
+{
+    // Override tilemode
+    // When tile_width (8) * tile_height (8) * thickness * element_bytes is > row_size,
+    // it is better to just use THIN mode in this case
+    const UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+    // Thin modes, or a configuration that allows large thick tiles, are kept.
+    if (thickness <= 1 || m_configFlags.allowLargeThickTile != 0)
+    {
+        return tileMode;
+    }
+
+    const UINT_32 tileSize = MicroTilePixels * thickness * (bpp >> 3);
+
+    // The tile fits in a row: keep the mode.
+    if (tileSize <= m_rowSize)
+    {
+        return tileMode;
+    }
+
+    // XTHICK modes drop to THICK when half the tile fits, otherwise to THIN1.
+    const BOOL_32 halfTileFits = ((tileSize >> 1) <= m_rowSize);
+
+    switch (tileMode)
+    {
+        case ADDR_TM_2D_TILED_XTHICK:
+            return halfTileFits ? ADDR_TM_2D_TILED_THICK : ADDR_TM_2D_TILED_THIN1;
+
+        case ADDR_TM_2D_TILED_THICK:
+            return ADDR_TM_2D_TILED_THIN1;
+
+        case ADDR_TM_3D_TILED_XTHICK:
+            return halfTileFits ? ADDR_TM_3D_TILED_THICK : ADDR_TM_3D_TILED_THIN1;
+
+        case ADDR_TM_3D_TILED_THICK:
+            return ADDR_TM_3D_TILED_THIN1;
+
+        case ADDR_TM_PRT_TILED_THICK:
+            return ADDR_TM_PRT_TILED_THIN1;
+
+        case ADDR_TM_PRT_2D_TILED_THICK:
+            return ADDR_TM_PRT_2D_TILED_THIN1;
+
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            return ADDR_TM_PRT_3D_TILED_THIN1;
+
+        default:
+            // Any other mode is returned unchanged.
+            return tileMode;
+    }
+}
+
+/**
+***************************************************************************************************
+* AddrLib::PostComputeMipLevel
+* @brief
+* Compute MipLevel info (including level 0) after surface adjustment
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::PostComputeMipLevel(
+    ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,  ///< [in/out] Input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut  ///< [out] Output structure
+    ) const
+{
+    // Mipmap including level 0 must be pow2 padded since either SI hw expects so or it is
+    // required by CFX for Hw Compatibility between NI and SI. Otherwise it is only needed for
+    // mipLevel > 0. Any h/w has different requirement should implement its own virtual function
+
+    const BOOL_32 forcePow2 = pIn->flags.pow2Pad;
+
+    if (forcePow2 || (pIn->mipLevel > 0))
+    {
+        pIn->width  = NextPow2(pIn->width);
+        pIn->height = NextPow2(pIn->height);
+
+        // Without the pow2Pad flag a cubemap keeps its slice count; in every
+        // other case the slice count is padded as well.
+        if (forcePow2 || !pIn->flags.cube)
+        {
+            pIn->numSlices = NextPow2(pIn->numSlices);
+        }
+    }
+
+    // pOut is not written by this implementation.
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::HwlSetupTileCfg
+*
+* @brief
+* Map tile index to tile setting.
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::HwlSetupTileCfg(
+    INT_32         index,          ///< [in] Tile index
+    INT_32         macroModeIndex, ///< [in] Index in macro tile mode table(CI)
+    ADDR_TILEINFO* pInfo,          ///< [out] Tile Info
+    AddrTileMode*  pMode,          ///< [out] Tile mode
+    AddrTileType*  pType           ///< [out] Tile type
+    ) const
+{
+    // This implementation ignores its inputs, writes none of the outputs and
+    // always reports that the operation is not supported.
+    const ADDR_E_RETURNCODE result = ADDR_NOTSUPPORTED;
+
+    return result;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::HwlGetPipes
+*
+* @brief
+* Get number pipes
+* @return
+* num pipes
+***************************************************************************************************
+*/
+UINT_32 AddrLib::HwlGetPipes(
+    const ADDR_TILEINFO* pTileInfo  ///< [in] Tile info
+    ) const
+{
+    // pTileInfo is never dereferenced here (it can be NULL when asic is 6xx
+    // and 8xx); the pipe count comes straight from the member.
+    const UINT_32 numPipes = m_pipes;
+
+    return numPipes;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputeQbStereoInfo
+*
+* @brief
+* Get quad buffer stereo information
+* @return
+* TRUE if no error
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::ComputeQbStereoInfo(
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut  ///< [in/out] updated pOut+pStereoInfo
+    ) const
+{
+    BOOL_32 success = FALSE;
+
+    // Without a stereo info structure there is nothing to fill in and the
+    // surface output is left untouched.
+    if (pOut->pStereoInfo)
+    {
+        ADDR_ASSERT(pOut->bpp >= 8);
+        ADDR_ASSERT((pOut->surfSize % pOut->baseAlign) == 0);
+
+        // Save original height
+        pOut->pStereoInfo->eyeHeight = pOut->height;
+
+        // Right offset: the current surface size, narrowed for the stereo info.
+        // NOTE(review): the UINT_32 target assumes rightOffset is 32 bits
+        // wide - confirm against ADDR_QBSTEREOINFO.
+        pOut->pStereoInfo->rightOffset = static_cast<UINT_32>(pOut->surfSize);
+
+        // Computed before the dimensions below are doubled.
+        pOut->pStereoInfo->rightSwizzle = HwlComputeQbStereoRightSwizzle(pOut);
+
+        // Double height
+        pOut->height      <<= 1;
+        pOut->pixelHeight <<= 1;
+
+        // Double size
+        pOut->surfSize <<= 1;
+
+        // Right start address meets the base align since it is guaranteed by AddrLib
+
+        // 1D surface on SI may break this rule, but we can force it to meet by checking .qbStereo.
+        success = TRUE;
+    }
+
+    return success;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Element lib
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+/**
+***************************************************************************************************
+* AddrLib::Flt32ToDepthPixel
+*
+* @brief
+* Convert a FLT_32 value to a depth/stencil pixel value
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::Flt32ToDepthPixel(
+    const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
+    ELEM_FLT32TODEPTHPIXEL_OUTPUT*      pOut) const
+{
+    // Structure sizes are only validated when the fill-size-fields flag is set.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ELEM_FLT32TODEPTHPIXEL_INPUT)) ||
+         (pOut->size != sizeof(ELEM_FLT32TODEPTHPIXEL_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // The element library performs the actual pixel conversion.
+    GetElemLib()->Flt32ToDepthPixel(pIn->format,
+                                    pIn->comps,
+                                    pOut->pPixel);
+
+    // Per-format layout; formats not listed below report all zeros.
+    UINT_32 depthBaseUnits = 0;
+    UINT_32 numDepthBits   = 0;
+    UINT_32 numStencilBits = 0;
+
+    switch (pIn->format)
+    {
+        case ADDR_DEPTH_16:
+            numDepthBits = 16;
+            break;
+        case ADDR_DEPTH_X8_24:
+        case ADDR_DEPTH_8_24:
+        case ADDR_DEPTH_X8_24_FLOAT:
+        case ADDR_DEPTH_8_24_FLOAT:
+            depthBaseUnits = 8;
+            numDepthBits   = 24;
+            numStencilBits = 8;
+            break;
+        case ADDR_DEPTH_32_FLOAT:
+            numDepthBits = 32;
+            break;
+        case ADDR_DEPTH_X24_8_32_FLOAT:
+            depthBaseUnits = 8;
+            numDepthBits   = 32;
+            numStencilBits = 8;
+            break;
+        default:
+            break;
+    }
+
+    // Overwrite base since R800 has no "tileBase"
+    if (GetElemLib()->IsDepthStencilTilePlanar() == FALSE)
+    {
+        depthBaseUnits = 0;
+    }
+
+    // No format assigns a stencil base, so it is reported as zero; the depth
+    // base is scaled by 64.
+    pOut->stencilBase = 0;
+    pOut->depthBase   = depthBaseUnits * 64;
+    pOut->depthBits   = numDepthBits;
+    pOut->stencilBits = numStencilBits;
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+* AddrLib::Flt32ToColorPixel
+*
+* @brief
+* Convert a FLT_32 value to a red/green/blue/alpha pixel value
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::Flt32ToColorPixel(
+    const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
+    ELEM_FLT32TOCOLORPIXEL_OUTPUT*      pOut) const
+{
+    // Structure sizes are only validated when the fill-size-fields flag is set.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        ((pIn->size != sizeof(ELEM_FLT32TOCOLORPIXEL_INPUT)) ||
+         (pOut->size != sizeof(ELEM_FLT32TOCOLORPIXEL_OUTPUT))))
+    {
+        return ADDR_PARAMSIZEMISMATCH;
+    }
+
+    // The element library performs the conversion and writes pOut->pPixel.
+    GetElemLib()->Flt32ToColorPixel(pIn->format,
+                                    pIn->surfNum,
+                                    pIn->surfSwap,
+                                    pIn->comps,
+                                    pOut->pPixel);
+
+    return ADDR_OK;
+}
+
+
+/**
+***************************************************************************************************
+* AddrLib::GetExportNorm
+*
+* @brief
+* Check whether a format can use EXPORT_NORM
+* @return
+* TRUE if EXPORT_NORM can be used
+***************************************************************************************************
+*/
+BOOL_32 AddrLib::GetExportNorm(
+    const ELEM_GETEXPORTNORM_INPUT* pIn) const
+{
+    // A size mismatch (checked only when the fill-size-fields flag is set) is
+    // not reported as an error code here; the answer is simply FALSE.
+    if ((GetFillSizeFieldsFlags() == TRUE) &&
+        (pIn->size != sizeof(ELEM_GETEXPORTNORM_INPUT)))
+    {
+        return FALSE;
+    }
+
+    // The element library decides based on format, number type and swap.
+    return GetElemLib()->PixGetExportNorm(pIn->format,
+                                          pIn->num,
+                                          pIn->swap);
+}
+
+/**
+***************************************************************************************************
+* AddrLib::ComputePrtInfo
+*
+* @brief
+* Compute prt surface related info
+*
+* @return
+* ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE AddrLib::ComputePrtInfo(
+    const ADDR_PRT_INFO_INPUT*  pIn,
+    ADDR_PRT_INFO_OUTPUT*       pOut) const
+{
+    ADDR_ASSERT(pOut != NULL);
+
+    UINT_32      expandX = 1;
+    UINT_32      expandY = 1;
+    AddrElemMode elemMode;
+
+    const UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
+                                                      &elemMode,
+                                                      &expandX,
+                                                      &expandY);
+
+    // Sub-byte and 24/48/96-bit element sizes are rejected.
+    const BOOL_32 bppRejected = (bpp < 8) || (bpp == 24) || (bpp == 48) || (bpp == 96);
+
+    const ADDR_E_RETURNCODE returnCode = bppRejected ? ADDR_INVALIDPARAMS : ADDR_OK;
+
+    const UINT_32 numFrags = pIn->numFrags;
+    ADDR_ASSERT(numFrags <= 8);
+
+    // Both stay 0 when the element size is rejected or not in the tables below.
+    UINT_32 tileWidth  = 0;
+    UINT_32 tileHeight = 0;
+
+    if (!bppRejected)
+    {
+        if ((pIn->baseMipDepth > 1) || (pIn->baseMipHeight > 1))
+        {
+            // 3D texture without depth or 2d texture
+            switch (bpp)
+            {
+                case 8:
+                    tileWidth  = 256;
+                    tileHeight = 256;
+                    break;
+                case 16:
+                    tileWidth  = 256;
+                    tileHeight = 128;
+                    break;
+                case 32:
+                    tileWidth  = 128;
+                    tileHeight = 128;
+                    break;
+                case 64:
+                    if (elemMode == ADDR_UNCOMPRESSED)
+                    {
+                        tileWidth  = 128;
+                        tileHeight = 64;
+                    }
+                    else
+                    {
+                        // assume it is BC1/4
+                        tileWidth  = 512;
+                        tileHeight = 256;
+                    }
+                    break;
+                case 128:
+                    if (elemMode == ADDR_UNCOMPRESSED)
+                    {
+                        tileWidth  = 64;
+                        tileHeight = 64;
+                    }
+                    else
+                    {
+                        // assume it is BC2/3/5/6H/7
+                        tileWidth  = 256;
+                        tileHeight = 256;
+                    }
+                    break;
+                default:
+                    break;
+            }
+
+            // Multi-fragment surfaces shrink the tile; fragment counts other
+            // than 2, 4 and 8 leave it unchanged.
+            switch (numFrags)
+            {
+                case 2:
+                    tileWidth /= 2;
+                    break;
+                case 4:
+                    tileWidth  /= 2;
+                    tileHeight /= 2;
+                    break;
+                case 8:
+                    tileWidth  /= 4;
+                    tileHeight /= 2;
+                    break;
+                default:
+                    break;
+            }
+        }
+        else
+        {
+            // 1d: a single row whose width depends on the element size.
+            tileHeight = 1;
+
+            switch (bpp)
+            {
+                case 8:
+                    tileWidth = 65536;
+                    break;
+                case 16:
+                    tileWidth = 32768;
+                    break;
+                case 32:
+                    tileWidth = 16384;
+                    break;
+                case 64:
+                    tileWidth = 8192;
+                    break;
+                case 128:
+                    tileWidth = 4096;
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    // The outputs are written even when the element size was rejected.
+    pOut->prtTileWidth  = tileWidth;
+    pOut->prtTileHeight = tileHeight;
+
+    return returnCode;
+}
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.h b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.h
new file mode 100644
index 00000000000..43c55ff32ff
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrlib.h
@@ -0,0 +1,695 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrlib.h
+* @brief Contains the AddrLib base class definition.
+***************************************************************************************************
+*/
+
+#ifndef __ADDR_LIB_H__
+#define __ADDR_LIB_H__
+
+
+#include "addrinterface.h"
+#include "addrobject.h"
+#include "addrelemlib.h"
+
+#if BRAHMA_BUILD
+#include "amdgpu_id.h"
+#else
+#include "atiid.h"
+#endif
+
+#ifndef CIASICIDGFXENGINE_R600 // fallback: defined here only when no earlier header supplied it
+#define CIASICIDGFXENGINE_R600 0x00000006
+#endif
+
+#ifndef CIASICIDGFXENGINE_R800 // same fallback pattern as above
+#define CIASICIDGFXENGINE_R800 0x00000008
+#endif
+
+#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND // same fallback pattern as above
+#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
+#endif
+
+#ifndef CIASICIDGFXENGINE_SEAISLAND // same fallback pattern as above
+#define CIASICIDGFXENGINE_SEAISLAND 0x0000000B
+#endif
+/**
+***************************************************************************************************
+* @brief Neutral enums that define pipeinterleave
+***************************************************************************************************
+*/
+enum AddrPipeInterleave
+{
+ ADDR_PIPEINTERLEAVE_256B = 256, ///< Numeric value equals the size named by the enumerator (256)
+ ADDR_PIPEINTERLEAVE_512B = 512, ///< Numeric value equals the size named by the enumerator (512)
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define DRAM row size
+***************************************************************************************************
+*/
+enum AddrRowSize
+{
+ ADDR_ROWSIZE_1KB = 1024, ///< 1 KB expressed in bytes
+ ADDR_ROWSIZE_2KB = 2048, ///< 2 KB expressed in bytes
+ ADDR_ROWSIZE_4KB = 4096, ///< 4 KB expressed in bytes
+ ADDR_ROWSIZE_8KB = 8192, ///< 8 KB expressed in bytes
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define bank interleave
+***************************************************************************************************
+*/
+enum AddrBankInterleave
+{
+ ADDR_BANKINTERLEAVE_1 = 1, ///< Numeric value equals the number in the enumerator name
+ ADDR_BANKINTERLEAVE_2 = 2, ///< Numeric value equals the number in the enumerator name
+ ADDR_BANKINTERLEAVE_4 = 4, ///< Numeric value equals the number in the enumerator name
+ ADDR_BANKINTERLEAVE_8 = 8, ///< Numeric value equals the number in the enumerator name
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define MGPU chip tile size
+***************************************************************************************************
+*/
+enum AddrChipTileSize
+{
+ ADDR_CHIPTILESIZE_16 = 16, ///< Numeric value equals the number in the enumerator name
+ ADDR_CHIPTILESIZE_32 = 32, ///< Numeric value equals the number in the enumerator name
+ ADDR_CHIPTILESIZE_64 = 64, ///< Numeric value equals the number in the enumerator name
+ ADDR_CHIPTILESIZE_128 = 128, ///< Numeric value equals the number in the enumerator name
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define shader engine tile size
+***************************************************************************************************
+*/
+enum AddrEngTileSize
+{
+ ADDR_SE_TILESIZE_16 = 16, ///< Numeric value equals the number in the enumerator name
+ ADDR_SE_TILESIZE_32 = 32, ///< Numeric value equals the number in the enumerator name
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define bank swap size
+***************************************************************************************************
+*/
+enum AddrBankSwapSize
+{
+ ADDR_BANKSWAP_128B = 128, ///< Numeric value equals the size named by the enumerator (128)
+ ADDR_BANKSWAP_256B = 256, ///< Numeric value equals the size named by the enumerator (256)
+ ADDR_BANKSWAP_512B = 512, ///< Numeric value equals the size named by the enumerator (512)
+ ADDR_BANKSWAP_1KB = 1024, ///< 1 KB expressed in bytes
+};
+
+/**
+***************************************************************************************************
+* @brief Neutral enums that define sample split size
+***************************************************************************************************
+*/
+enum AddrSampleSplitSize
+{
+ ADDR_SAMPLESPLIT_1KB = 1024, ///< 1 KB expressed in bytes
+ ADDR_SAMPLESPLIT_2KB = 2048, ///< 2 KB expressed in bytes
+ ADDR_SAMPLESPLIT_4KB = 4096, ///< 4 KB expressed in bytes
+ ADDR_SAMPLESPLIT_8KB = 8192, ///< 8 KB expressed in bytes
+};
+
+/**
+***************************************************************************************************
+* @brief Flags for AddrTileMode
+***************************************************************************************************
+*/
+struct AddrTileModeFlags
+{
+ UINT_32 thickness : 4; ///< 4-bit field (0-15); presumably what ComputeSurfaceThickness() reports -- TODO confirm
+ UINT_32 isLinear : 1; ///< 1-bit flag; presumably backs IsLinear() -- TODO confirm
+ UINT_32 isMicro : 1; ///< 1-bit flag; presumably backs IsMicroTiled() -- TODO confirm
+ UINT_32 isMacro : 1; ///< 1-bit flag; presumably backs IsMacroTiled() -- TODO confirm
+ UINT_32 isMacro3d : 1; ///< 1-bit flag; presumably backs IsMacro3dTiled() -- TODO confirm
+ UINT_32 isPrt : 1; ///< 1-bit flag; presumably backs IsPrtTileMode() -- TODO confirm
+ UINT_32 isPrtNoRotation : 1; ///< 1-bit flag; presumably backs IsPrtNoRotationTileMode() -- TODO confirm
+ UINT_32 isBankSwapped : 1; ///< 1-bit flag; no accessor for it is declared in this header
+};
+
+/**
+***************************************************************************************************
+* @brief This class contains asic independent address lib functionalities
+***************************************************************************************************
+*/
+class AddrLib : public AddrObject
+{
+public:
+ virtual ~AddrLib();
+
+ static ADDR_E_RETURNCODE Create(
+ const ADDR_CREATE_INPUT* pCreateInfo, ADDR_CREATE_OUTPUT* pCreateOut);
+
+ /// Pair of Create: destroys this object via "delete this", so the instance must not be used afterwards
+ VOID Destroy()
+ {
+ delete this;
+ }
+
+ static AddrLib* GetAddrLib(
+ ADDR_HANDLE hLib);
+
+ /// Returns the AddrLib version stored in m_version (from the compiled binary instead of the include file)
+ UINT_32 GetVersion()
+ {
+ return m_version;
+ }
+
+ /// Returns the asic chip family defined by AddrLib, as stored in m_chipFamily
+ AddrChipFamily GetAddrChipFamily()
+ {
+ return m_chipFamily;
+ }
+
+    /// TRUE only when index is not TileIndexInvalid and the useTileIndex config flag is set
+    BOOL_32 UseTileIndex(INT_32 index) const
+    {
+        return (index != TileIndexInvalid) && m_configFlags.useTileIndex;
+    }
+
+ /// Returns combined swizzle support, i.e. the useCombinedSwizzle bit of m_configFlags
+ BOOL_32 UseCombinedSwizzle() const
+ {
+ return m_configFlags.useCombinedSwizzle;
+ }
+
+ //
+ // Interface stubs
+ //
+ ADDR_E_RETURNCODE ComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeSurfaceAddrFromCoord(
+ const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeSurfaceCoordFromAddr(
+ const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeSliceTileSwizzle(
+ const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
+ ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ExtractBankPipeSwizzle(
+ const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
+ ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE CombineBankPipeSwizzle(
+ const ADDR_COMBINE_BANKPIPE_SWIZZLE_INPUT* pIn,
+ ADDR_COMBINE_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeBaseSwizzle(
+ const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
+ ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeFmaskInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+ ADDR_E_RETURNCODE ComputeFmaskAddrFromCoord(
+ const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeFmaskCoordFromAddr(
+ const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ConvertTileInfoToHW(
+ const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+ ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ConvertTileIndex(
+ const ADDR_CONVERT_TILEINDEX_INPUT* pIn,
+ ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ConvertTileIndex1(
+ const ADDR_CONVERT_TILEINDEX1_INPUT* pIn,
+ ADDR_CONVERT_TILEINDEX_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE GetTileIndex(
+ const ADDR_GET_TILEINDEX_INPUT* pIn,
+ ADDR_GET_TILEINDEX_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeHtileInfo(
+ const ADDR_COMPUTE_HTILE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeCmaskInfo(
+ const ADDR_COMPUTE_CMASK_INFO_INPUT* pIn,
+ ADDR_COMPUTE_CMASK_INFO_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeDccInfo(
+ const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
+ ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeHtileAddrFromCoord(
+ const ADDR_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeCmaskAddrFromCoord(
+ const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeHtileCoordFromAddr(
+ const ADDR_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputeCmaskCoordFromAddr(
+ const ADDR_COMPUTE_CMASK_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_CMASK_COORDFROMADDR_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE ComputePrtInfo(
+ const ADDR_PRT_INFO_INPUT* pIn,
+ ADDR_PRT_INFO_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE Flt32ToDepthPixel(
+ const ELEM_FLT32TODEPTHPIXEL_INPUT* pIn,
+ ELEM_FLT32TODEPTHPIXEL_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE Flt32ToColorPixel(
+ const ELEM_FLT32TOCOLORPIXEL_INPUT* pIn,
+ ELEM_FLT32TOCOLORPIXEL_OUTPUT* pOut) const;
+
+ BOOL_32 GetExportNorm(
+ const ELEM_GETEXPORTNORM_INPUT* pIn) const;
+
+protected:
+ AddrLib(); // Constructor is protected
+ AddrLib(const AddrClient* pClient);
+
+ /// Pure Virtual function for Hwl computing surface info
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl computing surface address from coord
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
+ const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl computing surface coord from address
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
+ const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl computing surface tile swizzle
+ virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
+ const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
+ ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl extracting bank/pipe swizzle from base256b
+ virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
+ const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
+ ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl combining bank/pipe swizzle
+ virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
+ UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo,
+ UINT_64 baseAddr, UINT_32* pTileSwizzle) const = 0;
+
+ /// Pure Virtual function for Hwl computing base swizzle
+ virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
+ const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
+ ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl computing HTILE base align
+ virtual UINT_32 HwlComputeHtileBaseAlign(
+ BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const = 0;
+
+ /// Pure Virtual function for Hwl computing HTILE bpp
+ virtual UINT_32 HwlComputeHtileBpp(
+ BOOL_32 isWidth8, BOOL_32 isHeight8) const = 0;
+
+ /// Pure Virtual function for Hwl computing HTILE bytes
+ virtual UINT_64 HwlComputeHtileBytes(
+ UINT_32 pitch, UINT_32 height, UINT_32 bpp,
+ BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const = 0;
+
+ /// Pure Virtual function for Hwl computing FMASK info
+ virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut) = 0;
+
+ /// Pure Virtual function for Hwl FMASK address from coord
+ virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
+ const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl FMASK coord from address
+ virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
+ const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl convert tile info from real value to HW value
+ virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
+ const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+ ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const = 0;
+
+ /// Pure Virtual function for Hwl compute mipmap info
+ virtual BOOL_32 HwlComputeMipLevel(
+ ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0;
+
+ /// Pure Virtual function for Hwl compute max cmask blockMax value
+ virtual BOOL_32 HwlGetMaxCmaskBlockMax() const = 0;
+
+ /// Pure Virtual function for Hwl compute fmask bits
+ virtual UINT_32 HwlComputeFmaskBits(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+ UINT_32* pNumSamples) const = 0;
+
+ /// Virtual function to get index (not pure, so not every HWL has to implement it); this default returns ADDR_NOTSUPPORTED
+ virtual ADDR_E_RETURNCODE HwlGetTileIndex(
+ const ADDR_GET_TILEINDEX_INPUT* pIn,
+ ADDR_GET_TILEINDEX_OUTPUT* pOut) const
+ {
+ return ADDR_NOTSUPPORTED;
+ }
+
+ /// Virtual function for Hwl to compute Dcc info; this default ignores its arguments and returns ADDR_NOTSUPPORTED
+ virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
+ const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
+ ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const
+ {
+ return ADDR_NOTSUPPORTED;
+ }
+
+ /// Virtual function to get cmask address for tc compatible cmask; this default returns ADDR_NOTSUPPORTED
+ virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
+ const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const
+ {
+ return ADDR_NOTSUPPORTED;
+ }
+ // Compute attributes
+
+ // HTILE
+ UINT_32 ComputeHtileInfo(
+ ADDR_HTILE_FLAGS flags,
+ UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices,
+ BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
+ ADDR_TILEINFO* pTileInfo,
+ UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pHtileBytes,
+ UINT_32* pMacroWidth = NULL, UINT_32* pMacroHeight = NULL,
+ UINT_64* pSliceSize = NULL, UINT_32* pBaseAlign = NULL) const;
+
+ // CMASK
+ ADDR_E_RETURNCODE ComputeCmaskInfo(
+ ADDR_CMASK_FLAGS flags,
+ UINT_32 pitchIn, UINT_32 heightIn, UINT_32 numSlices, BOOL_32 isLinear,
+ ADDR_TILEINFO* pTileInfo, UINT_32* pPitchOut, UINT_32* pHeightOut, UINT_64* pCmaskBytes,
+ UINT_32* pMacroWidth, UINT_32* pMacroHeight, UINT_64* pSliceSize = NULL,
+ UINT_32* pBaseAlign = NULL, UINT_32* pBlockMax = NULL) const;
+
+ virtual VOID HwlComputeTileDataWidthAndHeightLinear(
+ UINT_32* pMacroWidth, UINT_32* pMacroHeight,
+ UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
+
+ // CMASK & HTILE addressing
+ virtual UINT_64 HwlComputeXmaskAddrFromCoord(
+ UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice,
+ UINT_32 numSlices, UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8,
+ BOOL_32 isHeight8, ADDR_TILEINFO* pTileInfo,
+ UINT_32* bitPosition) const;
+
+ virtual VOID HwlComputeXmaskCoordFromAddr(
+ UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
+ UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
+ ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;
+
+ // Surface mipmap
+ VOID ComputeMipLevel(
+ ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
+
+ /// Pure Virtual function for Hwl checking degrade for base level
+ virtual BOOL_32 HwlDegradeBaseLevel(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const = 0;
+
+ virtual BOOL_32 HwlOverrideTileMode(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ AddrTileMode* pTileMode,
+ AddrTileType* pTileType) const
+ {
+ // Base implementation performs no override; FALSE means "not overridden"
+ return FALSE;
+ }
+
+ AddrTileMode DegradeLargeThickTile(AddrTileMode tileMode, UINT_32 bpp) const;
+
+ VOID PadDimensions(
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+ UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
+ UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
+ UINT_32* pSlices, UINT_32 sliceAlign) const;
+
+ virtual VOID HwlPadDimensions(
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+ UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
+ UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
+ UINT_32* pSlices, UINT_32 sliceAlign) const
+ { // default implementation is intentionally empty: no argument is read or written
+ }
+
+ //
+ // Addressing shared for linear/1D tiling
+ //
+ UINT_64 ComputeSurfaceAddrFromCoordLinear(
+ UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
+ UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
+ UINT_32* pBitPosition) const;
+
+ VOID ComputeSurfaceCoordFromAddrLinear(
+ UINT_64 addr, UINT_32 bitPosition, UINT_32 bpp,
+ UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
+ UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;
+
+ VOID ComputeSurfaceCoordFromAddrMicroTiled(
+ UINT_64 addr, UINT_32 bitPosition,
+ UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
+ UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
+ AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;
+
+ UINT_32 ComputePixelIndexWithinMicroTile(
+ UINT_32 x, UINT_32 y, UINT_32 z,
+ UINT_32 bpp, AddrTileMode tileMode, AddrTileType microTileType) const;
+
+ /// Pure Virtual function for Hwl computing coord from offset inside micro tile
+ virtual VOID HwlComputePixelCoordFromOffset(
+ UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
+ AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
+ UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
+ AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const = 0;
+
+ //
+ // Addressing shared by all
+ //
+ virtual UINT_32 HwlGetPipes(
+ const ADDR_TILEINFO* pTileInfo) const;
+
+ UINT_32 ComputePipeFromAddr(
+ UINT_64 addr, UINT_32 numPipes) const;
+
+ /// Pure Virtual function for Hwl computing pipe from coord
+ virtual UINT_32 ComputePipeFromCoord(
+ UINT_32 x, UINT_32 y, UINT_32 slice, AddrTileMode tileMode,
+ UINT_32 pipeSwizzle, BOOL_32 flags, ADDR_TILEINFO* pTileInfo) const = 0;
+
+ /// Pure Virtual function for Hwl computing coord Y for 8 pipe cmask/htile
+ virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
+ UINT_32 pipe, UINT_32 x) const = 0;
+
+ //
+ // Initialization
+ //
+ /// Pure Virtual function for Hwl computing internal global parameters from h/w registers
+ virtual BOOL_32 HwlInitGlobalParams(
+ const ADDR_CREATE_INPUT* pCreateIn) = 0;
+
+ /// Pure Virtual function for Hwl converting chip family
+ virtual AddrChipFamily HwlConvertChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision) = 0;
+
+ //
+ // Misc helper
+ //
+ static const AddrTileModeFlags m_modeFlags[ADDR_TM_COUNT];
+
+ static UINT_32 ComputeSurfaceThickness(
+ AddrTileMode tileMode);
+
+ // Checking tile mode
+ static BOOL_32 IsMacroTiled(AddrTileMode tileMode);
+ static BOOL_32 IsMacro3dTiled(AddrTileMode tileMode);
+ static BOOL_32 IsLinear(AddrTileMode tileMode);
+ static BOOL_32 IsMicroTiled(AddrTileMode tileMode);
+ static BOOL_32 IsPrtTileMode(AddrTileMode tileMode);
+ static BOOL_32 IsPrtNoRotationTileMode(AddrTileMode tileMode);
+
+ static UINT_32 Bits2Number(UINT_32 bitNum,...);
+
+    static UINT_32 GetNumFragments(UINT_32 numSamples, UINT_32 numFrags)
+    {
+        return (numFrags == 0) ? Max(1u, numSamples) : numFrags; // non-zero numFrags is returned as-is
+    }
+
+ /// Returns the AddrElemLib pointer stored in m_pElemLib
+ AddrElemLib* GetElemLib() const
+ {
+ return m_pElemLib;
+ }
+
+ /// Return TRUE if tile info is needed, i.e. the ignoreTileInfo config flag is not set
+ BOOL_32 UseTileInfo() const
+ {
+ return !m_configFlags.ignoreTileInfo;
+ }
+
+ /// Returns the fillSizeFields flag read from m_configFlags
+ UINT_32 GetFillSizeFieldsFlags() const
+ {
+ return m_configFlags.fillSizeFields;
+ }
+
+ /// Adjusts pitch alignment for flipping surface
+ VOID AdjustPitchAlignment(
+ ADDR_SURFACE_FLAGS flags, UINT_32* pPitchAlign) const;
+
+ /// Overwrite tile config according to tile index
+ virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
+ INT_32 index, INT_32 macroModeIndex,
+ ADDR_TILEINFO* pInfo, AddrTileMode* mode = NULL, AddrTileType* type = NULL) const;
+
+ /// Overwrite macro tile config according to tile index; this default ignores its arguments and returns TileIndexNoMacroIndex
+ virtual INT_32 HwlComputeMacroModeIndex(
+ INT_32 index, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
+ ADDR_TILEINFO* pTileInfo, AddrTileMode *pTileMode = NULL, AddrTileType *pTileType = NULL
+ ) const
+ {
+ return TileIndexNoMacroIndex;
+ }
+
+ /// Pre-handler of 3x pitch (96 bit) adjustment
+ virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
+ /// Post-handler of 3x pitch adjustment
+ virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
+ /// Check miplevel after surface adjustment
+ ADDR_E_RETURNCODE PostComputeMipLevel(
+ ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ /// Quad buffer stereo support, has its implementation in ind. layer
+ virtual BOOL_32 ComputeQbStereoInfo(
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ /// Pure virtual function to compute stereo bank swizzle for right eye
+ virtual UINT_32 HwlComputeQbStereoRightSwizzle(
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
+
+private:
+ // Disallow the copy constructor
+ AddrLib(const AddrLib& a);
+
+ // Disallow the assignment operator
+ AddrLib& operator=(const AddrLib& a);
+
+ VOID SetAddrChipFamily(UINT_32 uChipFamily, UINT_32 uChipRevision);
+
+ UINT_32 ComputeCmaskBaseAlign(
+ ADDR_CMASK_FLAGS flags, ADDR_TILEINFO* pTileInfo) const;
+
+ UINT_64 ComputeCmaskBytes(
+ UINT_32 pitch, UINT_32 height, UINT_32 numSlices) const;
+
+ //
+ // CMASK/HTILE shared methods
+ //
+ VOID ComputeTileDataWidthAndHeight(
+ UINT_32 bpp, UINT_32 cacheBits, ADDR_TILEINFO* pTileInfo,
+ UINT_32* pMacroWidth, UINT_32* pMacroHeight) const;
+
+ UINT_32 ComputeXmaskCoordYFromPipe(
+ UINT_32 pipe, UINT_32 x) const;
+
+ VOID SetMinPitchAlignPixels(UINT_32 minPitchAlignPixels);
+
+ BOOL_32 DegradeBaseLevel(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, AddrTileMode* pTileMode) const;
+
+protected:
+ AddrLibClass m_class; ///< Store class type (HWL type)
+
+ AddrChipFamily m_chipFamily; ///< Chip family translated from the one in atiid.h
+
+ UINT_32 m_chipRevision; ///< Revision id from xxx_id.h
+
+ UINT_32 m_version; ///< Current version
+
+ //
+ // Global parameters
+ //
+ ADDR_CONFIG_FLAGS m_configFlags; ///< Global configuration flags. Note this is setup by
+ /// AddrLib instead of Client except forceLinearAligned
+
+ UINT_32 m_pipes; ///< Number of pipes
+ UINT_32 m_banks; ///< Number of banks
+ /// For r800 this is MC_ARB_RAMCFG.NOOFBANK
+ /// Keep it here to do default parameter calculation
+
+ UINT_32 m_pipeInterleaveBytes;
+ ///< Specifies the size of contiguous address space
+ /// within each tiling pipe when making linear
+ /// accesses. (Formerly Group Size)
+
+ UINT_32 m_rowSize; ///< DRAM row size, in bytes
+
+ UINT_32 m_minPitchAlignPixels; ///< Minimum pitch alignment in pixels
+ UINT_32 m_maxSamples; ///< Max numSamples
+private:
+ AddrElemLib* m_pElemLib; ///< Element Lib pointer
+};
+
+AddrLib* AddrSIHwlInit (const AddrClient* pClient); // defined outside this header; presumably creates the SI HWL object -- confirm
+AddrLib* AddrCIHwlInit (const AddrClient* pClient); // defined outside this header; presumably creates the CI HWL object -- confirm
+
+#endif
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.cpp b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.cpp
new file mode 100644
index 00000000000..863a252fcf1
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.cpp
@@ -0,0 +1,246 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrobject.cpp
+* @brief Contains the AddrObject base class implementation.
+***************************************************************************************************
+*/
+
+#include "addrinterface.h"
+#include "addrobject.h"
+
+/**
+***************************************************************************************************
+* AddrObject::AddrObject
+*
+* @brief
+* Constructor for the AddrObject class.
+***************************************************************************************************
+*/
+AddrObject::AddrObject()
+{
+    m_client.callbacks.debugPrint  = NULL; // no client attached yet:
+    m_client.callbacks.freeSysMem  = NULL; // every callback slot set here is cleared
+    m_client.callbacks.allocSysMem = NULL;
+    m_client.handle                = NULL; // and the client handle is null
+}
+
+/**
+***************************************************************************************************
+* AddrObject::AddrObject
+*
+* @brief
+* Constructor for the AddrObject class.
+***************************************************************************************************
+*/
+AddrObject::AddrObject(const AddrClient* pClient)
+    : m_client(*pClient) // keep a private copy of the caller's client descriptor
+{
+}
+
+/**
+***************************************************************************************************
+* AddrObject::~AddrObject
+*
+* @brief
+* Destructor for the AddrObject class.
+***************************************************************************************************
+*/
+AddrObject::~AddrObject()
+{ // intentionally empty: the destructor body performs no cleanup
+}
+
+/**
+***************************************************************************************************
+* AddrObject::ClientAlloc
+*
+* @brief
+* Calls instanced allocSysMem inside AddrClient
+***************************************************************************************************
+*/
+VOID* AddrObject::ClientAlloc(
+    size_t             objSize,   ///< [in] Size to allocate (passed on as sizeInBytes)
+    const AddrClient*  pClient)   ///< [in] Client whose allocSysMem callback is used
+{
+    VOID* pObjMem = NULL; // stays NULL when the client installed no allocator
+
+    if (pClient->callbacks.allocSysMem != NULL)
+    {
+        ADDR_ALLOCSYSMEM_INPUT allocInput = {0};
+
+        allocInput.size        = sizeof(ADDR_ALLOCSYSMEM_INPUT);
+        allocInput.flags.value = 0;
+        allocInput.sizeInBytes = static_cast<UINT_32>(objSize); // explicit size_t -> UINT_32 conversion
+        allocInput.hClient     = pClient->handle;
+
+        pObjMem = pClient->callbacks.allocSysMem(&allocInput);
+    }
+
+    return pObjMem;
+}
+
+/**
+***************************************************************************************************
+* AddrObject::AddrMalloc
+*
+* @brief
+* A wrapper of ClientAlloc
+***************************************************************************************************
+*/
+VOID* AddrObject::AddrMalloc(
+    size_t objSize) const    ///< [in] Size to allocate
+{
+    return ClientAlloc(objSize, &m_client); // uses this object's own client; result may be NULL
+}
+
+/**
+***************************************************************************************************
+* AddrObject::ClientFree
+*
+* @brief
+* Calls freeSysMem inside AddrClient
+***************************************************************************************************
+*/
+VOID AddrObject::ClientFree(
+    VOID*              pObjMem,   ///< [in] User virtual address to free.
+    const AddrClient*  pClient)   ///< [in] Client pointer
+{
+    // Guard: no free callback installed, or nothing to hand back.
+    if ((pClient->callbacks.freeSysMem == NULL) || (pObjMem == NULL))
+    {
+        return;
+    }
+
+    ADDR_FREESYSMEM_INPUT request = {0};
+
+    request.size      = sizeof(ADDR_FREESYSMEM_INPUT);
+    request.hClient   = pClient->handle;
+    request.pVirtAddr = pObjMem;
+
+    pClient->callbacks.freeSysMem(&request);
+}
+
+/**
+***************************************************************************************************
+* AddrObject::AddrFree
+*
+* @brief
+* A wrapper of ClientFree
+***************************************************************************************************
+*/
+VOID AddrObject::AddrFree(
+ VOID* pObjMem) const ///< [in] User virtual address to free.
+{
+ ClientFree(pObjMem, &m_client); // forwards to ClientFree with this object's own client
+}
+
+/**
+***************************************************************************************************
+* AddrObject::operator new
+*
+* @brief
+* Allocates memory needed for AddrObject object. (with ADDR_CLIENT_HANDLE)
+*
+* @return
+* Returns NULL if unsuccessful.
+***************************************************************************************************
+*/
+VOID* AddrObject::operator new(
+ size_t objSize, ///< [in] Size to allocate
+ const AddrClient* pClient) ///< [in] Client pointer
+{
+ return ClientAlloc(objSize, pClient); // delegates to ClientAlloc; the result may be NULL
+}
+
+
+/**
+***************************************************************************************************
+* AddrObject::operator delete
+*
+* @brief
+* Frees AddrObject object memory.
+***************************************************************************************************
+*/
+VOID AddrObject::operator delete(
+ VOID* pObjMem, ///< [in] User virtual address to free.
+ const AddrClient* pClient) ///< [in] Client pointer (same parameter shape as the matching operator new)
+{
+ ClientFree(pObjMem, pClient); // forwards to ClientFree with the client passed in by the caller
+}
+
+/**
+***************************************************************************************************
+* AddrObject::operator delete
+*
+* @brief
+* Frees AddrObject object memory.
+***************************************************************************************************
+*/
+VOID AddrObject::operator delete(
+    VOID* pObjMem)    ///< [in] User virtual address to free.
+{
+    AddrObject* pObj = static_cast<AddrObject*>(pObjMem); // the client record is read from the object itself
+    ClientFree(pObjMem, &pObj->m_client);
+}
+
+/**
+***************************************************************************************************
+* AddrObject::DebugPrint
+*
+* @brief
+* Print debug message
+*
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID AddrObject::DebugPrint(
+    const CHAR* pDebugString,     ///< [in] Debug string
+    ...) const
+{
+#if DEBUG
+    if (m_client.callbacks.debugPrint != NULL) // without a callback there is nowhere to send the message
+    {
+        va_list ap;
+
+        va_start(ap, pDebugString);
+
+        ADDR_DEBUGPRINT_INPUT debugPrintInput = {0};
+
+        debugPrintInput.size         = sizeof(ADDR_DEBUGPRINT_INPUT);
+        debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString);
+        debugPrintInput.hClient      = m_client.handle;
+        va_copy(debugPrintInput.ap, ap); // the callback gets its own copy of the argument list
+
+        m_client.callbacks.debugPrint(&debugPrintInput);
+        va_end(debugPrintInput.ap); // every va_copy needs a matching va_end
+
+        va_end(ap);
+    }
+#endif
+}
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.h b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.h
new file mode 100644
index 00000000000..35400885afe
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/core/addrobject.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file addrobject.h
+* @brief Contains the AddrObject base class definition.
+***************************************************************************************************
+*/
+
+#ifndef __ADDR_OBJECT_H__
+#define __ADDR_OBJECT_H__
+
+#include "addrtypes.h"
+#include "addrcommon.h"
+
+/**
+***************************************************************************************************
+* @brief This structure contains client specific data
+***************************************************************************************************
+*/
+struct AddrClient
+{
+ ADDR_CLIENT_HANDLE handle; ///< Handed to the debugPrint callback as hClient
+ ADDR_CALLBACKS callbacks; ///< Client-supplied callbacks (debugPrint is one of them)
+};
+/**
+***************************************************************************************************
+* @brief This class is the base class for all ADDR class objects.
+***************************************************************************************************
+*/
+class AddrObject
+{
+public:
+ AddrObject();
+ AddrObject(const AddrClient* pClient);
+ virtual ~AddrObject();
+
+ VOID* operator new(size_t size, const AddrClient* pClient);
+ VOID operator delete(VOID* pObj, const AddrClient* pClient); // frees via ClientFree with the given client
+ VOID operator delete(VOID* pObj); // frees via ClientFree with the object's own m_client
+ VOID* AddrMalloc(size_t size) const;
+ VOID AddrFree(VOID* pObj) const;
+
+ VOID DebugPrint(
+ const CHAR* pDebugString,
+ ...) const; // forwards to callbacks.debugPrint; the body is empty unless DEBUG is set
+
+ const AddrClient* GetClient() const {return &m_client;} // read-only access to the stored client data
+
+protected:
+ AddrClient m_client; // client handle and callbacks; read by operator delete(VOID*), DebugPrint and GetClient
+
+private:
+ static VOID* ClientAlloc(size_t size, const AddrClient* pClient);
+ static VOID ClientFree(VOID* pObj, const AddrClient* pClient); // called by both operator delete overloads
+
+ // disallow the copy constructor (declared private)
+ AddrObject(const AddrObject& a);
+
+ // disallow the assignment operator (declared private)
+ AddrObject& operator=(const AddrObject& a);
+};
+
+#endif
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/inc/chip/r800/si_gb_reg.h b/src/gallium/winsys/amdgpu/drm/addrlib/inc/chip/r800/si_gb_reg.h
new file mode 100644
index 00000000000..cf67f602bdf
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/inc/chip/r800/si_gb_reg.h
@@ -0,0 +1,155 @@
+#if !defined (__SI_GB_REG_H__)
+#define __SI_GB_REG_H__
+
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+//
+// Make sure the necessary endian defines are there.
+//
+#if defined(LITTLEENDIAN_CPU)
+#elif defined(BIGENDIAN_CPU)
+#else
+#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
+#endif
+
+/*
+ * GB_ADDR_CONFIG struct
+ */
+
+#if defined(LITTLEENDIAN_CPU)
+ // Little-endian declaration order; the named fields plus the unnamed padding fields add up to 32 bits.
+ typedef struct _GB_ADDR_CONFIG_T {
+ unsigned int num_pipes : 3;
+ unsigned int : 1;
+ unsigned int pipe_interleave_size : 3;
+ unsigned int : 1;
+ unsigned int bank_interleave_size : 3;
+ unsigned int : 1;
+ unsigned int num_shader_engines : 2;
+ unsigned int : 2;
+ unsigned int shader_engine_tile_size : 3;
+ unsigned int : 1;
+ unsigned int num_gpus : 3;
+ unsigned int : 1;
+ unsigned int multi_gpu_tile_size : 2;
+ unsigned int : 2;
+ unsigned int row_size : 2;
+ unsigned int num_lower_pipes : 1;
+ unsigned int : 1;
+ } GB_ADDR_CONFIG_T;
+
+#elif defined(BIGENDIAN_CPU)
+ // Big-endian variant: the same fields and padding, declared in exactly the reverse order.
+ typedef struct _GB_ADDR_CONFIG_T {
+ unsigned int : 1;
+ unsigned int num_lower_pipes : 1;
+ unsigned int row_size : 2;
+ unsigned int : 2;
+ unsigned int multi_gpu_tile_size : 2;
+ unsigned int : 1;
+ unsigned int num_gpus : 3;
+ unsigned int : 1;
+ unsigned int shader_engine_tile_size : 3;
+ unsigned int : 2;
+ unsigned int num_shader_engines : 2;
+ unsigned int : 1;
+ unsigned int bank_interleave_size : 3;
+ unsigned int : 1;
+ unsigned int pipe_interleave_size : 3;
+ unsigned int : 1;
+ unsigned int num_pipes : 3;
+ } GB_ADDR_CONFIG_T;
+
+#endif
+
+typedef union {
+ unsigned int val : 32; // all 32 bits as a single value
+ GB_ADDR_CONFIG_T f; // the same storage viewed through the bit-fields
+} GB_ADDR_CONFIG;
+
+#if defined(LITTLEENDIAN_CPU)
+ // Little-endian declaration order; each struct's fields (including unnamed padding) add up to 32 bits.
+ typedef struct _GB_TILE_MODE_T {
+ unsigned int micro_tile_mode : 2;
+ unsigned int array_mode : 4;
+ unsigned int pipe_config : 5;
+ unsigned int tile_split : 3;
+ unsigned int bank_width : 2;
+ unsigned int bank_height : 2;
+ unsigned int macro_tile_aspect : 2;
+ unsigned int num_banks : 2;
+ unsigned int micro_tile_mode_new : 3;
+ unsigned int sample_split : 2;
+ unsigned int : 5;
+ } GB_TILE_MODE_T;
+
+ typedef struct _GB_MACROTILE_MODE_T {
+ unsigned int bank_width : 2;
+ unsigned int bank_height : 2;
+ unsigned int macro_tile_aspect : 2;
+ unsigned int num_banks : 2;
+ unsigned int : 24;
+ } GB_MACROTILE_MODE_T;
+
+#elif defined(BIGENDIAN_CPU)
+ // Big-endian variants: the same fields and padding, declared in exactly the reverse order.
+ typedef struct _GB_TILE_MODE_T {
+ unsigned int : 5;
+ unsigned int sample_split : 2;
+ unsigned int micro_tile_mode_new : 3;
+ unsigned int num_banks : 2;
+ unsigned int macro_tile_aspect : 2;
+ unsigned int bank_height : 2;
+ unsigned int bank_width : 2;
+ unsigned int tile_split : 3;
+ unsigned int pipe_config : 5;
+ unsigned int array_mode : 4;
+ unsigned int micro_tile_mode : 2;
+ } GB_TILE_MODE_T;
+
+ typedef struct _GB_MACROTILE_MODE_T {
+ unsigned int : 24;
+ unsigned int num_banks : 2;
+ unsigned int macro_tile_aspect : 2;
+ unsigned int bank_height : 2;
+ unsigned int bank_width : 2;
+ } GB_MACROTILE_MODE_T;
+
+#endif
+
+typedef union {
+ unsigned int val : 32; // all 32 bits as a single value
+ GB_TILE_MODE_T f; // the same storage viewed through the bit-fields
+} GB_TILE_MODE;
+
+typedef union {
+ unsigned int val : 32; // all 32 bits as a single value
+ GB_MACROTILE_MODE_T f; // the same storage viewed through the bit-fields
+} GB_MACROTILE_MODE;
+
+#endif
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/inc/lnx_common_defs.h b/src/gallium/winsys/amdgpu/drm/addrlib/inc/lnx_common_defs.h
new file mode 100644
index 00000000000..61540f49b7e
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/inc/lnx_common_defs.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+#ifndef _lnx_common_defs_h_
+#define _lnx_common_defs_h_
+
+#if DBG
+#include <stdarg.h>     // We do not have any choice: need variable
+                        // number of parameters support (va_list) for
+                        // the debug build.
+#endif // #if DBG
+
+//
+// -------------- External functions from Linux kernel driver ----------------
+//
+// Note: The definitions/declarations below must match the original ones.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned long __ke_size_t; // as it is defined in firegl_public.h
+typedef int __kernel_ptrdiff_t; // as it is defined in posix_types.h
+
+// Calling-convention attribute for the external functions below; a definition made before this header wins.
+#if !defined(ATI_API_CALL)
+#define ATI_API_CALL __attribute__((regparm(0)))
+#endif
+// The C RTL macros later in this header (memset, memcpy, strlen, strcpy, strncpy, snprintf) expand to these.
+extern void * ATI_API_CALL __ke_memset(void* s, int c, __ke_size_t count);
+extern void * ATI_API_CALL __ke_memcpy(void* d, const void* s, __ke_size_t count);
+extern ATI_API_CALL __ke_size_t __ke_strlen(const char *s); // NOTE(review): attribute precedes the return type only here
+extern char* ATI_API_CALL __ke_strcpy(char* d, const char* s);
+extern char* ATI_API_CALL __ke_strncpy(char* d, const char* s, __ke_size_t count);
+extern void __ke_printk(const char* fmt, ...); // NOTE(review): declared without ATI_API_CALL - confirm intended
+
+extern int ATI_API_CALL __ke_snprintf(char* buf, __ke_size_t size, const char* fmt, ...);
+extern int ATI_API_CALL KCL_CopyFromUserSpace(void* to, const void* from, __ke_size_t size);
+extern int ATI_API_CALL KCL_CopyToUserSpace(void* to, const void* from, __ke_size_t size);
+#define __ke_copy_from_user KCL_CopyFromUserSpace // alias for the KCL_ function declared above
+#define __ke_copy_to_user KCL_CopyToUserSpace // alias for the KCL_ function declared above
+extern int ATI_API_CALL __ke_verify_area(int type, const void * addr, unsigned long size);
+
+extern unsigned long ATI_API_CALL KAS_GetTickCounter(void);
+extern unsigned long ATI_API_CALL KAS_GetTicksPerSecond(void);
+
+
+#if DBG
+extern int ATI_API_CALL __ke_vsnprintf(char *buf, __ke_size_t size, const char *fmt, va_list ap); // va_list form of __ke_snprintf
+#define vsnprintf(_dst, _size, _fmt, varg) __ke_vsnprintf(_dst, _size, _fmt, varg)
+#endif // #if DBG
+
+
+// Note: This function is not defined in firegl_public.h.
+void firegl_hardwareHangRecovery(void); // NOTE(review): no extern/ATI_API_CALL here, unlike most declarations above - confirm
+
+#ifdef __cplusplus
+}
+#endif
+
+//
+// -------------------------- C/C++ standard typedefs ----------------------------
+//
+#ifdef __SIZE_TYPE__
+typedef __SIZE_TYPE__ size_t; // use the compiler-provided type when it is predefined
+#else // #ifdef __SIZE_TYPE__
+typedef unsigned int size_t; // fallback when __SIZE_TYPE__ is not predefined
+#endif // #ifdef __SIZE_TYPE__
+
+#ifdef __PTRDIFF_TYPE__
+typedef __PTRDIFF_TYPE__ ptrdiff_t; // use the compiler-provided type when it is predefined
+#else // #ifdef __PTRDIFF_TYPE__
+typedef int ptrdiff_t; // fallback when __PTRDIFF_TYPE__ is not predefined
+#endif // #ifdef __PTRDIFF_TYPE__
+
+#ifndef NULL
+#ifdef __cplusplus
+#define NULL __null // C++ builds: compiler keyword (presumably GCC's __null - confirm for other compilers)
+#else
+#define NULL ((void *)0)
+#endif
+#endif
+
+
+//
+// ------------------------- C/C++ standard macros ---------------------------
+//
+
+#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) // compiler builtin, as current stddef.h does; no null-pointer dereference
+#define CHAR_BIT 8 // as it is defined in limits.h
+
+//
+// --------------------------------- C RTL -----------------------------------
+//
+
+#define memset(dst, val, len)           __ke_memset(dst, val, len)
+#define memcpy(dst, src, len)           __ke_memcpy(dst, src, len)
+#define strlen(str)                     __ke_strlen(str)
+#define strcpy(dst, src)                __ke_strcpy(dst, src)
+#define strncpy(dst, src, len)          __ke_strncpy(dst, src, len)
+// Note: variadic macros are a C99 feature that GCC also accepts in C++ as an extension; the GNU
+//       "##" form drops the preceding comma when no variable arguments are supplied.
+#define snprintf(dst, len, fmt, ...)    __ke_snprintf(dst, len, fmt, ##__VA_ARGS__)
+
+
+#endif // #ifdef _lnx_common_defs_h_
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/r800/chip/si_ci_vi_merged_enum.h b/src/gallium/winsys/amdgpu/drm/addrlib/r800/chip/si_ci_vi_merged_enum.h
new file mode 100644
index 00000000000..5ed81add264
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/r800/chip/si_ci_vi_merged_enum.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+#if !defined (SI_CI_VI_MERGED_ENUM_HEADER)
+#define SI_CI_VI_MERGED_ENUM_HEADER
+
+typedef enum PipeInterleaveSize {   // pipe interleave size encodings
+    ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0,
+    ADDR_CONFIG_PIPE_INTERLEAVE_512B = 1
+} PipeInterleaveSize;
+
+typedef enum RowSize {              // row size encodings
+    ADDR_CONFIG_1KB_ROW = 0,
+    ADDR_CONFIG_2KB_ROW = 1,
+    ADDR_CONFIG_4KB_ROW = 2
+} RowSize;
+
+#endif
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp b/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp
new file mode 100644
index 00000000000..7393953c120
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.cpp
@@ -0,0 +1,1782 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file ciaddrlib.cpp
+* @brief Contains the implementation for the CIAddrLib class.
+***************************************************************************************************
+*/
+
+#include "ciaddrlib.h"
+
+#include "si_gb_reg.h"
+
+#include "si_ci_vi_merged_enum.h"
+
+#if BRAHMA_BUILD
+#include "amdgpu_id.h"
+#else
+#include "ci_id.h"
+#include "kv_id.h"
+#include "vi_id.h"
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+*   AddrMask
+*
+*   @brief
+*       Builds a mask with the lowest "width" bits set
+*   @return
+*       Bit mask; every bit is set once width reaches the bit size of UINT_64
+***************************************************************************************************
+*/
+static UINT_64 AddrMask(
+    UINT_32 width)  ///< Number of low bits to set
+{
+    const UINT_32 fullWidth = sizeof(UINT_64) * 8;
+
+    // Widths narrower than the type are built with a shift; anything wider
+    // falls through to the saturated result below.
+    if (width < fullWidth)
+    {
+        return (((UINT_64) 1) << width) - 1;
+    }
+
+    // Saturated: all bits set
+    return ~((UINT_64) 0);
+}
+
+/**
+***************************************************************************************************
+*   AddrGetBits
+*
+*   @brief
+*       Extracts the bits in the range [msb, lsb], moved down to bit 0
+*   @return
+*       Bits of this range, or 0 when msb is below lsb
+***************************************************************************************************
+*/
+static UINT_64 AddrGetBits(
+    UINT_64 bits,   ///< Source bits
+    UINT_32 msb,    ///< Most significant bit
+    UINT_32 lsb)    ///< Least significant bit
+{
+    // An inverted range selects nothing
+    if (msb < lsb)
+    {
+        return 0;
+    }
+
+    return (bits >> lsb) & AddrMask(1 + msb - lsb);
+}
+
+/**
+***************************************************************************************************
+*   AddrRemoveBits
+*
+*   @brief
+*       Removes the bits in the range [msb, lsb]; the bits above the range move down to lsb
+*   @return
+*       Modified bits, or the input unchanged when msb is below lsb
+***************************************************************************************************
+*/
+static UINT_64 AddrRemoveBits(
+    UINT_64 bits,   ///< Source bits
+    UINT_32 msb,    ///< Most significant bit
+    UINT_32 lsb)    ///< Least significant bit
+{
+    if (msb < lsb)
+    {
+        return bits;    // inverted range: nothing to remove
+    }
+
+    const UINT_64 below = AddrGetBits(bits, lsb - 1, 0);                     // bits under the range
+    const UINT_64 above = AddrGetBits(bits, 8 * sizeof(bits) - 1, msb + 1);  // bits over the range
+    return below | (above << lsb);
+}
+
+/**
+***************************************************************************************************
+*   AddrInsertBits
+*
+*   @brief
+*       Inserts new bits into the range [msb, lsb]; the old bits from lsb upward move above msb
+*   @return
+*       Modified bits (old bits pushed past the top of the 64-bit value are dropped)
+***************************************************************************************************
+*/
+static UINT_64 AddrInsertBits(
+    UINT_64 bits,       ///< Source bits
+    UINT_64 newBits,    ///< New bits to be inserted
+    UINT_32 msb,        ///< Most significant bit
+    UINT_32 lsb)        ///< Least significant bit
+{
+    const UINT_32 topBit = 8 * sizeof(bits) - 1;
+    UINT_64       ret    = bits;
+    if (msb >= lsb)
+    {
+        ret = AddrGetBits(bits, lsb - 1, 0)                 // old low bits
+            | (AddrGetBits(newBits, msb - lsb, 0) << lsb)   // new bits
+            | ((msb < topBit) ? (AddrGetBits(bits, topBit, lsb) << (msb + 1)) : 0); // old high bits; never shifted by 64 or more
+    }
+    return ret;
+}
+
+
+/**
+***************************************************************************************************
+* AddrCIHwlInit
+*
+* @brief
+* Creates a CIAddrLib object by forwarding pClient to CIAddrLib::CreateObj.
+*
+* @return
+* Returns the pointer produced by CIAddrLib::CreateObj.
+***************************************************************************************************
+*/
+AddrLib* AddrCIHwlInit(const AddrClient* pClient)
+{
+ return CIAddrLib::CreateObj(pClient);
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::CIAddrLib
+*
+* @brief
+* Constructor: passes pClient to SIAddrLib, starts with zero macro entries and
+* m_allowNonDispThickModes off, tags the object as CI_ADDRLIB and zeroes m_settings
+***************************************************************************************************
+*/
+CIAddrLib::CIAddrLib(const AddrClient* pClient) :
+ SIAddrLib(pClient),
+ m_noOfMacroEntries(0),
+ m_allowNonDispThickModes(FALSE)
+{
+ m_class = CI_ADDRLIB;
+ memset(&m_settings, 0, sizeof(m_settings)); // every chip flag starts cleared
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::~CIAddrLib
+*
+* @brief
+* Destructor; the body is intentionally empty
+***************************************************************************************************
+*/
+CIAddrLib::~CIAddrLib()
+{
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::HwlComputeDccInfo
+*
+* @brief
+* Compute DCC key size, base alignment and fast clear size (Volcanic Islands, macro tiled only)
+* @return
+* ADDR_OK, or ADDR_NOTSUPPORTED when the chip is not Volcanic Islands or the mode is not macro tiled
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlComputeDccInfo(
+ const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
+ ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+ if (m_settings.isVolcanicIslands && IsMacroTiled(pIn->tileMode))
+ {
+ UINT_64 dccFastClearSize = pIn->colorSurfSize >> 8;
+ // The key is 1/256 of the color surface, so the surface size must be a multiple of 256
+ ADDR_ASSERT(0 == (pIn->colorSurfSize & 0xff));
+
+ if (pIn->numSamples > 1)
+ {
+ UINT_32 tileSizePerSample = BITS_TO_BYTES(pIn->bpp * MicroTileWidth * MicroTileHeight);
+ UINT_32 samplesPerSplit = pIn->tileInfo.tileSplitBytes / tileSizePerSample;
+ // NOTE(review): samplesPerSplit is a divisor below and is 0 if tileSplitBytes < tileSizePerSample - confirm callers rule that out
+ if (samplesPerSplit < pIn->numSamples)
+ {
+ UINT_32 numSplits = pIn->numSamples / samplesPerSplit;
+ UINT_32 fastClearBaseAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes;
+
+ ADDR_ASSERT(IsPow2(fastClearBaseAlign));
+
+ dccFastClearSize /= numSplits;
+
+ if (0 != (dccFastClearSize & (fastClearBaseAlign - 1)))
+ {
+ // Disable dcc fast clear
+ // if key size of first sample split is not pipe*interleave aligned
+ dccFastClearSize = 0;
+ }
+ }
+ }
+
+ pOut->dccRamSize = pIn->colorSurfSize >> 8;
+ pOut->dccRamBaseAlign = pIn->tileInfo.banks *
+ HwlGetPipes(&pIn->tileInfo) *
+ m_pipeInterleaveBytes;
+ pOut->dccFastClearSize = dccFastClearSize;
+
+ ADDR_ASSERT(IsPow2(pOut->dccRamBaseAlign));
+ // A key size that is already a multiple of the base alignment is reported as sub-level compressible
+ if (0 == (pOut->dccRamSize & (pOut->dccRamBaseAlign - 1)))
+ {
+ pOut->subLvlCompressible = TRUE;
+ }
+ else
+ {
+ UINT_64 dccRamSizeAlign = HwlGetPipes(&pIn->tileInfo) * m_pipeInterleaveBytes;
+ // Otherwise pad the key (and a fast clear size equal to it) up to pipes * interleave bytes
+ if (pOut->dccRamSize == pOut->dccFastClearSize)
+ {
+ pOut->dccFastClearSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign);
+ }
+ pOut->dccRamSize = PowTwoAlign(pOut->dccRamSize, dccRamSizeAlign);
+ pOut->subLvlCompressible = FALSE;
+ }
+ }
+ else
+ {
+ returnCode = ADDR_NOTSUPPORTED; // pOut is left untouched on this path
+ }
+
+ return returnCode;
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::HwlComputeCmaskAddrFromCoord
+*
+* @brief
+* Compute tc compatible Cmask address from fmask ram address
+*
+* @return
+* ADDR_OK on Volcanic Islands with flags.tcCompatible set, ADDR_NOTSUPPORTED otherwise
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlComputeCmaskAddrFromCoord(
+ const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] fmask addr/bpp/tile input
+ ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut ///< [out] cmask address
+ ) const
+{
+ ADDR_E_RETURNCODE returnCode = ADDR_NOTSUPPORTED;
+
+ if ((m_settings.isVolcanicIslands == TRUE) &&
+ (pIn->flags.tcCompatible == TRUE))
+ {
+ UINT_32 numOfPipes = HwlGetPipes(pIn->pTileInfo);
+ UINT_32 numOfBanks = pIn->pTileInfo->banks;
+ UINT_64 fmaskAddress = pIn->fmaskAddr;
+ UINT_32 elemBits = pIn->bpp;
+ UINT_32 blockByte = 64 * elemBits / 8;
+ UINT_64 metaNibbleAddress = HwlComputeMetadataNibbleAddress(fmaskAddress,
+ 0,
+ 0,
+ 4,
+ elemBits,
+ blockByte,
+ m_pipeInterleaveBytes,
+ numOfPipes,
+ numOfBanks,
+ 1);
+ pOut->addr = (metaNibbleAddress >> 1); // two nibbles per byte: halve the nibble address
+ pOut->bitPosition = (metaNibbleAddress % 2) ? 4 : 0; // odd nibbles start at bit 4 of that byte
+ returnCode = ADDR_OK;
+ }
+
+ return returnCode;
+}
+/**
+***************************************************************************************************
+* CIAddrLib::HwlConvertChipFamily
+*
+* @brief
+* Convert familyID defined in atiid.h to AddrChipFamily and set the matching m_settings flags
+* @return
+* AddrChipFamily (always ADDR_CHIP_FAMILY_CI here; the switch only updates m_settings)
+***************************************************************************************************
+*/
+AddrChipFamily CIAddrLib::HwlConvertChipFamily(
+ UINT_32 uChipFamily, ///< [in] chip family defined in atiid.h
+ UINT_32 uChipRevision) ///< [in] chip revision defined in "asic_family"_id.h
+{
+ AddrChipFamily family = ADDR_CHIP_FAMILY_CI;
+
+ switch (uChipFamily)
+ {
+ case FAMILY_CI:
+ m_settings.isSeaIsland = 1;
+ m_settings.isBonaire = ASICREV_IS_BONAIRE_M(uChipRevision);
+ m_settings.isHawaii = ASICREV_IS_HAWAII_P(uChipRevision);
+ break;
+ case FAMILY_KV:
+ m_settings.isKaveri = 1;
+ m_settings.isSpectre = ASICREV_IS_SPECTRE(uChipRevision);
+ m_settings.isSpooky = ASICREV_IS_SPOOKY(uChipRevision);
+ m_settings.isKalindi = ASICREV_IS_KALINDI(uChipRevision);
+ break;
+ case FAMILY_VI:
+ m_settings.isVolcanicIslands = 1;
+ m_settings.isIceland = ASICREV_IS_ICELAND_M(uChipRevision);
+ m_settings.isTonga = ASICREV_IS_TONGA_P(uChipRevision);
+ m_settings.isFiji = ASICREV_IS_FIJI_P(uChipRevision);
+ break;
+ case FAMILY_CZ: // Carrizo also counts as Volcanic Islands
+ m_settings.isCarrizo = 1;
+ m_settings.isVolcanicIslands = 1;
+ break;
+ default: // unknown family: assert only, no flag is set
+ ADDR_ASSERT(!"This should be a unexpected Fusion");
+ break;
+ }
+
+ return family;
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::HwlInitGlobalParams
+*
+* @brief
+* Initializes global parameters: decodes the GB registers, picks a fail-safe m_pipes value
+* and then builds the tile setting table and the macro tile config table
+* @return
+* TRUE if all settings are valid
+*
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::HwlInitGlobalParams(
+ const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
+{
+ BOOL_32 valid = TRUE; // overwritten by the DecodeGbRegs result below
+
+ const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue;
+
+ valid = DecodeGbRegs(pRegValue);
+
+ // The following assignments for m_pipes is only for fail-safe, InitTileSettingTable should
+ // read the correct pipes from tile mode table
+ if (m_settings.isHawaii)
+ {
+ // Hawaii has 16-pipe, see GFXIP_Config_Summary.xls
+ m_pipes = 16;
+ }
+ else if (m_settings.isBonaire || m_settings.isSpectre)
+ {
+ m_pipes = 4;
+ }
+ else // Treat other KV asics to be 2-pipe
+ {
+ m_pipes = 2;
+ }
+
+ // @todo: VI
+ // Move this to VI code path once created
+ if (m_settings.isTonga) // these VI parts override the value chosen above
+ {
+ m_pipes = 8;
+ }
+ else if (m_settings.isIceland)
+ {
+ m_pipes = 2;
+ }
+ else if (m_settings.isFiji)
+ {
+ m_pipes = 16;
+ }
+ // Each table is only built when every earlier step succeeded
+ if (valid)
+ {
+ valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries);
+ }
+ if (valid)
+ {
+ valid = InitMacroTileCfgTable(pRegValue->pMacroTileConfig, pRegValue->noOfMacroEntries);
+ }
+
+ return valid;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlPostCheckTileIndex
+*
+*   @brief
+*       Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches
+*       tile mode/type/info and change the index if needed
+*   @return
+*       Tile index; TileIndexLinearGeneral for linear general, TileIndexInvalid if nothing matches
+***************************************************************************************************
+*/
+INT_32 CIAddrLib::HwlPostCheckTileIndex(
+    const ADDR_TILEINFO* pInfo,     ///< [in] Tile Info
+    AddrTileMode         mode,      ///< [in] Tile mode
+    AddrTileType         type,      ///< [in] Tile type
+    INT                  curIndex   ///< [in] Current index assigned in HwlSetupTileInfo
+    ) const
+{
+    INT_32 index = curIndex;
+
+    if (mode == ADDR_TM_LINEAR_GENERAL)
+    {
+        index = TileIndexLinearGeneral;
+    }
+    else
+    {
+        BOOL_32 macroTiled = IsMacroTiled(mode);
+
+        // We need to find a new index if either of them is true
+        // 1. curIndex is invalid
+        // 2. tile mode is changed
+        // 3. tile info does not match for macro tiled
+        if ((index == TileIndexInvalid)         ||
+            (mode != m_tileTable[index].mode)   ||
+            (macroTiled && pInfo->pipeConfig != m_tileTable[index].info.pipeConfig))
+        {
+            for (index = 0; index < static_cast<INT_32>(m_noOfEntries); index++)
+            {
+                if (macroTiled)
+                {
+                    // macro tile modes need all to match
+                    if ((pInfo->pipeConfig == m_tileTable[index].info.pipeConfig) &&
+                        (mode == m_tileTable[index].mode) &&
+                        (type == m_tileTable[index].type))
+                    {
+                        // tileSplitBytes stored in m_tileTable is only valid for depth entries
+                        if (type == ADDR_DEPTH_SAMPLE_ORDER)
+                        {
+                            if (pInfo->tileSplitBytes == m_tileTable[index].info.tileSplitBytes)
+                            {
+                                break;
+                            }
+                        }
+                        else // other entries are determined by other 3 fields
+                        {
+                            break;
+                        }
+                    }
+                }
+                else if (mode == ADDR_TM_LINEAR_ALIGNED)
+                {
+                    // linear mode only needs tile mode to match
+                    if (mode == m_tileTable[index].mode)
+                    {
+                        break;
+                    }
+                }
+                else
+                {
+                    // micro tile modes only need tile mode and tile type to match
+                    if (mode == m_tileTable[index].mode &&
+                        type == m_tileTable[index].type)
+                    {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    ADDR_ASSERT(index < static_cast<INT_32>(m_noOfEntries));
+
+    if (index >= static_cast<INT_32>(m_noOfEntries))
+    {
+        index = TileIndexInvalid;   // the search loop ran off the end of the table
+    }
+
+    return index;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlSetupTileCfg
+*
+*   @brief
+*       Map tile index to tile setting.
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS when a used tile index lies outside the tile table
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlSetupTileCfg(
+    INT_32          index,          ///< [in] Tile index
+    INT_32          macroModeIndex, ///< [in] Index in macro tile mode table(CI)
+    ADDR_TILEINFO*  pInfo,          ///< [out] Tile Info
+    AddrTileMode*   pMode,          ///< [out] Tile mode
+    AddrTileType*   pType           ///< [out] Tile type
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    // Global flag to control usage of tileIndex
+    if (UseTileIndex(index))
+    {
+        if (static_cast<UINT_32>(index) >= m_noOfEntries)   // the unsigned compare also rejects negative indices
+        {
+            returnCode = ADDR_INVALIDPARAMS;
+        }
+        else
+        {
+            const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index);
+
+            if (pInfo != NULL)
+            {
+                if (IsMacroTiled(pCfgTable->mode))
+                {
+                    ADDR_ASSERT(((macroModeIndex != TileIndexInvalid)
+                        && (macroModeIndex != TileIndexNoMacroIndex)));
+                    // Here we used tile_bytes to replace of tile_split
+                    // According info as below:
+                    // "tile_split_c = MIN(ROW_SIZE, tile_split)
+                    // "tile_bytes = MIN(tile_split_c, num_samples * tile_bytes_1x)
+                    // when using tile_bytes replacing of tile_split, the result of
+                    // alignment and others(such as slicesPerTile) are unaffected -
+                    // since if tile_split_c is larger, split won't happen, otherwise
+                    // (num_samples * tile_bytes_1x is larger), a correct tile_split is
+                    // returned.
+                    *pInfo = m_macroTileTable[macroModeIndex];
+                    // NOTE(review): macroModeIndex is only checked by the assert above - confirm callers never pass a marker value
+                    if (pCfgTable->type == ADDR_DEPTH_SAMPLE_ORDER)
+                    {
+                        pInfo->tileSplitBytes = pCfgTable->info.tileSplitBytes;
+                    }
+                    pInfo->pipeConfig = pCfgTable->info.pipeConfig;
+                }
+                else // 1D and linear modes, we return default value stored in table
+                {
+                    *pInfo = pCfgTable->info;
+                }
+            }
+
+            if (pMode != NULL)
+            {
+                *pMode = pCfgTable->mode;
+            }
+
+            if (pType != NULL)
+            {
+                *pType = pCfgTable->type;
+            }
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeSurfaceInfo
+*
+*   @brief
+*       CI entry of ComputeSurfaceInfo: wraps the SI implementation and normalizes macroModeIndex
+*   @return
+*       ADDR_E_RETURNCODE produced by SIAddrLib::HwlComputeSurfaceInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlComputeSurfaceInfo(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    if (TileIndexInvalid == pIn->tileIndex)
+    {
+        // Without a valid tile index the macro mode index cannot be valid either
+        pOut->macroModeIndex = TileIndexInvalid;
+    }
+
+    const ADDR_E_RETURNCODE status = SIAddrLib::HwlComputeSurfaceInfo(pIn, pOut);
+
+    if (TileIndexNoMacroIndex == pOut->macroModeIndex)
+    {
+        pOut->macroModeIndex = TileIndexInvalid;
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::HwlComputeFmaskInfo
+* @brief
+* CI entry of ComputeFmaskInfo: picks tile/macro mode indices, then calls DispatchComputeFmaskInfo
+* @return
+* ADDR_E_RETURNCODE as returned by DispatchComputeFmaskInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE CIAddrLib::HwlComputeFmaskInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn, ///< [in] input structure (copied, never modified)
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut ///< [out] output structure
+ )
+{
+ ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+ ADDR_TILEINFO tileInfo = {0}; // scratch tile info, used only when the caller supplies none
+ ADDR_COMPUTE_FMASK_INFO_INPUT fmaskIn;
+ fmaskIn = *pIn; // local copy so tileIndex and pTileInfo can be overridden below
+
+ AddrTileMode tileMode = pIn->tileMode;
+
+ // Use internal tile info if pOut does not have a valid pTileInfo
+ if (pOut->pTileInfo == NULL)
+ {
+ pOut->pTileInfo = &tileInfo;
+ }
+
+ ADDR_ASSERT(tileMode == ADDR_TM_2D_TILED_THIN1 ||
+ tileMode == ADDR_TM_3D_TILED_THIN1 ||
+ tileMode == ADDR_TM_PRT_TILED_THIN1 ||
+ tileMode == ADDR_TM_PRT_2D_TILED_THIN1 ||
+ tileMode == ADDR_TM_PRT_3D_TILED_THIN1);
+
+ ADDR_ASSERT(m_tileTable[14].mode == ADDR_TM_2D_TILED_THIN1);
+ ADDR_ASSERT(m_tileTable[15].mode == ADDR_TM_3D_TILED_THIN1);
+
+ // Entry 14 serves 2D_THIN1; every other mode accepted by the assert above uses entry 15
+ INT_32 tileIndex = tileMode == ADDR_TM_2D_TILED_THIN1 ? 14 : 15;
+ ADDR_SURFACE_FLAGS flags = {{0}};
+ flags.fmask = 1;
+
+ INT_32 macroModeIndex = TileIndexInvalid;
+
+ UINT_32 numSamples = pIn->numSamples;
+ UINT_32 numFrags = pIn->numFrags == 0 ? numSamples : pIn->numFrags; // 0 frags: use numSamples
+
+ UINT_32 bpp = QLog2(numFrags);
+
+ // EQAA (more samples than fragments) needs one more bit
+ if (numSamples > numFrags)
+ {
+ bpp++;
+ }
+
+ if (bpp == 3) // a 3-bit value is widened to 4 bits
+ {
+ bpp = 4;
+ }
+
+ bpp = Max(8u, bpp * numSamples); // bits for all samples, never fewer than 8
+
+ macroModeIndex = HwlComputeMacroModeIndex(tileIndex, flags, bpp, numSamples, pOut->pTileInfo);
+
+ fmaskIn.tileIndex = tileIndex;
+ fmaskIn.pTileInfo = pOut->pTileInfo;
+ pOut->macroModeIndex = macroModeIndex;
+ pOut->tileIndex = tileIndex;
+
+ retCode = DispatchComputeFmaskInfo(&fmaskIn, pOut);
+
+ if (retCode == ADDR_OK)
+ {
+ pOut->tileIndex =
+ HwlPostCheckTileIndex(pOut->pTileInfo, pIn->tileMode, ADDR_NON_DISPLAYABLE,
+ pOut->tileIndex);
+ }
+
+ // Resets pTileInfo to NULL if the internal tile info is used (it is about to go out of scope)
+ if (pOut->pTileInfo == &tileInfo)
+ {
+ pOut->pTileInfo = NULL;
+ }
+
+ return retCode;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlFmaskPreThunkSurfInfo
+*
+*   @brief
+*       Seeds the thunked ComputeSurfaceInfo call with the fmask tile index and macro mode index
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlFmaskPreThunkSurfInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pFmaskIn,   ///< [in] Input of fmask info
+    const ADDR_COMPUTE_FMASK_INFO_OUTPUT*   pFmaskOut,  ///< [in] Output of fmask info
+    ADDR_COMPUTE_SURFACE_INFO_INPUT*        pSurfIn,    ///< [out] Input of thunked surface info
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*       pSurfOut    ///< [out] Output of thunked surface info
+    ) const
+{
+    pSurfOut->macroModeIndex = pFmaskOut->macroModeIndex;
+    pSurfIn->tileIndex       = pFmaskIn->tileIndex;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlFmaskPostThunkSurfInfo
+*
+*   @brief
+*       Copies the tile index and macro mode index of the thunked surface back to the fmask output
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlFmaskPostThunkSurfInfo(
+    const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,   ///< [in] Output of surface info
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pFmaskOut   ///< [out] Output of fmask info
+    ) const
+{
+    pFmaskOut->macroModeIndex = pSurfOut->macroModeIndex;
+    pFmaskOut->tileIndex      = pSurfOut->tileIndex;
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::HwlDegradeThickTileMode
+*
+* @brief
+* Hook for degrading thick tile modes; this CI implementation performs no degradation
+*
+* @return
+* baseTileMode, unchanged
+***************************************************************************************************
+*/
+AddrTileMode CIAddrLib::HwlDegradeThickTileMode(
+ AddrTileMode baseTileMode, ///< [in] base tile mode
+ UINT_32 numSlices, ///< [in] current number of slices (unused here)
+ UINT_32* pBytesPerTile ///< [in/out] pointer to bytes per tile (unused here)
+ ) const
+{
+ return baseTileMode;
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::HwlOverrideTileMode
+*
+* @brief
+* Fold unsupported PRT 2D/3D modes, and override THICK to THIN for specific formats on CI
+*
+* @return
+* TRUE if *pTileMode was changed, FALSE otherwise
+*
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::HwlOverrideTileMode(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure (only format is read)
+ AddrTileMode* pTileMode, ///< [in/out] pointer to the tile mode
+ AddrTileType* pTileType ///< [in/out] pointer to the tile type
+ ) const
+{
+ BOOL_32 bOverrided = FALSE;
+ AddrTileMode tileMode = *pTileMode;
+
+ // Currently no CI/VI family member supports
+ // ADDR_TM_PRT_2D_TILED_THICK, ADDR_TM_PRT_3D_TILED_THICK,
+ // ADDR_TM_PRT_2D_TILED_THIN1 or ADDR_TM_PRT_3D_TILED_THIN1; use the plain PRT modes instead
+ switch (tileMode)
+ {
+ case ADDR_TM_PRT_2D_TILED_THICK:
+ case ADDR_TM_PRT_3D_TILED_THICK:
+ tileMode = ADDR_TM_PRT_TILED_THICK;
+ break;
+ case ADDR_TM_PRT_2D_TILED_THIN1:
+ case ADDR_TM_PRT_3D_TILED_THIN1:
+ tileMode = ADDR_TM_PRT_TILED_THIN1;
+ break;
+ default:
+ break;
+ }
+
+ // UBTS#404321: Bonaire does not need the override below, as THICK+THICK entries were removed from the tile-mode table
+ if (!m_settings.isBonaire)
+ {
+ UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+ // tile_thickness = (array_mode == XTHICK) ? 8 : ((array_mode == THICK) ? 4 : 1)
+ if (thickness > 1)
+ {
+ switch (pIn->format)
+ {
+ // see //gfxip/gcB/devel/cds/src/verif/tc/models/csim/tcp.cpp
+ // tcpError("Thick micro tiling is not supported for format...
+ case ADDR_FMT_X24_8_32_FLOAT:
+ case ADDR_FMT_32_AS_8:
+ case ADDR_FMT_32_AS_8_8:
+ case ADDR_FMT_32_AS_32_32_32_32:
+
+ // packed formats
+ case ADDR_FMT_GB_GR:
+ case ADDR_FMT_BG_RG:
+ case ADDR_FMT_1_REVERSED:
+ case ADDR_FMT_1:
+ case ADDR_FMT_BC1:
+ case ADDR_FMT_BC2:
+ case ADDR_FMT_BC3:
+ case ADDR_FMT_BC4:
+ case ADDR_FMT_BC5:
+ case ADDR_FMT_BC6:
+ case ADDR_FMT_BC7:
+ switch (tileMode) // map each thick mode to its thin counterpart
+ {
+ case ADDR_TM_1D_TILED_THICK:
+ tileMode = ADDR_TM_1D_TILED_THIN1;
+ break;
+
+ case ADDR_TM_2D_TILED_XTHICK:
+ case ADDR_TM_2D_TILED_THICK:
+ tileMode = ADDR_TM_2D_TILED_THIN1;
+ break;
+
+ case ADDR_TM_3D_TILED_XTHICK:
+ case ADDR_TM_3D_TILED_THICK:
+ tileMode = ADDR_TM_3D_TILED_THIN1;
+ break;
+
+ case ADDR_TM_PRT_TILED_THICK:
+ tileMode = ADDR_TM_PRT_TILED_THIN1;
+ break;
+
+ case ADDR_TM_PRT_2D_TILED_THICK: // already folded to PRT_TILED_THICK by the first switch
+ tileMode = ADDR_TM_PRT_2D_TILED_THIN1;
+ break;
+
+ case ADDR_TM_PRT_3D_TILED_THICK: // already folded to PRT_TILED_THICK by the first switch
+ tileMode = ADDR_TM_PRT_3D_TILED_THIN1;
+ break;
+
+ default:
+ break;
+
+ }
+
+ // Switch tile type from thick to thin
+ if (tileMode != *pTileMode)
+ {
+ // see tileIndex: 13-18
+ *pTileType = ADDR_NON_DISPLAYABLE;
+ }
+
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ if (tileMode != *pTileMode) // publish the new mode only when something actually changed
+ {
+ *pTileMode = tileMode;
+ bOverrided = TRUE;
+ }
+
+ return bOverrided;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlSetupTileInfo
+*
+*   @brief
+*       Select the CI tile mode table entry for a surface and set up its tile info and indices
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlSetupTileInfo(
+    AddrTileMode                        tileMode,       ///< [in] Tile mode
+    ADDR_SURFACE_FLAGS                  flags,          ///< [in] Surface type flags
+    UINT_32                             bpp,            ///< [in] Bits per pixel
+    UINT_32                             pitch,          ///< [in] Pitch in pixels (unused here)
+    UINT_32                             height,         ///< [in] Height in pixels (unused here)
+    UINT_32                             numSamples,     ///< [in] Number of samples
+    ADDR_TILEINFO*                      pTileInfoIn,    ///< [in] Tile info input (unused here)
+    ADDR_TILEINFO*                      pTileInfoOut,   ///< [in/out] Tile info; all zero = pick one
+    AddrTileType                        inTileType,     ///< [in] Tile type
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT*   pOut            ///< [in/out] tileIndex/tileType/macroModeIndex
+    ) const
+{
+    UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+    ADDR_TILEINFO* pTileInfo = pTileInfoOut;
+    INT index = TileIndexInvalid;
+    INT macroModeIndex = TileIndexInvalid;
+
+    // Fail-safe code: force the micro tile type to one the table has entries for
+    if (!IsLinear(tileMode))
+    {
+        // Thick tile modes must use thick micro tile mode but Bonaire does not support due to
+        // old derived netlists (UBTS 404321)
+        if (thickness > 1)
+        {
+            if (m_settings.isBonaire)
+            {
+                inTileType = ADDR_NON_DISPLAYABLE;
+            }
+            else if ((m_allowNonDispThickModes == FALSE) || (inTileType != ADDR_NON_DISPLAYABLE))
+            {
+                inTileType = ADDR_THICK;
+            }
+        }
+        // 128 bpp tiling must be non-displayable.
+        // Fmask reuse color buffer's entry but bank-height field can be from another entry
+        // To simplify the logic, fmask entry should be picked from non-displayable ones
+        else if (bpp == 128 || flags.fmask)
+        {
+            inTileType = ADDR_NON_DISPLAYABLE;
+        }
+        // These two modes only have non-disp entries though they can be other micro tile modes
+        else if (tileMode == ADDR_TM_3D_TILED_THIN1 || tileMode == ADDR_TM_PRT_3D_TILED_THIN1)
+        {
+            inTileType = ADDR_NON_DISPLAYABLE;
+        }
+
+        if (flags.depth || flags.stencil)
+        {
+            inTileType = ADDR_DEPTH_SAMPLE_ORDER;
+        }
+    }
+
+    if (IsTileInfoAllZero(pTileInfo))
+    {
+        // See table entries 0-4
+        if (flags.depth || flags.stencil)
+        {
+            if (flags.depth && flags.tcCompatible)
+            {
+                // tileSize = bpp * numSamples * 8 * 8 / 8
+                UINT_32 tileSize = bpp * numSamples * 8;
+
+                // Texture readable depth surface should not be split
+                switch (tileSize)
+                {
+                    case 128:
+                        index = 1;
+                        break;
+                    case 256:
+                        index = 2;
+                        break;
+                    case 512:
+                        index = 3;
+                        break;
+                    default:
+                        index = 4;
+                        break;
+                }
+            }
+            else
+            {
+                // Depth and stencil need to use the same index, thus the pre-defined tile_split
+                // can meet the requirement to choose the same macro mode index
+                // uncompressed depth/stencil are not supported for now
+                switch (numSamples)
+                {
+                    case 1:
+                        index = 0;
+                        break;
+                    case 2:
+                    case 4:
+                        index = 1;
+                        break;
+                    case 8:
+                        index = 2;
+                        break;
+                    default:
+                        break;
+                }
+            }
+        }
+
+        // See table entries 5-6
+        if (inTileType == ADDR_DEPTH_SAMPLE_ORDER)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THIN1:
+                    index = 5;
+                    break;
+                case ADDR_TM_PRT_TILED_THIN1:
+                    index = 6;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // See table entries 8-12
+        if (inTileType == ADDR_DISPLAYABLE)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THIN1:
+                    index = 9;
+                    break;
+                case ADDR_TM_2D_TILED_THIN1:
+                    index = 10;
+                    break;
+                case ADDR_TM_PRT_TILED_THIN1:
+                    index = 11;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // See table entries 13-18
+        if (inTileType == ADDR_NON_DISPLAYABLE)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THIN1:
+                    index = 13;
+                    break;
+                case ADDR_TM_2D_TILED_THIN1:
+                    index = 14;
+                    break;
+                case ADDR_TM_3D_TILED_THIN1:
+                    index = 15;
+                    break;
+                case ADDR_TM_PRT_TILED_THIN1:
+                    index = 16;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // See table entries 19-26
+        if (thickness > 1)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THICK:
+                    // special check for bonaire, for the compatibility between old KMD and new UMD
+                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 19 : 18;
+                    break;
+                case ADDR_TM_2D_TILED_THICK:
+                    // special check for bonaire, for the compatibility between old KMD and new UMD
+                    index = ((inTileType == ADDR_THICK) || m_settings.isBonaire) ? 20 : 24;
+                    break;
+                case ADDR_TM_3D_TILED_THICK:
+                    index = 21;
+                    break;
+                case ADDR_TM_PRT_TILED_THICK:
+                    index = 22;
+                    break;
+                case ADDR_TM_2D_TILED_XTHICK:
+                    index = 25;
+                    break;
+                case ADDR_TM_3D_TILED_XTHICK:
+                    index = 26;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        // See table entries 27-30
+        if (inTileType == ADDR_ROTATED)
+        {
+            switch (tileMode)
+            {
+                case ADDR_TM_1D_TILED_THIN1:
+                    index = 27;
+                    break;
+                case ADDR_TM_2D_TILED_THIN1:
+                    index = 28;
+                    break;
+                case ADDR_TM_PRT_TILED_THIN1:
+                    index = 29;
+                    break;
+                case ADDR_TM_PRT_2D_TILED_THIN1:
+                    index = 30;
+                    break;
+                default:
+                    break;
+            }
+        }
+
+        if (m_pipes >= 8)
+        {
+            ADDR_ASSERT((index + 1) < static_cast<INT_32>(m_noOfEntries));
+            // Only do this when tile mode table is updated.
+            if (((tileMode == ADDR_TM_PRT_TILED_THIN1) || (tileMode == ADDR_TM_PRT_TILED_THICK)) &&
+                (m_tileTable[index+1].mode == tileMode))
+            {
+                UINT_32 bytesXSamples = bpp * numSamples / 8;
+                UINT_32 bytesXThickness = bpp * thickness / 8;
+                UINT_32 switchP4Threshold = (m_pipes == 16) ? 8 : 32;
+
+                if ((bytesXSamples > switchP4Threshold) || (bytesXThickness > switchP4Threshold))
+                {
+                    // Pick next 4 pipe entry
+                    index += 1;
+                }
+            }
+        }
+    }
+    else
+    {
+        // A pre-filled tile info is ready
+        index = pOut->tileIndex;
+        macroModeIndex = pOut->macroModeIndex;
+
+        // pass tile type back for post tile index compute
+        pOut->tileType = inTileType;
+    }
+
+    // We only need to set up tile info if there is a valid index but macroModeIndex is invalid
+    if (index != TileIndexInvalid && macroModeIndex == TileIndexInvalid)
+    {
+        macroModeIndex = HwlComputeMacroModeIndex(index, flags, bpp, numSamples, pTileInfo);
+
+        /// Copy to pOut->tileType/tileIndex/macroModeIndex
+        pOut->tileIndex = index;
+        pOut->tileType = m_tileTable[index].type; // Or inTileType, the same
+        pOut->macroModeIndex = macroModeIndex;
+    }
+    else if (tileMode == ADDR_TM_LINEAR_GENERAL)
+    {
+        pOut->tileIndex = TileIndexLinearGeneral;
+
+        // Copy linear-aligned entry??
+        *pTileInfo = m_tileTable[8].info;
+    }
+    else if (tileMode == ADDR_TM_LINEAR_ALIGNED)
+    {
+        pOut->tileIndex = 8;
+        *pTileInfo = m_tileTable[8].info;
+    }
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::ReadGbTileMode
+*
+*   @brief
+*       Convert GB_TILE_MODE HW value to ADDR_TILECONFIG.
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID CIAddrLib::ReadGbTileMode(
+    UINT_32             regValue,   ///< [in] GB_TILE_MODE register
+    ADDR_TILECONFIG*    pCfg        ///< [out] output structure
+    ) const
+{
+    GB_TILE_MODE gbTileMode;
+    gbTileMode.val = regValue;
+
+    pCfg->type = static_cast<AddrTileType>(gbTileMode.f.micro_tile_mode_new);
+    pCfg->info.pipeConfig = static_cast<AddrPipeCfg>(gbTileMode.f.pipe_config + 1);
+
+    if (pCfg->type == ADDR_DEPTH_SAMPLE_ORDER)
+    {
+        pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split;  // depth: real byte count
+    }
+    else
+    {
+        pCfg->info.tileSplitBytes = 1 << gbTileMode.f.sample_split; // non-depth: split factor
+    }
+
+    UINT_32 regArrayMode = gbTileMode.f.array_mode;
+
+    pCfg->mode = static_cast<AddrTileMode>(regArrayMode);
+
+    switch (regArrayMode) // register values that differ from the plain cast above
+    {
+        case 5:
+            pCfg->mode = ADDR_TM_PRT_TILED_THIN1;
+            break;
+        case 6:
+            pCfg->mode = ADDR_TM_PRT_2D_TILED_THIN1;
+            break;
+        case 8:
+            pCfg->mode = ADDR_TM_2D_TILED_XTHICK;
+            break;
+        case 9:
+            pCfg->mode = ADDR_TM_PRT_TILED_THICK;
+            break;
+        case 0xa:
+            pCfg->mode = ADDR_TM_PRT_2D_TILED_THICK;
+            break;
+        case 0xb:
+            pCfg->mode = ADDR_TM_PRT_3D_TILED_THIN1;
+            break;
+        case 0xe:
+            pCfg->mode = ADDR_TM_3D_TILED_XTHICK;
+            break;
+        case 0xf:
+            pCfg->mode = ADDR_TM_PRT_3D_TILED_THICK;
+            break;
+        default:
+            break;
+    }
+
+    // Fail-safe code for these always convert tile info, as the non-macro modes
+    // return the entry of tile mode table directly without looking up macro mode table
+    if (!IsMacroTiled(pCfg->mode))
+    {
+        pCfg->info.banks = 2;
+        pCfg->info.bankWidth = 1;
+        pCfg->info.bankHeight = 1;
+        pCfg->info.macroAspectRatio = 1;
+        pCfg->info.tileSplitBytes = 64;
+    }
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::InitTileSettingTable
+*
+*   @brief
+*       Initialize the ADDR_TILECONFIG table from client register values.
+*   @return
+*       TRUE if tile table is correctly initialized; FALSE if pCfg is NULL or the count is too big
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::InitTileSettingTable(
+    const UINT_32*  pCfg,           ///< [in] Pointer to table of tile configs
+    UINT_32         noOfEntries     ///< [in] Number of entries in the table above, 0 = full table
+    )
+{
+    BOOL_32 initOk = TRUE;
+
+    ADDR_ASSERT(noOfEntries <= TileTableSize);
+
+    memset(m_tileTable, 0, sizeof(m_tileTable));
+
+    if ((noOfEntries != 0) && (noOfEntries <= TileTableSize))
+    {
+        m_noOfEntries = noOfEntries;
+    }
+    else
+    {
+        m_noOfEntries = TileTableSize; // 0 selects the full table; oversized counts fail below
+    }
+
+    if ((pCfg != NULL) && (noOfEntries <= TileTableSize)) // From Client, and fits m_tileTable
+    {
+        for (UINT_32 i = 0; i < m_noOfEntries; i++)
+        {
+            ReadGbTileMode(*(pCfg + i), &m_tileTable[i]);
+        }
+    }
+    else
+    {
+        ADDR_ASSERT_ALWAYS();
+        initOk = FALSE;
+    }
+
+    if (initOk)
+    {
+        ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED);
+
+        if (m_settings.isBonaire == FALSE)
+        {
+            // Check if entry 18 is "thick+thin" combination
+            if ((m_tileTable[18].mode == ADDR_TM_1D_TILED_THICK) &&
+                (m_tileTable[18].type == ADDR_NON_DISPLAYABLE))
+            {
+                m_allowNonDispThickModes = TRUE;
+                ADDR_ASSERT(m_tileTable[24].mode == ADDR_TM_2D_TILED_THICK);
+            }
+        }
+        else
+        {
+            m_allowNonDispThickModes = TRUE;
+        }
+
+        // Assume the first entry is always programmed with full pipes
+        m_pipes = HwlGetPipes(&m_tileTable[0].info);
+    }
+
+    return initOk;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::ReadGbMacroTileCfg
+*
+*   @brief
+*       Decode a GB_MACROTILE_MODE register value into the bank fields of an ADDR_TILEINFO.
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID CIAddrLib::ReadGbMacroTileCfg(
+    UINT_32             regValue,   ///< [in] GB_MACRO_TILE_MODE register
+    ADDR_TILEINFO*      pCfg        ///< [out] output structure
+    ) const
+{
+    GB_MACROTILE_MODE macroReg;
+    macroReg.val = regValue;
+
+    pCfg->banks            = 1 << (macroReg.f.num_banks + 1);
+    pCfg->bankWidth        = 1 << macroReg.f.bank_width;
+    pCfg->bankHeight       = 1 << macroReg.f.bank_height;
+    pCfg->macroAspectRatio = 1 << macroReg.f.macro_tile_aspect;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::InitMacroTileCfgTable
+*
+*   @brief
+*       Initialize the macro tile config table (m_macroTileTable) from client register values.
+*   @return
+*       TRUE if macro tile table is correctly initialized; FALSE on NULL pCfg or oversized count
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::InitMacroTileCfgTable(
+    const UINT_32*  pCfg,               ///< [in] Pointer to table of tile configs
+    UINT_32         noOfMacroEntries    ///< [in] Number of entries in the table above, 0 = full
+    )
+{
+    BOOL_32 initOk = TRUE;
+
+    ADDR_ASSERT(noOfMacroEntries <= MacroTileTableSize);
+
+    memset(m_macroTileTable, 0, sizeof(m_macroTileTable));
+
+    if ((noOfMacroEntries != 0) && (noOfMacroEntries <= MacroTileTableSize))
+    {
+        m_noOfMacroEntries = noOfMacroEntries;
+    }
+    else
+    {
+        m_noOfMacroEntries = MacroTileTableSize; // 0 selects the full table; oversized fails below
+    }
+
+    if ((pCfg != NULL) && (noOfMacroEntries <= MacroTileTableSize)) // From Client, and it fits
+    {
+        for (UINT_32 i = 0; i < m_noOfMacroEntries; i++)
+        {
+            ReadGbMacroTileCfg(*(pCfg + i), &m_macroTileTable[i]);
+
+            m_macroTileTable[i].tileSplitBytes = 64 << (i % 8);
+        }
+    }
+    else
+    {
+        ADDR_ASSERT_ALWAYS();
+        initOk = FALSE;
+    }
+    return initOk;
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::HwlComputeMacroModeIndex
+*
+* @brief
+* Computes macro tile mode index for a tile mode table entry and fills *pTileInfo from the tables
+* @return
+* Macro tile mode index, or TileIndexNoMacroIndex when no macro mode is computed
+***************************************************************************************************
+*/
+INT_32 CIAddrLib::HwlComputeMacroModeIndex(
+ INT_32 tileIndex, ///< [in] Tile mode index (used unchecked as an m_tileTable subscript)
+ ADDR_SURFACE_FLAGS flags, ///< [in] Surface flags
+ UINT_32 bpp, ///< [in] Bit per pixel
+ UINT_32 numSamples, ///< [in] Number of samples
+ ADDR_TILEINFO* pTileInfo, ///< [out] Pointer to ADDR_TILEINFO
+ AddrTileMode* pTileMode, ///< [out] Pointer to AddrTileMode, may be NULL
+ AddrTileType* pTileType ///< [out] Pointer to AddrTileType, may be NULL
+ ) const
+{
+ INT_32 macroModeIndex = TileIndexInvalid;
+
+ if (flags.tcCompatible && flags.stencil)
+ {
+ // Don't compute macroModeIndex for tc compatible stencil surface (no output is written)
+ macroModeIndex = TileIndexNoMacroIndex;
+ }
+ else
+ {
+ AddrTileMode tileMode = m_tileTable[tileIndex].mode;
+ AddrTileType tileType = m_tileTable[tileIndex].type;
+ UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+ if (!IsMacroTiled(tileMode))
+ {
+ *pTileInfo = m_tileTable[tileIndex].info; // non-macro modes use the tile table entry as is
+ macroModeIndex = TileIndexNoMacroIndex;
+ }
+ else
+ {
+ UINT_32 tileBytes1x = BITS_TO_BYTES(bpp * MicroTilePixels * thickness);
+ UINT_32 tileSplit;
+
+ if (m_tileTable[tileIndex].type == ADDR_DEPTH_SAMPLE_ORDER)
+ {
+ // Depth entries store real tileSplitBytes
+ tileSplit = m_tileTable[tileIndex].info.tileSplitBytes;
+ }
+ else
+ {
+ // Non-depth entries store a split factor
+ UINT_32 sampleSplit = m_tileTable[tileIndex].info.tileSplitBytes;
+ UINT_32 colorTileSplit = Max(256u, sampleSplit * tileBytes1x);
+
+ tileSplit = colorTileSplit;
+ }
+
+ UINT_32 tileSplitC = Min(m_rowSize, tileSplit); // split is capped at m_rowSize
+ UINT_32 tileBytes;
+
+ if (flags.fmask)
+ {
+ tileBytes = Min(tileSplitC, tileBytes1x); // fmask ignores numSamples here
+ }
+ else
+ {
+ tileBytes = Min(tileSplitC, numSamples * tileBytes1x);
+ }
+
+ if (tileBytes < 64) // clamp so the division below yields at least 1
+ {
+ tileBytes = 64;
+ }
+
+ macroModeIndex = Log2(tileBytes / 64); // index is log2 of tile bytes in 64-byte units
+
+ if (flags.prt || IsPrtTileMode(tileMode))
+ {
+ // Unknown - assume it is 1/2 of table size
+ const UINT_32 PrtMacroModeOffset = MacroTileTableSize / 2;
+
+ macroModeIndex += PrtMacroModeOffset;
+ *pTileInfo = m_macroTileTable[macroModeIndex];
+ }
+ else
+ {
+ *pTileInfo = m_macroTileTable[macroModeIndex];
+ }
+
+ pTileInfo->pipeConfig = m_tileTable[tileIndex].info.pipeConfig; // pipe config: tile table
+
+ if (m_tileTable[tileIndex].type != ADDR_DEPTH_SAMPLE_ORDER)
+ {
+ pTileInfo->tileSplitBytes = tileSplitC;
+ }
+ else
+ {
+ pTileInfo->tileSplitBytes = m_tileTable[tileIndex].info.tileSplitBytes;
+ }
+ }
+
+ if (NULL != pTileMode)
+ {
+ *pTileMode = tileMode;
+ }
+
+ if (NULL != pTileType)
+ {
+ *pTileType = tileType;
+ }
+ }
+
+ return macroModeIndex;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeTileDataWidthAndHeightLinear
+*
+*   @brief
+*       Report the square macro shape used for per-tile data (CMASK and HTILE) in linear layout
+*
+*   @return
+*       N/A
+*
+*   @note
+*       Both outputs are in pixels: 8 micro tiles per side for the listed pipe configs, else 4
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlComputeTileDataWidthAndHeightLinear(
+    UINT_32*        pMacroWidth,     ///< [out] macro tile width
+    UINT_32*        pMacroHeight,    ///< [out] macro tile height
+    UINT_32         bpp,             ///< [in] bits per pixel
+    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info
+    ) const
+{
+    ADDR_ASSERT(pTileInfo != NULL);
+
+    // Four micro tiles per side unless the pipe config is one of those listed below
+    UINT_32 tilesPerSide = 4;
+
+    switch (pTileInfo->pipeConfig)
+    {
+        case ADDR_PIPECFG_P16_32x32_8x16:
+        case ADDR_PIPECFG_P16_32x32_16x16:
+        case ADDR_PIPECFG_P8_32x64_32x32:
+        case ADDR_PIPECFG_P8_32x32_16x32:
+        case ADDR_PIPECFG_P8_32x32_16x16:
+        case ADDR_PIPECFG_P8_32x32_8x16:
+        case ADDR_PIPECFG_P4_32x32:
+            tilesPerSide = 8;
+            break;
+        default:
+            break;
+    }
+
+    *pMacroWidth  = tilesPerSide * MicroTileWidth;
+    *pMacroHeight = tilesPerSide * MicroTileHeight;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlStereoCheckRightOffsetPadding
+*
+*   @brief
+*       check if the height needs extra padding for stereo right eye offset, to avoid swizzling
+*
+*   @return
+*       TRUE if the extra padding is needed (m_settings.isKaveri is set), FALSE otherwise
+*
+*   @note
+*       Kalindi (Kabini) is the only one that needs this padding as there is an uncertain
+*       possible HW issue where the right eye displays incorrectly with some type of swizzles, if
+*       the right eye offset is not 64KB aligned - EPR#366461
+*       Other Kaveri APUs also need the padding according to DXX team's report otherwise
+*       corruption observed. - EPR#374788
+***************************************************************************************************
+*/
+BOOL_32 CIAddrLib::HwlStereoCheckRightOffsetPadding() const
+{
+    // Padding is requested solely on the strength of the Kaveri chip setting
+    if (m_settings.isKaveri)
+    {
+        return TRUE;
+    }
+
+    // Every other chip setting leaves the height unpadded
+    return FALSE;
+}
+
+/**
+***************************************************************************************************
+*   CIAddrLib::HwlComputeMetadataNibbleAddress
+*
+*   @brief
+*       calculate meta data address based on input information
+*
+*   &parameter
+*       uncompressedDataByteAddress - address of a pixel in color surface
+*       dataBaseByteAddress         - base address of color surface
+*       metadataBaseByteAddress     - base address of meta ram
+*       metadataBitSize             - meta key size, 8 for DCC, 4 for cmask
+*       elementBitSize              - element size of color surface
+*       blockByteSize               - compression block size, 256 for DCC
+*       pipeInterleaveBytes         - pipe interleave size
+*       numOfPipes                  - number of pipes
+*       numOfBanks                  - number of banks
+*       numOfSamplesPerSplit        - number of samples per tile split
+*   @return
+*       meta data nibble address (nibble address is used to support DCC compatible cmask)
+*
+***************************************************************************************************
+*/
+UINT_64 CIAddrLib::HwlComputeMetadataNibbleAddress(
+    UINT_64 uncompressedDataByteAddress,
+    UINT_64 dataBaseByteAddress,
+    UINT_64 metadataBaseByteAddress,
+    UINT_32 metadataBitSize,
+    UINT_32 elementBitSize,
+    UINT_32 blockByteSize,
+    UINT_32 pipeInterleaveBytes,
+    UINT_32 numOfPipes,
+    UINT_32 numOfBanks,
+    UINT_32 numOfSamplesPerSplit) const
+{
+    ///--------------------------------------------------------------------------------------------
+    /// Get pipe interleave, bank and pipe bits
+    ///--------------------------------------------------------------------------------------------
+    UINT_32 pipeInterleaveBits  = Log2(pipeInterleaveBytes);
+    UINT_32 pipeBits            = Log2(numOfPipes);
+    UINT_32 bankBits            = Log2(numOfBanks);
+
+    ///--------------------------------------------------------------------------------------------
+    /// Clear pipe and bank swizzles (masks are built in 64 bits so the shift never overflows long)
+    ///--------------------------------------------------------------------------------------------
+    UINT_32 dataMacrotileBits        = pipeInterleaveBits + pipeBits + bankBits;
+    UINT_32 metadataMacrotileBits    = pipeInterleaveBits + pipeBits + bankBits;
+
+    UINT_64 dataMacrotileClearMask     = ~((static_cast<UINT_64>(1) << dataMacrotileBits) - 1);
+    UINT_64 metadataMacrotileClearMask = ~((static_cast<UINT_64>(1) << metadataMacrotileBits) - 1);
+
+    UINT_64 dataBaseByteAddressNoSwizzle = dataBaseByteAddress & dataMacrotileClearMask;
+    UINT_64 metadataBaseByteAddressNoSwizzle = metadataBaseByteAddress & metadataMacrotileClearMask;
+
+    ///--------------------------------------------------------------------------------------------
+    /// Modify metadata base before adding in so that when final address is divided by data ratio,
+    /// the base address returns to where it should be
+    ///--------------------------------------------------------------------------------------------
+    ADDR_ASSERT((0 != metadataBitSize));
+    UINT_64 metadataBaseShifted = metadataBaseByteAddressNoSwizzle * blockByteSize * 8 /
+                                  metadataBitSize;
+    UINT_64 offset = uncompressedDataByteAddress -
+                     dataBaseByteAddressNoSwizzle +
+                     metadataBaseShifted;
+
+    ///--------------------------------------------------------------------------------------------
+    /// Save bank data bits
+    ///--------------------------------------------------------------------------------------------
+    UINT_32 lsb = pipeBits + pipeInterleaveBits;
+    UINT_32 msb = bankBits - 1 + lsb;
+
+    UINT_64 bankDataBits = AddrGetBits(offset, msb, lsb);
+
+    ///--------------------------------------------------------------------------------------------
+    /// Save pipe data bits
+    ///--------------------------------------------------------------------------------------------
+    lsb = pipeInterleaveBits;
+    msb = pipeBits - 1 + lsb;
+
+    UINT_64 pipeDataBits = AddrGetBits(offset, msb, lsb);
+
+    ///--------------------------------------------------------------------------------------------
+    /// Remove pipe and bank bits
+    ///--------------------------------------------------------------------------------------------
+    lsb = pipeInterleaveBits;
+    msb = dataMacrotileBits - 1;
+
+    UINT_64 offsetWithoutPipeBankBits = AddrRemoveBits(offset, msb, lsb);
+
+    ADDR_ASSERT((0 != blockByteSize));
+    UINT_64 blockInBankpipe = offsetWithoutPipeBankBits / blockByteSize;
+
+    UINT_32 tileSize = 8 * 8 * elementBitSize/8 * numOfSamplesPerSplit;
+    UINT_32 blocksInTile = tileSize / blockByteSize;
+
+    if (0 == blocksInTile)
+    {
+        lsb = 0;
+    }
+    else
+    {
+        lsb = Log2(blocksInTile);
+    }
+    msb = bankBits - 1 + lsb;
+
+    UINT_64 blockInBankpipeWithBankBits = AddrInsertBits(blockInBankpipe, bankDataBits, msb, lsb);
+
+    /// NOTE *2 because we are converting to Nibble address in this step
+    UINT_64 metaAddressInPipe = blockInBankpipeWithBankBits * 2 * metadataBitSize / 8;
+
+
+    ///--------------------------------------------------------------------------------------------
+    /// Reinsert pipe bits back into the final address
+    ///--------------------------------------------------------------------------------------------
+    lsb = pipeInterleaveBits + 1; ///<+1 due to Nibble address now gives interleave bits extra lsb.
+    msb = pipeBits - 1 + lsb;
+    UINT_64 metadataAddress = AddrInsertBits(metaAddressInPipe, pipeDataBits, msb, lsb);
+
+    return metadataAddress;
+}
+
+/**
+***************************************************************************************************
+* CIAddrLib::HwlPadDimensions
+*
+* @brief
+* Pads *pPitch for DCC-compatible multisampled macro-tiled mip 0 surfaces on Volcanic Islands
+*
+* @return
+* N/A (only *pPitch is ever modified; *pHeight and *pSlices are left untouched)
+*
+***************************************************************************************************
+*/
+VOID CIAddrLib::HwlPadDimensions(
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 bpp, ///< [in] bits per pixel
+ ADDR_SURFACE_FLAGS flags, ///< [in] surface flags
+ UINT_32 numSamples, ///< [in] number of samples
+ ADDR_TILEINFO* pTileInfo, ///< [in] tile info (only read here)
+ UINT_32 padDims, ///< [in] Dimensions to pad valid value 1,2,3 (unused here)
+ UINT_32 mipLevel, ///< [in] MipLevel
+ UINT_32* pPitch, ///< [in/out] pitch in pixels
+ UINT_32 pitchAlign, ///< [in] pitch alignment
+ UINT_32* pHeight, ///< [in] height in pixels (only read here)
+ UINT_32 heightAlign, ///< [in] height alignment
+ UINT_32* pSlices, ///< [in/out] number of slices (unused here)
+ UINT_32 sliceAlign ///< [in] number of slice alignment (unused here)
+ ) const
+{
+ if (m_settings.isVolcanicIslands &&
+ flags.dccCompatible &&
+ (numSamples > 1) &&
+ (mipLevel == 0) &&
+ IsMacroTiled(tileMode))
+ {
+ UINT_32 tileSizePerSample = BITS_TO_BYTES(bpp * MicroTileWidth * MicroTileHeight);
+ UINT_32 samplesPerSplit = pTileInfo->tileSplitBytes / tileSizePerSample;
+
+ if (samplesPerSplit < numSamples) // the samples do not all fit in one tile split
+ {
+ UINT_32 dccFastClearByteAlign = HwlGetPipes(pTileInfo) * m_pipeInterleaveBytes * 256;
+ UINT_32 bytesPerSplit = BITS_TO_BYTES((*pPitch) * (*pHeight) * bpp * samplesPerSplit);
+
+ ADDR_ASSERT(IsPow2(dccFastClearByteAlign));
+
+ if (0 != (bytesPerSplit & (dccFastClearByteAlign - 1))) // split size is not yet aligned
+ {
+ UINT_32 dccFastClearPixelAlign = dccFastClearByteAlign /
+ BITS_TO_BYTES(bpp) /
+ samplesPerSplit;
+ UINT_32 macroTilePixelAlign = pitchAlign * heightAlign;
+
+ if ((dccFastClearPixelAlign >= macroTilePixelAlign) &&
+ ((dccFastClearPixelAlign % macroTilePixelAlign) == 0))
+ {
+ UINT_32 dccFastClearPitchAlignInMacroTile =
+ dccFastClearPixelAlign / macroTilePixelAlign;
+ UINT_32 heightInMacroTile = *pHeight / heightAlign;
+ UINT_32 dccFastClearPitchAlignInPixels;
+
+ while ((heightInMacroTile > 1) && // halve both while each is even and above 1
+ ((heightInMacroTile % 2) == 0) &&
+ (dccFastClearPitchAlignInMacroTile > 1) &&
+ ((dccFastClearPitchAlignInMacroTile % 2) == 0))
+ {
+ heightInMacroTile >>= 1;
+ dccFastClearPitchAlignInMacroTile >>= 1;
+ }
+
+ dccFastClearPitchAlignInPixels = pitchAlign * dccFastClearPitchAlignInMacroTile;
+
+ if (IsPow2(dccFastClearPitchAlignInPixels))
+ {
+ *pPitch = PowTwoAlign((*pPitch), dccFastClearPitchAlignInPixels);
+ }
+ else // round the pitch up to the next multiple by hand
+ {
+ *pPitch += (dccFastClearPitchAlignInPixels - 1);
+ *pPitch /= dccFastClearPitchAlignInPixels;
+ *pPitch *= dccFastClearPitchAlignInPixels;
+ }
+ }
+ }
+ }
+ }
+}
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h b/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h
new file mode 100644
index 00000000000..451508619f9
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/r800/ciaddrlib.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file ciaddrlib.h
+* @brief Contains the CIAddrLib class definition.
+***************************************************************************************************
+*/
+
+#ifndef __CI_ADDR_LIB_H__
+#define __CI_ADDR_LIB_H__
+
+#include "addrlib.h"
+#include "siaddrlib.h"
+
+/**
+***************************************************************************************************
+* @brief CI specific settings structure: a set of single-bit flags, one per chip or chip family.
+***************************************************************************************************
+*/
+struct CIChipSettings
+{
+ struct // anonymous, so the flags are read directly, e.g. m_settings.isVolcanicIslands
+ {
+ UINT_32 isSeaIsland : 1;
+ UINT_32 isBonaire : 1;
+ UINT_32 isKaveri : 1;
+ UINT_32 isSpectre : 1;
+ UINT_32 isSpooky : 1;
+ UINT_32 isKalindi : 1;
+ // Hawaii is GFXIP 7.2, similar to CI (Bonaire)
+ UINT_32 isHawaii : 1;
+
+ // VI (Volcanic Islands) flags follow
+ UINT_32 isVolcanicIslands : 1;
+ UINT_32 isIceland : 1;
+ UINT_32 isTonga : 1;
+ UINT_32 isFiji : 1;
+ // VI fusion (Carrizo)
+ UINT_32 isCarrizo : 1;
+ };
+};
+
+/**
+***************************************************************************************************
+* @brief CI specific address library function set, derived from SIAddrLib.
+* Instances are created through the static CreateObj() factory.
+***************************************************************************************************
+*/
+class CIAddrLib : public SIAddrLib
+{
+public:
+ /// Creates a CIAddrLib object; pClient is passed both to operator new and to the constructor
+ static AddrLib* CreateObj(const AddrClient* pClient)
+ {
+ return new(pClient) CIAddrLib(pClient);
+ }
+
+private: // constructor and destructor are private; CreateObj() is the public way to construct
+ CIAddrLib(const AddrClient* pClient);
+ virtual ~CIAddrLib();
+
+protected:
+
+ // Hwl interface - defined in AddrLib
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+ virtual AddrChipFamily HwlConvertChipFamily(
+ UINT_32 uChipFamily, UINT_32 uChipRevision);
+
+ virtual BOOL_32 HwlInitGlobalParams(
+ const ADDR_CREATE_INPUT* pCreateIn);
+
+ virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
+ INT_32 index, INT_32 macroModeIndex, ADDR_TILEINFO* pInfo,
+ AddrTileMode* pMode = 0, AddrTileType* pType = 0) const;
+
+ virtual VOID HwlComputeTileDataWidthAndHeightLinear(
+ UINT_32* pMacroWidth, UINT_32* pMacroHeight,
+ UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
+
+ virtual INT_32 HwlComputeMacroModeIndex(
+ INT_32 tileIndex, ADDR_SURFACE_FLAGS flags, UINT_32 bpp, UINT_32 numSamples,
+ ADDR_TILEINFO* pTileInfo, AddrTileMode* pTileMode = NULL, AddrTileType* pTileType = NULL
+ ) const;
+
+ // Sub-hwl interface - defined in EgBasedAddrLib
+ virtual VOID HwlSetupTileInfo(
+ AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
+ UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
+ AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ virtual INT_32 HwlPostCheckTileIndex(
+ const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
+ INT curIndex = TileIndexInvalid) const;
+
+ virtual VOID HwlFmaskPreThunkSurfInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
+ const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
+ ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const;
+
+ virtual VOID HwlFmaskPostThunkSurfInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const;
+
+ virtual AddrTileMode HwlDegradeThickTileMode(
+ AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
+
+ virtual BOOL_32 HwlOverrideTileMode(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ AddrTileMode* pTileMode,
+ AddrTileType* pTileType) const;
+
+ virtual BOOL_32 HwlStereoCheckRightOffsetPadding() const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeDccInfo(
+ const ADDR_COMPUTE_DCCINFO_INPUT* pIn,
+ ADDR_COMPUTE_DCCINFO_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeCmaskAddrFromCoord(
+ const ADDR_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+protected: // second protected section: padding hook (pitch/height/slices are in/out pointers)
+ virtual VOID HwlPadDimensions(
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+ UINT_32 numSamples, ADDR_TILEINFO* pTileInfo, UINT_32 padDims, UINT_32 mipLevel,
+ UINT_32* pPitch, UINT_32 pitchAlign, UINT_32* pHeight, UINT_32 heightAlign,
+ UINT_32* pSlices, UINT_32 sliceAlign) const;
+
+private: // non-virtual helpers and data members
+ VOID ReadGbTileMode(
+ UINT_32 regValue, ADDR_TILECONFIG* pCfg) const;
+
+ VOID ReadGbMacroTileCfg(
+ UINT_32 regValue, ADDR_TILEINFO* pCfg) const;
+
+ BOOL_32 InitTileSettingTable(
+ const UINT_32 *pSetting, UINT_32 noOfEntries);
+
+ BOOL_32 InitMacroTileCfgTable(
+ const UINT_32 *pSetting, UINT_32 noOfEntries);
+
+ UINT_64 HwlComputeMetadataNibbleAddress(
+ UINT_64 uncompressedDataByteAddress,
+ UINT_64 dataBaseByteAddress,
+ UINT_64 metadataBaseByteAddress,
+ UINT_32 metadataBitSize,
+ UINT_32 elementBitSize,
+ UINT_32 blockByteSize,
+ UINT_32 pipeInterleaveBytes,
+ UINT_32 numOfPipes,
+ UINT_32 numOfBanks,
+ UINT_32 numOfSamplesPerSplit) const;
+
+ static const UINT_32 MacroTileTableSize = 16; // capacity of m_macroTileTable
+ ADDR_TILEINFO m_macroTileTable[MacroTileTableSize]; // presumably filled by InitMacroTileCfgTable - confirm
+ UINT_32 m_noOfMacroEntries; // presumably the number of valid m_macroTileTable entries - confirm
+ BOOL_32 m_allowNonDispThickModes;
+
+ CIChipSettings m_settings; // per-chip / per-family flags, see CIChipSettings
+};
+
+#endif
+
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp b/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp
new file mode 100644
index 00000000000..b1e008b8392
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.cpp
@@ -0,0 +1,4575 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file egbaddrlib.cpp
+* @brief Contains the EgBasedAddrLib class implementation
+***************************************************************************************************
+*/
+
+#include "egbaddrlib.h"
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::EgBasedAddrLib
+*
+* @brief
+* Constructor; forwards pClient to the AddrLib base class
+*
+* @note
+* m_ranks and m_logicalBanks start at 0; m_bankInterleave starts at 1
+***************************************************************************************************
+*/
+EgBasedAddrLib::EgBasedAddrLib(const AddrClient* pClient) :
+ AddrLib(pClient),
+ m_ranks(0),
+ m_logicalBanks(0),
+ m_bankInterleave(1)
+{
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::~EgBasedAddrLib
+*
+* @brief
+* Destructor; the body is empty, nothing is released here
+***************************************************************************************************
+*/
+EgBasedAddrLib::~EgBasedAddrLib()
+{
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::DispatchComputeSurfaceInfo
+*
+* @brief
+* Set up the tile info, then dispatch on the tile mode to the linear, micro-tiled or
+* macro-tiled routine, which computes padded pitch, height, slices, total size in bytes
+* and the alignments. Results are returned through output parameters.
+*
+* @return
+* The result of the selected routine; FALSE (after asserting) for an unrecognized tile mode
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::DispatchComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ AddrTileMode tileMode = pIn->tileMode;
+ UINT_32 bpp = pIn->bpp;
+ UINT_32 numSamples = pIn->numSamples;
+ UINT_32 numFrags = ((pIn->numFrags == 0) ? numSamples : pIn->numFrags); // 0 means "same as numSamples"
+ UINT_32 pitch = pIn->width;
+ UINT_32 height = pIn->height;
+ UINT_32 numSlices = pIn->numSlices;
+ UINT_32 mipLevel = pIn->mipLevel;
+ ADDR_SURFACE_FLAGS flags = pIn->flags;
+
+ ADDR_TILEINFO tileInfoDef = {0}; // fallback storage, used only if pOut->pTileInfo is NULL
+ ADDR_TILEINFO* pTileInfo = &tileInfoDef;
+
+ UINT_32 padDims = 0;
+ BOOL_32 valid; // assigned on every path of the switch below
+
+ tileMode = DegradeLargeThickTile(tileMode, bpp);
+
+ // Only override numSamples for NI and above
+ if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
+ {
+ if (numFrags != numSamples) // This means EQAA
+ {
+ // The real surface size needed is determined by number of fragments
+ numSamples = numFrags;
+ }
+
+ // Save altered numSamples in pOut (the routines dispatched below read it from there)
+ pOut->numSamples = numSamples;
+ }
+
+ // Caller makes sure pOut->pTileInfo is not NULL, see HwlComputeSurfaceInfo
+ ADDR_ASSERT(pOut->pTileInfo);
+
+ if (pOut->pTileInfo != NULL)
+ {
+ pTileInfo = pOut->pTileInfo;
+ }
+
+ // Seed the working tile info: copy the caller's input if given (and distinct), else zero it
+ if (pIn->pTileInfo != NULL)
+ {
+ if (pTileInfo != pIn->pTileInfo)
+ {
+ *pTileInfo = *pIn->pTileInfo;
+ }
+ }
+ else
+ {
+ memset(pTileInfo, 0, sizeof(ADDR_TILEINFO));
+ }
+
+ // For macro tile mode, we should calculate default tiling parameters
+ HwlSetupTileInfo(tileMode,
+ flags,
+ bpp,
+ pitch,
+ height,
+ numSamples,
+ pIn->pTileInfo,
+ pTileInfo,
+ pIn->tileType,
+ pOut);
+
+ if (flags.cube)
+ {
+ if (mipLevel == 0)
+ {
+ padDims = 2;
+ }
+
+ if (numSlices == 1)
+ {
+ // This is calculating one face, remove cube flag
+ flags.cube = 0; // NOTE(review): local copy only, not read again below; callees get pIn->flags
+ }
+ }
+
+ switch (tileMode)
+ {
+ case ADDR_TM_LINEAR_GENERAL://fall through
+ case ADDR_TM_LINEAR_ALIGNED:
+ valid = ComputeSurfaceInfoLinear(pIn, pOut, padDims);
+ break;
+
+ case ADDR_TM_1D_TILED_THIN1://fall through
+ case ADDR_TM_1D_TILED_THICK:
+ valid = ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, tileMode);
+ break;
+
+ case ADDR_TM_2D_TILED_THIN1: //fall through
+ case ADDR_TM_2D_TILED_THICK: //fall through
+ case ADDR_TM_3D_TILED_THIN1: //fall through
+ case ADDR_TM_3D_TILED_THICK: //fall through
+ case ADDR_TM_2D_TILED_XTHICK: //fall through
+ case ADDR_TM_3D_TILED_XTHICK: //fall through
+ case ADDR_TM_PRT_TILED_THIN1: //fall through
+ case ADDR_TM_PRT_2D_TILED_THIN1://fall through
+ case ADDR_TM_PRT_3D_TILED_THIN1://fall through
+ case ADDR_TM_PRT_TILED_THICK: //fall through
+ case ADDR_TM_PRT_2D_TILED_THICK://fall through
+ case ADDR_TM_PRT_3D_TILED_THICK:
+ valid = ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, tileMode);
+ break;
+
+ default: // unrecognized tile mode
+ valid = FALSE;
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+
+ return valid;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceInfoLinear
+*
+* @brief
+* Compute linear surface sizes (padded pitch, height, slices and total size in bytes)
+* together with the alignments. Since it is linear mode, the output tile mode is the
+* input tile mode. Results are returned through output parameters.
+*
+* @return
+* Always TRUE
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoLinear(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure
+ UINT_32 padDims ///< [in] Dimensions to pad
+ ) const
+{
+ UINT_32 expPitch = pIn->width;
+ UINT_32 expHeight = pIn->height;
+ UINT_32 expNumSlices = pIn->numSlices;
+
+ // No linear MSAA on real H/W, keep this for TGL
+ UINT_32 numSamples = pOut->numSamples; // read from pOut, not pIn
+
+ const UINT_32 microTileThickness = 1;
+
+ //
+ // Compute the surface alignments.
+ //
+ ComputeSurfaceAlignmentsLinear(pIn->tileMode,
+ pIn->bpp,
+ pIn->flags,
+ &pOut->baseAlign,
+ &pOut->pitchAlign,
+ &pOut->heightAlign);
+
+ if ((pIn->tileMode == ADDR_TM_LINEAR_GENERAL) && pIn->flags.color && (pIn->height > 1))
+ {
+#if !ALT_TEST
+ // When linear_general surface is accessed in multiple lines, it requires 8 pixels in pitch
+ // alignment since PITCH_TILE_MAX is in unit of 8 pixels.
+ // It is OK if it is accessed per line.
+ ADDR_ASSERT((pIn->width % 8) == 0);
+#endif
+ }
+
+ pOut->depthAlign = microTileThickness;
+
+ expPitch = HwlPreHandleBaseLvl3xPitch(pIn, expPitch);
+
+ //
+ // Pad pitch and height to the required granularities.
+ //
+ PadDimensions(pIn->tileMode,
+ pIn->bpp,
+ pIn->flags,
+ numSamples,
+ pOut->pTileInfo,
+ padDims,
+ pIn->mipLevel,
+ &expPitch, pOut->pitchAlign,
+ &expHeight, pOut->heightAlign,
+ &expNumSlices, microTileThickness);
+
+ expPitch = HwlPostHandleBaseLvl3xPitch(pIn, expPitch);
+
+ //
+ // Adjust per HWL; pitch, height and height alignment are passed by pointer
+ //
+
+ UINT_64 logicalSliceSize;
+
+ logicalSliceSize = HwlGetSizeAdjustmentLinear(pIn->tileMode,
+ pIn->bpp,
+ numSamples,
+ pOut->baseAlign,
+ pOut->pitchAlign,
+ &expPitch,
+ &expHeight,
+ &pOut->heightAlign);
+
+
+ pOut->pitch = expPitch;
+ pOut->height = expHeight;
+ pOut->depth = expNumSlices;
+
+ pOut->surfSize = logicalSliceSize * expNumSlices; // 64-bit: logicalSliceSize is UINT_64
+
+ pOut->tileMode = pIn->tileMode;
+
+ return TRUE;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceInfoMicroTiled
+*
+* @brief
+* Compute 1D/micro tiled surface sizes (padded pitch, height, slices and total size in
+* bytes) together with the alignments. For mip levels, a thick mode may be degraded.
+* Results are returned through output parameters.
+*
+* @return
+* Always TRUE (valid is never cleared in this routine)
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMicroTiled(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure
+ UINT_32 padDims, ///< [in] Dimensions to pad
+ AddrTileMode expTileMode ///< [in] Expected tile mode
+ ) const
+{
+ BOOL_32 valid = TRUE;
+
+ UINT_32 microTileThickness;
+ UINT_32 expPitch = pIn->width;
+ UINT_32 expHeight = pIn->height;
+ UINT_32 expNumSlices = pIn->numSlices;
+
+ // No 1D MSAA on real H/W, keep this for TGL
+ UINT_32 numSamples = pOut->numSamples; // read from pOut, not pIn
+
+ //
+ // Compute the micro tile thickness.
+ //
+ microTileThickness = ComputeSurfaceThickness(expTileMode);
+
+ //
+ // Extra override for mip levels
+ //
+ if (pIn->mipLevel > 0)
+ {
+ //
+ // Reduce tiling mode from thick to thin if the number of slices is less than the
+ // micro tile thickness.
+ //
+ if ((expTileMode == ADDR_TM_1D_TILED_THICK) &&
+ (expNumSlices < ThickTileThickness))
+ {
+ expTileMode = HwlDegradeThickTileMode(ADDR_TM_1D_TILED_THICK, expNumSlices, NULL);
+ if (expTileMode != ADDR_TM_1D_TILED_THICK) // the HWL hook actually degraded the mode
+ {
+ microTileThickness = 1;
+ }
+ }
+ }
+
+ //
+ // Compute the surface restrictions.
+ //
+ ComputeSurfaceAlignmentsMicroTiled(expTileMode,
+ pIn->bpp,
+ pIn->flags,
+ numSamples,
+ &pOut->baseAlign,
+ &pOut->pitchAlign,
+ &pOut->heightAlign);
+
+ pOut->depthAlign = microTileThickness;
+
+ //
+ // Pad pitch and height to the required granularities.
+ // Compute surface size.
+ // Return parameters.
+ //
+ PadDimensions(expTileMode,
+ pIn->bpp,
+ pIn->flags,
+ numSamples,
+ pOut->pTileInfo,
+ padDims,
+ pIn->mipLevel,
+ &expPitch, pOut->pitchAlign,
+ &expHeight, pOut->heightAlign,
+ &expNumSlices, microTileThickness);
+
+ //
+ // Get HWL specific pitch adjustment; pitch and height are passed by pointer
+ //
+ UINT_64 logicalSliceSize = HwlGetSizeAdjustmentMicroTiled(microTileThickness,
+ pIn->bpp,
+ pIn->flags,
+ numSamples,
+ pOut->baseAlign,
+ pOut->pitchAlign,
+ &expPitch,
+ &expHeight);
+
+
+ pOut->pitch = expPitch;
+ pOut->height = expHeight;
+ pOut->depth = expNumSlices;
+
+ pOut->surfSize = logicalSliceSize * expNumSlices; // 64-bit: logicalSliceSize is UINT_64
+
+ pOut->tileMode = expTileMode; // may differ from the requested mode after degradation
+
+ return valid;
+}
+
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceInfoMacroTiled
+*
+* @brief
+* Compute 2D/macro tiled surface sizes (padded pitch, height, slices and total size in
+* bytes). For mip levels the tile mode may change, in which case the work is handed to
+* ComputeSurfaceInfoMicroTiled or redone for the new mode. Results are returned in pOut.
+*
+* @return
+* TRUE if no error occurs; FALSE if the macro-tiled alignment computation fails
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceInfoMacroTiled(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut, ///< [out] Output structure
+ UINT_32 padDims, ///< [in] Dimensions to pad
+ AddrTileMode expTileMode ///< [in] Expected tile mode
+ ) const
+{
+ BOOL_32 valid = TRUE;
+
+ AddrTileMode origTileMode = expTileMode;
+ UINT_32 microTileThickness;
+
+ UINT_32 paddedPitch;
+ UINT_32 paddedHeight;
+ UINT_64 bytesPerSlice;
+
+ UINT_32 expPitch = pIn->width;
+ UINT_32 expHeight = pIn->height;
+ UINT_32 expNumSlices = pIn->numSlices;
+
+ UINT_32 numSamples = pOut->numSamples;
+
+ //
+ // Compute the surface restrictions as base
+ // SanityCheckMacroTiled is called in ComputeSurfaceAlignmentsMacroTiled
+ //
+ valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode,
+ pIn->bpp,
+ pIn->flags,
+ pIn->mipLevel,
+ numSamples,
+ pOut->pTileInfo,
+ &pOut->baseAlign,
+ &pOut->pitchAlign,
+ &pOut->heightAlign);
+
+ if (valid)
+ {
+ //
+ // Compute the micro tile thickness.
+ //
+ microTileThickness = ComputeSurfaceThickness(expTileMode);
+
+ //
+ // Find the correct tiling mode for mip levels
+ //
+ if (pIn->mipLevel > 0)
+ {
+ //
+ // Try valid tile mode
+ //
+ expTileMode = ComputeSurfaceMipLevelTileMode(expTileMode,
+ pIn->bpp,
+ expPitch,
+ expHeight,
+ expNumSlices,
+ numSamples,
+ pOut->pitchAlign,
+ pOut->heightAlign,
+ pOut->pTileInfo);
+
+ if (!IsMacroTiled(expTileMode)) // Downgraded to micro-tiled
+ {
+ return ComputeSurfaceInfoMicroTiled(pIn, pOut, padDims, expTileMode);
+ }
+ else
+ {
+ if (microTileThickness != ComputeSurfaceThickness(expTileMode))
+ {
+ //
+ // Re-compute if thickness changed since bank-height may be changed!
+ //
+ return ComputeSurfaceInfoMacroTiled(pIn, pOut, padDims, expTileMode);
+ }
+ }
+ }
+
+ paddedPitch = expPitch;
+ paddedHeight = expHeight;
+
+ //
+ // Re-calculate alignment
+ //
+ if (expTileMode != origTileMode) // Tile mode is changed but still macro-tiled
+ {
+ valid = ComputeSurfaceAlignmentsMacroTiled(expTileMode,
+ pIn->bpp,
+ pIn->flags,
+ pIn->mipLevel,
+ numSamples,
+ pOut->pTileInfo,
+ &pOut->baseAlign,
+ &pOut->pitchAlign,
+ &pOut->heightAlign);
+ }
+
+ //
+ // Do padding
+ //
+ PadDimensions(expTileMode,
+ pIn->bpp,
+ pIn->flags,
+ numSamples,
+ pOut->pTileInfo,
+ padDims,
+ pIn->mipLevel,
+ &paddedPitch, pOut->pitchAlign,
+ &paddedHeight, pOut->heightAlign,
+ &expNumSlices, microTileThickness);
+
+ if (pIn->flags.qbStereo &&
+ (pOut->pStereoInfo != NULL) &&
+ HwlStereoCheckRightOffsetPadding())
+ {
+ // Eye height's bank bits are different from y == 0?
+ // Since 3D rendering treats right eye buffer starting from y == "eye height" while
+ // display engine treats it to be 0, so the bank bits may be different, we pad
+ // more in height to make sure y == "eye height" has the same bank bits as y == 0.
+ UINT_32 checkMask = pOut->pTileInfo->banks - 1;
+ UINT_32 bankBits = 0;
+ do
+ {
+ bankBits = (paddedHeight / 8 / pOut->pTileInfo->bankHeight) & checkMask;
+
+ if (bankBits)
+ {
+ paddedHeight += pOut->heightAlign;
+ }
+ } while (bankBits);
+ }
+
+ //
+ // Compute the size of a slice; widen to 64 bits first so the product is not truncated.
+ //
+ bytesPerSlice = BITS_TO_BYTES(static_cast<UINT_64>(paddedPitch) *
+ paddedHeight * NextPow2(pIn->bpp) * numSamples);
+
+ pOut->pitch = paddedPitch;
+ // Put this check right here to work around special mipmap cases in which the original
+ // height is needed.
+ // The original height is pre-stored in pOut->height in PostComputeMipLevel and
+ // pOut->pitch is needed in HwlCheckLastMacroTiledLvl, too.
+ if (m_configFlags.checkLast2DLevel && numSamples == 1) // Don't check MSAA
+ {
+ // Set a TRUE in pOut if next Level is the first 1D sub level
+ HwlCheckLastMacroTiledLvl(pIn, pOut);
+ }
+ pOut->height = paddedHeight;
+
+ pOut->depth = expNumSlices;
+
+ pOut->surfSize = bytesPerSlice * expNumSlices;
+
+ pOut->tileMode = expTileMode;
+
+ pOut->depthAlign = microTileThickness;
+
+ } // if (valid)
+
+ return valid;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceAlignmentsLinear
+*
+* @brief
+* Compute the base, pitch and height alignments of a linear surface. The results
+* are written through the three output pointers.
+*
+* @return
+* Always TRUE
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsLinear(
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 bpp, ///< [in] bits per pixel
+ ADDR_SURFACE_FLAGS flags, ///< [in] surface flags
+ UINT_32* pBaseAlign, ///< [out] base address alignment in bytes
+ UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels
+ UINT_32* pHeightAlign ///< [out] height alignment in pixels
+ ) const
+{
+ if (tileMode == ADDR_TM_LINEAR_GENERAL)
+ {
+ //
+ // Base alignment, pitch granularity and height granularity are all one element.
+ //
+ *pBaseAlign = (bpp > 8) ? bpp / 8 : 1;
+ *pPitchAlign = 1;
+ *pHeightAlign = 1;
+ }
+ else if (tileMode == ADDR_TM_LINEAR_ALIGNED)
+ {
+ //
+ // Base alignment: the pipe interleave size.
+ // Pitch granularity: hwl dependent.
+ // Height granularity: one row.
+ //
+ *pBaseAlign = m_pipeInterleaveBytes;
+ *pPitchAlign = HwlGetPitchAlignmentLinear(bpp, flags);
+ *pHeightAlign = 1;
+ }
+ else
+ {
+ // Any other tile mode: use alignments of 1 and flag the unhandled case.
+ *pBaseAlign = 1;
+ *pPitchAlign = 1;
+ *pHeightAlign = 1;
+ ADDR_UNHANDLED_CASE();
+ }
+
+ // AdjustPitchAlignment may modify the pitch alignment according to the flags.
+ AdjustPitchAlignment(flags, pPitchAlign);
+
+ return TRUE;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled
+*
+* @brief
+* Compute the base, pitch and height alignments of a 1D tiled surface. The results
+* are written through the three output pointers.
+*
+* @return
+* Always TRUE (valid is never cleared in this routine)
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMicroTiled(
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 bpp, ///< [in] bits per pixel
+ ADDR_SURFACE_FLAGS flags, ///< [in] surface flags
+ UINT_32 numSamples, ///< [in] number of samples
+ UINT_32* pBaseAlign, ///< [out] base address alignment in bytes
+ UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels
+ UINT_32* pHeightAlign ///< [out] height alignment in pixels
+ ) const
+{
+ BOOL_32 valid = TRUE;
+
+ //
+ // The required alignment for base is the pipe interleave size.
+ //
+ *pBaseAlign = m_pipeInterleaveBytes;
+
+ *pPitchAlign = HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples);
+
+ *pHeightAlign = MicroTileHeight;
+
+ AdjustPitchAlignment(flags, pPitchAlign);
+
+ // ECR#393489
+ // Workaround 2 for 1D tiling - There is a HW bug for Carrizo
+ // where it requires the following alignments for 1D tiling.
+ if (flags.czDispCompatible)
+ {
+ *pBaseAlign = PowTwoAlign(*pBaseAlign, 4096); //Base address MOD 4096 = 0
+ *pPitchAlign = PowTwoAlign(*pPitchAlign, 512 >> (BITS_TO_BYTES(bpp))); //(8 lines * pitch * bytes per pixel) MOD 4096 = 0
+ }
+ // end Carrizo workaround. NOTE(review): 512 is shifted right by the byte count, not divided by it - confirm intent
+
+ return valid;
+}
+
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::HwlReduceBankWidthHeight
+*
+* @brief
+* Additional checks, reduce bankWidth first and then bankHeight if needed and possible so
+* that tileSize*BANK_WIDTH*BANK_HEIGHT <= ROW_SIZE
+*
+* @return
+* FALSE if the product still exceeds the row size after the reductions, TRUE otherwise
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::HwlReduceBankWidthHeight(
+ UINT_32 tileSize, ///< [in] tile size
+ UINT_32 bpp, ///< [in] bits per pixel
+ ADDR_SURFACE_FLAGS flags, ///< [in] surface flags
+ UINT_32 numSamples, ///< [in] number of samples
+ UINT_32 bankHeightAlign, ///< [in] bank height alignment
+ UINT_32 pipes, ///< [in] pipes
+ ADDR_TILEINFO* pTileInfo ///< [in/out] bank structure.
+ ) const
+{
+ UINT_32 macroAspectAlign;
+ BOOL_32 valid = TRUE;
+
+ if (tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize)
+ {
+ BOOL_32 stillGreater = TRUE;
+
+ // Try reducing bankWidth first, halving it until the product fits or it reaches 1
+ if (stillGreater && pTileInfo->bankWidth > 1)
+ {
+ while (stillGreater && pTileInfo->bankWidth > 0)
+ {
+ pTileInfo->bankWidth >>= 1;
+
+ if (pTileInfo->bankWidth == 0) // went below 1: clamp and stop, stillGreater stays TRUE
+ {
+ pTileInfo->bankWidth = 1;
+ break;
+ }
+
+ stillGreater =
+ tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize;
+ }
+
+ // bankWidth is reduced above, so we need to recalculate bankHeight and ratio
+ bankHeightAlign = Max(1u,
+ m_pipeInterleaveBytes * m_bankInterleave /
+ (tileSize * pTileInfo->bankWidth)
+ );
+
+ // We cannot increase bankHeight so just assert this case.
+ ADDR_ASSERT((pTileInfo->bankHeight % bankHeightAlign) == 0);
+
+ if (numSamples == 1)
+ {
+ macroAspectAlign = Max(1u,
+ m_pipeInterleaveBytes * m_bankInterleave /
+ (tileSize * pipes * pTileInfo->bankWidth)
+ );
+ pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio,
+ macroAspectAlign);
+ }
+ }
+
+ // Early quit bank_height degradation for "64" bit z buffer (this also makes the result TRUE)
+ if (flags.depth && bpp >= 64)
+ {
+ stillGreater = FALSE;
+ }
+
+ // Then try reducing bankHeight, halving it but never going below bankHeightAlign
+ if (stillGreater && pTileInfo->bankHeight > bankHeightAlign)
+ {
+ while (stillGreater && pTileInfo->bankHeight > bankHeightAlign)
+ {
+ pTileInfo->bankHeight >>= 1;
+
+ if (pTileInfo->bankHeight < bankHeightAlign) // clamp and stop, stillGreater stays TRUE
+ {
+ pTileInfo->bankHeight = bankHeightAlign;
+ break;
+ }
+
+ stillGreater =
+ tileSize * pTileInfo->bankWidth * pTileInfo->bankHeight > m_rowSize;
+ }
+ }
+
+ valid = !stillGreater;
+
+ // Generate a warning if we still fail to meet this constraint
+ if (!valid)
+ {
+ ADDR_WARN(
+ 0, ("TILE_SIZE(%d)*BANK_WIDTH(%d)*BANK_HEIGHT(%d) <= ROW_SIZE(%d)",
+ tileSize, pTileInfo->bankWidth, pTileInfo->bankHeight, m_rowSize));
+ }
+ }
+
+ return valid;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled
+*
+* @brief
+* Compute the base, pitch and height alignments of a 2D tiled surface. bankHeight,
+* bankWidth and macroAspectRatio in pTileInfo may be adjusted along the way.
+*
+* @return
+* FALSE if SanityCheckMacroTiled or HwlReduceBankWidthHeight fails, TRUE otherwise
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::ComputeSurfaceAlignmentsMacroTiled(
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 bpp, ///< [in] bits per pixel
+ ADDR_SURFACE_FLAGS flags, ///< [in] surface flags
+ UINT_32 mipLevel, ///< [in] mip level
+ UINT_32 numSamples, ///< [in] number of samples
+ ADDR_TILEINFO* pTileInfo, ///< [in/out] bank structure.
+ UINT_32* pBaseAlign, ///< [out] base address alignment in bytes
+ UINT_32* pPitchAlign, ///< [out] pitch alignment in pixels
+ UINT_32* pHeightAlign ///< [out] height alignment in pixels
+ ) const
+{
+ BOOL_32 valid = SanityCheckMacroTiled(pTileInfo);
+
+ if (valid) // the output pointers are written only when the sanity check passes
+ {
+ UINT_32 macroTileWidth;
+ UINT_32 macroTileHeight;
+
+ UINT_32 tileSize;
+ UINT_32 bankHeightAlign;
+ UINT_32 macroAspectAlign;
+
+ UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+ UINT_32 pipes = HwlGetPipes(pTileInfo);
+
+ //
+ // Align bank height first according to latest h/w spec
+ //
+
+ // tile_size = MIN(tile_split, 64 * tile_thickness * element_bytes * num_samples)
+ tileSize = Min(pTileInfo->tileSplitBytes,
+ BITS_TO_BYTES(64 * thickness * bpp * numSamples));
+
+ // bank_height_align =
+ // MAX(1, (pipe_interleave_bytes * bank_interleave)/(tile_size*bank_width))
+ bankHeightAlign = Max(1u,
+ m_pipeInterleaveBytes * m_bankInterleave /
+ (tileSize * pTileInfo->bankWidth)
+ );
+
+ pTileInfo->bankHeight = PowTwoAlign(pTileInfo->bankHeight, bankHeightAlign);
+
+ // num_pipes * bank_width * macro_tile_aspect >=
+ // (pipe_interleave_size * bank_interleave) / tile_size
+ if (numSamples == 1)
+ {
+ // this restriction is only for mipmap (mipmap's numSamples must be 1)
+ macroAspectAlign = Max(1u,
+ m_pipeInterleaveBytes * m_bankInterleave /
+ (tileSize * pipes * pTileInfo->bankWidth)
+ );
+ pTileInfo->macroAspectRatio = PowTwoAlign(pTileInfo->macroAspectRatio, macroAspectAlign);
+ }
+
+ valid = HwlReduceBankWidthHeight(tileSize,
+ bpp,
+ flags,
+ numSamples,
+ bankHeightAlign,
+ pipes,
+ pTileInfo);
+
+ //
+ // The required granularity for pitch is the macro tile width (computed even if !valid).
+ //
+ macroTileWidth = MicroTileWidth * pTileInfo->bankWidth * pipes *
+ pTileInfo->macroAspectRatio;
+
+ *pPitchAlign = macroTileWidth;
+
+ AdjustPitchAlignment(flags, pPitchAlign);
+
+ //
+ // The required granularity for height is the macro tile height.
+ //
+ macroTileHeight = MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks /
+ pTileInfo->macroAspectRatio;
+
+ *pHeightAlign = macroTileHeight;
+
+ //
+ // Compute base alignment
+ //
+ *pBaseAlign = pipes *
+ pTileInfo->bankWidth * pTileInfo->banks * pTileInfo->bankHeight * tileSize;
+
+ if ((mipLevel == 0) && (flags.prt) && (m_chipFamily == ADDR_CHIP_FAMILY_SI))
+ {
+ static const UINT_32 PrtTileSize = 0x10000; // 64 KiB
+
+ UINT_32 macroTileSize = macroTileWidth * macroTileHeight * numSamples * bpp / 8;
+
+ if (macroTileSize < PrtTileSize) // scale pitch and base alignment up to a whole PRT tile
+ {
+ UINT_32 numMacroTiles = PrtTileSize / macroTileSize;
+
+ ADDR_ASSERT((PrtTileSize % macroTileSize) == 0);
+
+ *pPitchAlign *= numMacroTiles;
+ *pBaseAlign *= numMacroTiles;
+ }
+ }
+ }
+
+ return valid;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::SanityCheckMacroTiled
+*
+* @brief
+* Check if macro-tiled parameters are valid; the checks run in order, stopping at the first failure
+* @return
+* TRUE if valid
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::SanityCheckMacroTiled(
+ ADDR_TILEINFO* pTileInfo ///< [in] macro-tiled parameters
+ ) const
+{
+ BOOL_32 valid = TRUE;
+ UINT_32 numPipes = HwlGetPipes(pTileInfo); // only used by the guidance assert at the end
+
+ switch (pTileInfo->banks) // must be 2, 4, 8 or 16
+ {
+ case 2: //fall through
+ case 4: //fall through
+ case 8: //fall through
+ case 16:
+ break;
+ default:
+ valid = FALSE;
+ break;
+
+ }
+
+ if (valid)
+ {
+ switch (pTileInfo->bankWidth) // must be 1, 2, 4 or 8
+ {
+ case 1: //fall through
+ case 2: //fall through
+ case 4: //fall through
+ case 8:
+ break;
+ default:
+ valid = FALSE;
+ break;
+ }
+ }
+
+ if (valid)
+ {
+ switch (pTileInfo->bankHeight) // must be 1, 2, 4 or 8
+ {
+ case 1: //fall through
+ case 2: //fall through
+ case 4: //fall through
+ case 8:
+ break;
+ default:
+ valid = FALSE;
+ break;
+ }
+ }
+
+ if (valid)
+ {
+ switch (pTileInfo->macroAspectRatio) // must be 1, 2, 4 or 8
+ {
+ case 1: //fall through
+ case 2: //fall through
+ case 4: //fall through
+ case 8:
+ break;
+ default:
+ valid = FALSE;
+ break;
+ }
+ }
+
+ if (valid)
+ {
+ if (pTileInfo->banks < pTileInfo->macroAspectRatio)
+ {
+ // This will generate macro tile height <= 1
+ valid = FALSE;
+ }
+ }
+
+ if (valid)
+ {
+ if (pTileInfo->tileSplitBytes > m_rowSize) // tile split must not exceed the row size
+ {
+ valid = FALSE;
+ }
+ }
+
+ if (valid)
+ {
+ valid = HwlSanityCheckMacroTiled(pTileInfo); // chip specific checks run last
+ }
+
+ ADDR_ASSERT(valid == TRUE);
+
+ // Add this assert for guidance (it does not affect the returned value)
+ ADDR_ASSERT(numPipes * pTileInfo->banks >= 4);
+
+ return valid;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceMipLevelTileMode
+*
+* @brief
+* Compute valid tile mode for surface mipmap sub-levels (may degrade thick->thin and 2D/3D->1D)
+*
+* @return
+* Suitable tile mode
+***************************************************************************************************
+*/
+AddrTileMode EgBasedAddrLib::ComputeSurfaceMipLevelTileMode(
+ AddrTileMode baseTileMode, ///< [in] base tile mode
+ UINT_32 bpp, ///< [in] bits per pixel
+ UINT_32 pitch, ///< [in] current level pitch
+ UINT_32 height, ///< [in] current level height
+ UINT_32 numSlices, ///< [in] current number of slices
+ UINT_32 numSamples, ///< [in] number of samples
+ UINT_32 pitchAlign, ///< [in] pitch alignment
+ UINT_32 heightAlign, ///< [in] height alignment
+ ADDR_TILEINFO* pTileInfo ///< [in] ptr to bank structure
+ ) const
+{
+ UINT_32 bytesPerTile;
+
+ AddrTileMode expTileMode = baseTileMode;
+ UINT_32 microTileThickness = ComputeSurfaceThickness(expTileMode);
+ UINT_32 interleaveSize = m_pipeInterleaveBytes * m_bankInterleave;
+
+ //
+ // Compute the size of a micro tile in bytes (bpp is rounded up with NextPow2).
+ //
+ bytesPerTile = BITS_TO_BYTES(MicroTilePixels * microTileThickness * NextPow2(bpp) * numSamples);
+
+ //
+ // Reduce tiling mode from thick to thin if the number of slices is less than the
+ // micro tile thickness.
+ //
+ if (numSlices < microTileThickness)
+ {
+ expTileMode = HwlDegradeThickTileMode(expTileMode, numSlices, &bytesPerTile);
+ }
+
+ if (bytesPerTile > pTileInfo->tileSplitBytes) // clamp to the tile split size
+ {
+ bytesPerTile = pTileInfo->tileSplitBytes;
+ }
+
+ UINT_32 threshold1 =
+ bytesPerTile * HwlGetPipes(pTileInfo) * pTileInfo->bankWidth * pTileInfo->macroAspectRatio;
+
+ UINT_32 threshold2 =
+ bytesPerTile * pTileInfo->bankWidth * pTileInfo->bankHeight;
+
+ //
+ // Reduce the tile mode from 2D/3D to 1D in the following conditions
+ //
+ switch (expTileMode)
+ {
+ case ADDR_TM_2D_TILED_THIN1: //fall through
+ case ADDR_TM_3D_TILED_THIN1: //fall through
+ case ADDR_TM_PRT_TILED_THIN1: //fall through
+ case ADDR_TM_PRT_2D_TILED_THIN1: //fall through
+ case ADDR_TM_PRT_3D_TILED_THIN1:
+ if ((pitch < pitchAlign) ||
+ (height < heightAlign) ||
+ (interleaveSize > threshold1) ||
+ (interleaveSize > threshold2))
+ {
+ expTileMode = ADDR_TM_1D_TILED_THIN1;
+ }
+ break;
+ case ADDR_TM_2D_TILED_THICK: //fall through
+ case ADDR_TM_3D_TILED_THICK: //fall through
+ case ADDR_TM_2D_TILED_XTHICK: //fall through
+ case ADDR_TM_3D_TILED_XTHICK: //fall through
+ case ADDR_TM_PRT_TILED_THICK: //fall through
+ case ADDR_TM_PRT_2D_TILED_THICK: //fall through
+ case ADDR_TM_PRT_3D_TILED_THICK:
+ if ((pitch < pitchAlign) || // thick modes only check pitch/height, not the thresholds
+ (height < heightAlign))
+ {
+ expTileMode = ADDR_TM_1D_TILED_THICK;
+ }
+ break;
+ default: // other tile modes are returned unchanged
+ break;
+ }
+
+ return expTileMode;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::HwlDegradeBaseLevel
+* @brief
+* Check if degrade is needed for base level
+* @return
+* TRUE if degrade is suggested (alignment computation failed, or width/height below alignment)
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::HwlDegradeBaseLevel(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const ///< [in] surface info; tileMode must be macro tiled
+{
+ BOOL_32 degrade = FALSE;
+ BOOL_32 valid = TRUE;
+
+ ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
+
+ UINT_32 baseAlign;
+ UINT_32 pitchAlign;
+ UINT_32 heightAlign;
+
+ ADDR_ASSERT(pIn->pTileInfo);
+ ADDR_TILEINFO tileInfo = *pIn->pTileInfo; // local copy; the calls below receive &tileInfo
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
+
+ if (UseTileIndex(pIn->tileIndex))
+ {
+ out.tileIndex = pIn->tileIndex;
+ out.macroModeIndex = TileIndexInvalid;
+ }
+
+ HwlSetupTileInfo(pIn->tileMode,
+ pIn->flags,
+ pIn->bpp,
+ pIn->width,
+ pIn->height,
+ pIn->numSamples,
+ &tileInfo, // same local copy passed twice (presumably in and out - confirm)
+ &tileInfo,
+ pIn->tileType,
+ &out);
+
+ valid = ComputeSurfaceAlignmentsMacroTiled(pIn->tileMode,
+ pIn->bpp,
+ pIn->flags,
+ pIn->mipLevel,
+ pIn->numSamples,
+ &tileInfo,
+ &baseAlign,
+ &pitchAlign,
+ &heightAlign);
+
+ if (valid)
+ {
+ degrade = (pIn->width < pitchAlign || pIn->height < heightAlign); // smaller than the computed alignment
+ }
+ else
+ {
+ degrade = TRUE; // alignment computation reported invalid parameters
+ }
+
+ return degrade;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::HwlDegradeThickTileMode
+*
+* @brief
+* Degrades a thick/xthick tile mode to a thinner one and scales bytes per tile accordingly
+*
+* @return
+* Suitable tile mode
+***************************************************************************************************
+*/
+AddrTileMode EgBasedAddrLib::HwlDegradeThickTileMode(
+ AddrTileMode baseTileMode, ///< [in] base tile mode
+ UINT_32 numSlices, ///< [in] current number of slices
+ UINT_32* pBytesPerTile ///< [in/out] pointer to bytes per tile; may be NULL
+ ) const
+{
+ ADDR_ASSERT(numSlices < ComputeSurfaceThickness(baseTileMode));
+ // if pBytesPerTile is NULL, this is a don't-care (64 is used as a placeholder value)
+ UINT_32 bytesPerTile = pBytesPerTile != NULL ? *pBytesPerTile : 64;
+
+ AddrTileMode expTileMode = baseTileMode;
+ switch (baseTileMode)
+ {
+ case ADDR_TM_1D_TILED_THICK:
+ expTileMode = ADDR_TM_1D_TILED_THIN1;
+ bytesPerTile >>= 2; // divide by 4
+ break;
+ case ADDR_TM_2D_TILED_THICK:
+ expTileMode = ADDR_TM_2D_TILED_THIN1;
+ bytesPerTile >>= 2; // divide by 4
+ break;
+ case ADDR_TM_3D_TILED_THICK:
+ expTileMode = ADDR_TM_3D_TILED_THIN1;
+ bytesPerTile >>= 2; // divide by 4
+ break;
+ case ADDR_TM_2D_TILED_XTHICK:
+ if (numSlices < ThickTileThickness)
+ {
+ expTileMode = ADDR_TM_2D_TILED_THIN1;
+ bytesPerTile >>= 3; // divide by 8
+ }
+ else
+ {
+ expTileMode = ADDR_TM_2D_TILED_THICK;
+ bytesPerTile >>= 1; // divide by 2
+ }
+ break;
+ case ADDR_TM_3D_TILED_XTHICK:
+ if (numSlices < ThickTileThickness)
+ {
+ expTileMode = ADDR_TM_3D_TILED_THIN1;
+ bytesPerTile >>= 3; // divide by 8
+ }
+ else
+ {
+ expTileMode = ADDR_TM_3D_TILED_THICK;
+ bytesPerTile >>= 1; // divide by 2
+ }
+ break;
+ default: // not a thick mode: tile mode and bytes per tile stay unchanged
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+
+ if (pBytesPerTile != NULL)
+ {
+ *pBytesPerTile = bytesPerTile;
+ }
+
+ return expTileMode;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::DispatchComputeSurfaceAddrFromCoord
+*
+* @brief
+* Compute surface address from given coord (x, y, slice, sample), dispatching on tile mode
+*
+* @return
+* Address in bytes (0 for an unsupported tile mode)
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::DispatchComputeSurfaceAddrFromCoord(
+ const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
+ ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ UINT_32 x = pIn->x;
+ UINT_32 y = pIn->y;
+ UINT_32 slice = pIn->slice;
+ UINT_32 sample = pIn->sample;
+ UINT_32 bpp = pIn->bpp;
+ UINT_32 pitch = pIn->pitch;
+ UINT_32 height = pIn->height;
+ UINT_32 numSlices = pIn->numSlices;
+ UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); // 0 samples is treated as 1
+ UINT_32 numFrags = ((pIn->numFrags == 0) ? numSamples : pIn->numFrags); // 0 frags defaults to numSamples
+ AddrTileMode tileMode = pIn->tileMode;
+ AddrTileType microTileType = pIn->tileType;
+ BOOL_32 ignoreSE = pIn->ignoreSE;
+ BOOL_32 isDepthSampleOrder = pIn->isDepth;
+ ADDR_TILEINFO* pTileInfo = pIn->pTileInfo;
+
+ UINT_32* pBitPosition = &pOut->bitPosition;
+ UINT_64 addr;
+
+#if ADDR_AM_BUILD
+ UINT_32 addr5Bit = 0;
+ UINT_32 addr5Swizzle = pIn->addr5Swizzle;
+ BOOL_32 is32ByteTile = pIn->is32ByteTile;
+#endif
+
+ // ADDR_DEPTH_SAMPLE_ORDER = non-disp + depth-sample-order
+ if (microTileType == ADDR_DEPTH_SAMPLE_ORDER)
+ {
+ isDepthSampleOrder = TRUE;
+ }
+
+ if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
+ {
+ if (numFrags != numSamples)
+ {
+ numSamples = numFrags; // the fragment count is what gets passed on as numSamples below
+ ADDR_ASSERT(sample < numSamples);
+ }
+
+ /// @note
+ /// 128 bit/thick tiled surface doesn't support display tiling and
+ /// mipmap chain must have the same tileType, so please fill tileType correctly
+ if (!IsLinear(pIn->tileMode))
+ {
+ if (bpp >= 128 || ComputeSurfaceThickness(tileMode) > 1)
+ {
+ ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE);
+ }
+ }
+ }
+
+ switch (tileMode)
+ {
+ case ADDR_TM_LINEAR_GENERAL://fall through
+ case ADDR_TM_LINEAR_ALIGNED:
+ addr = ComputeSurfaceAddrFromCoordLinear(x,
+ y,
+ slice,
+ sample,
+ bpp,
+ pitch,
+ height,
+ numSlices,
+ pBitPosition);
+ break;
+ case ADDR_TM_1D_TILED_THIN1://fall through
+ case ADDR_TM_1D_TILED_THICK:
+ addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
+ y,
+ slice,
+ sample,
+ bpp,
+ pitch,
+ height,
+ numSamples,
+ tileMode,
+ microTileType,
+ isDepthSampleOrder,
+ pBitPosition);
+ break;
+ case ADDR_TM_2D_TILED_THIN1: //fall through
+ case ADDR_TM_2D_TILED_THICK: //fall through
+ case ADDR_TM_3D_TILED_THIN1: //fall through
+ case ADDR_TM_3D_TILED_THICK: //fall through
+ case ADDR_TM_2D_TILED_XTHICK: //fall through
+ case ADDR_TM_3D_TILED_XTHICK: //fall through
+ case ADDR_TM_PRT_TILED_THIN1: //fall through
+ case ADDR_TM_PRT_2D_TILED_THIN1://fall through
+ case ADDR_TM_PRT_3D_TILED_THIN1://fall through
+ case ADDR_TM_PRT_TILED_THICK: //fall through
+ case ADDR_TM_PRT_2D_TILED_THICK://fall through
+ case ADDR_TM_PRT_3D_TILED_THICK:
+ UINT_32 pipeSwizzle;
+ UINT_32 bankSwizzle;
+
+ if (m_configFlags.useCombinedSwizzle) // swizzles come packed in pIn->tileSwizzle
+ {
+ ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
+ &bankSwizzle, &pipeSwizzle);
+ }
+ else // swizzles are supplied separately
+ {
+ pipeSwizzle = pIn->pipeSwizzle;
+ bankSwizzle = pIn->bankSwizzle;
+ }
+
+ addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
+ y,
+ slice,
+ sample,
+ bpp,
+ pitch,
+ height,
+ numSamples,
+ tileMode,
+ microTileType,
+ ignoreSE,
+ isDepthSampleOrder,
+ pipeSwizzle,
+ bankSwizzle,
+ pTileInfo,
+ pBitPosition);
+ break;
+ default: // unsupported tile mode
+ addr = 0;
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+
+#if ADDR_AM_BUILD
+ if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
+ {
+ if (addr5Swizzle && isDepthSampleOrder && is32ByteTile)
+ {
+ UINT_32 tx = x >> 3;
+ UINT_32 ty = y >> 3;
+ UINT_32 tileBits = ((ty&0x3) << 2) | (tx&0x3);
+
+ tileBits = tileBits & addr5Swizzle;
+ addr5Bit = XorReduce(tileBits, 4);
+
+ addr = addr | static_cast(addr5Bit << 5); // NOTE(review): cast target type is missing in this text - confirm
+ }
+ }
+#endif
+
+ return addr;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceAddrFromCoordMacroTiled
+*
+* @brief
+* Computes the surface address and bit position from a
+* coordinate for 2D tiled (macro tiled)
+* @return
+* The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeSurfaceAddrFromCoordMacroTiled(
+ UINT_32 x, ///< [in] x coordinate
+ UINT_32 y, ///< [in] y coordinate
+ UINT_32 slice, ///< [in] slice index
+ UINT_32 sample, ///< [in] sample index
+ UINT_32 bpp, ///< [in] bits per pixel
+ UINT_32 pitch, ///< [in] surface pitch, in pixels
+ UINT_32 height, ///< [in] surface height, in pixels
+ UINT_32 numSamples, ///< [in] number of samples
+ AddrTileMode tileMode, ///< [in] tile mode
+ AddrTileType microTileType, ///< [in] micro tiling type
+ BOOL_32 ignoreSE, ///< [in] TRUE if shader engines can be ignored
+ BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample ordering is used
+ UINT_32 pipeSwizzle, ///< [in] pipe swizzle
+ UINT_32 bankSwizzle, ///< [in] bank swizzle
+ ADDR_TILEINFO* pTileInfo, ///< [in] bank structure
+ /// **All fields to be valid on entry**
+ UINT_32* pBitPosition ///< [out] bit position, e.g. FMT_1 will use this
+ ) const
+{
+ UINT_64 addr;
+
+ UINT_32 microTileBytes;
+ UINT_32 microTileBits;
+ UINT_32 sampleOffset;
+ UINT_32 pixelIndex;
+ UINT_32 pixelOffset;
+ UINT_32 elementOffset;
+ UINT_32 tileSplitSlice;
+ UINT_32 pipe;
+ UINT_32 bank;
+ UINT_64 sliceBytes;
+ UINT_64 sliceOffset;
+ UINT_32 macroTilePitch;
+ UINT_32 macroTileHeight;
+ UINT_32 macroTilesPerRow;
+ UINT_32 macroTilesPerSlice;
+ UINT_64 macroTileBytes;
+ UINT_32 macroTileIndexX;
+ UINT_32 macroTileIndexY;
+ UINT_64 macroTileOffset;
+ UINT_64 totalOffset;
+ UINT_64 pipeInterleaveMask;
+ UINT_64 bankInterleaveMask;
+ UINT_64 pipeInterleaveOffset;
+ UINT_32 bankInterleaveOffset;
+ UINT_64 offset;
+ UINT_32 tileRowIndex;
+ UINT_32 tileColumnIndex;
+ UINT_32 tileIndex;
+ UINT_32 tileOffset;
+
+ UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+ //
+ // Compute the number of pipe-interleave, pipe, bank-interleave and bank bits.
+ //
+ UINT_32 numPipes = HwlGetPipes(pTileInfo);
+ UINT_32 numPipeInterleaveBits = Log2(m_pipeInterleaveBytes);
+ UINT_32 numPipeBits = Log2(numPipes);
+ UINT_32 numBankInterleaveBits = Log2(m_bankInterleave);
+ UINT_32 numBankBits = Log2(pTileInfo->banks);
+
+ //
+ // Compute the micro tile size.
+ //
+ microTileBits = MicroTilePixels * microTileThickness * bpp * numSamples;
+
+ microTileBytes = microTileBits / 8;
+ //
+ // Compute the pixel index within the micro tile.
+ //
+ pixelIndex = ComputePixelIndexWithinMicroTile(x,
+ y,
+ slice,
+ bpp,
+ tileMode,
+ microTileType);
+
+ //
+ // Compute the sample offset and pixel offset.
+ //
+ if (isDepthSampleOrder)
+ {
+ //
+ // For depth surfaces, samples are stored contiguously for each element, so the sample
+ // offset is the sample number times the element size.
+ //
+ sampleOffset = sample * bpp;
+ pixelOffset = pixelIndex * bpp * numSamples;
+ }
+ else
+ {
+ //
+ // For color surfaces, all elements for a particular sample are stored contiguously, so
+ // the sample offset is the sample number times the micro tile size divided by the number
+ // of samples.
+ //
+ sampleOffset = sample * (microTileBits / numSamples);
+ pixelOffset = pixelIndex * bpp;
+ }
+
+ //
+ // Compute the element offset (in bits at this point).
+ //
+ elementOffset = pixelOffset + sampleOffset;
+
+ *pBitPosition = static_cast(elementOffset % 8); // NOTE(review): cast target type is missing in this text - confirm
+
+ elementOffset /= 8; //bit-to-byte
+
+ //
+ // Determine if tiles need to be split across slices.
+ //
+ // If the size of the micro tile is larger than the tile split size, then the tile will be
+ // split across multiple slices.
+ //
+ UINT_32 slicesPerTile = 1;
+
+ if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1))
+ { //don't support for thick mode
+
+ //
+ // Compute the number of slices per tile.
+ //
+ slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes;
+
+ //
+ // Compute the tile split slice number for use in rotating the bank.
+ //
+ tileSplitSlice = elementOffset / pTileInfo->tileSplitBytes;
+
+ //
+ // Adjust the element offset to account for the portion of the tile that is being moved to
+ // a new slice.
+ //
+ elementOffset %= pTileInfo->tileSplitBytes;
+
+ //
+ // Clamp microTileBytes to tileSplitBytes since the tile is now
+ // split across slices.
+ //
+ microTileBytes = pTileInfo->tileSplitBytes;
+ }
+ else
+ {
+ tileSplitSlice = 0;
+ }
+
+ //
+ // Compute macro tile pitch and height.
+ //
+ macroTilePitch =
+ (MicroTileWidth * pTileInfo->bankWidth * numPipes) * pTileInfo->macroAspectRatio;
+ macroTileHeight =
+ (MicroTileHeight * pTileInfo->bankHeight * pTileInfo->banks) / pTileInfo->macroAspectRatio;
+
+ //
+ // Compute the number of bytes per macro tile. Note: the micro tile total is divided by
+ // (numPipes * banks) below.
+ macroTileBytes =
+ static_cast(microTileBytes) *
+ (macroTilePitch / MicroTileWidth) * (macroTileHeight / MicroTileHeight) /
+ (numPipes * pTileInfo->banks);
+
+ //
+ // Compute the number of macro tiles per row.
+ //
+ macroTilesPerRow = pitch / macroTilePitch;
+
+ //
+ // Compute the offset to the macro tile containing the specified coordinate.
+ //
+ macroTileIndexX = x / macroTilePitch;
+ macroTileIndexY = y / macroTileHeight;
+ macroTileOffset = ((macroTileIndexY * macroTilesPerRow) + macroTileIndexX) * macroTileBytes;
+
+ //
+ // Compute the number of macro tiles per slice.
+ //
+ macroTilesPerSlice = macroTilesPerRow * (height / macroTileHeight);
+
+ //
+ // Compute the slice size.
+ //
+ sliceBytes = macroTilesPerSlice * macroTileBytes;
+
+ //
+ // Compute the slice offset.
+ //
+ sliceOffset = sliceBytes * (tileSplitSlice + slicesPerTile * (slice / microTileThickness));
+
+ //
+ // Compute tile offset
+ //
+ tileRowIndex = (y / MicroTileHeight) % pTileInfo->bankHeight;
+ tileColumnIndex = ((x / MicroTileWidth) / numPipes) % pTileInfo->bankWidth;
+ tileIndex = (tileRowIndex * pTileInfo->bankWidth) + tileColumnIndex;
+ tileOffset = tileIndex * microTileBytes;
+
+ //
+ // Combine the slice offset and macro tile offset with the pixel and sample offsets, accounting
+ // for the pipe and bank bits in the middle of the address.
+ //
+ totalOffset = sliceOffset + macroTileOffset + elementOffset + tileOffset;
+
+ //
+ // Get the pipe and bank.
+ //
+
+ // when the tileMode is a PRT no-rotation type, reduce x and y to coordinates within the macro tile
+ if (IsPrtNoRotationTileMode(tileMode))
+ {
+ x = x % macroTilePitch;
+ y = y % macroTileHeight;
+ }
+
+ pipe = ComputePipeFromCoord(x,
+ y,
+ slice,
+ tileMode,
+ pipeSwizzle,
+ ignoreSE,
+ pTileInfo);
+
+ bank = ComputeBankFromCoord(x,
+ y,
+ slice,
+ tileMode,
+ bankSwizzle,
+ tileSplitSlice,
+ pTileInfo);
+
+
+ //
+ // Split the offset to put some bits below the pipe+bank bits and some above.
+ //
+ pipeInterleaveMask = (1 << numPipeInterleaveBits) - 1;
+ bankInterleaveMask = (1 << numBankInterleaveBits) - 1;
+ pipeInterleaveOffset = totalOffset & pipeInterleaveMask;
+ bankInterleaveOffset = static_cast((totalOffset >> numPipeInterleaveBits) &
+ bankInterleaveMask);
+ offset = totalOffset >> (numPipeInterleaveBits + numBankInterleaveBits);
+
+ //
+ // Assemble the address from its components, lowest bits first:
+ // pipe interleave offset | pipe | bank interleave offset | bank | remaining offset
+ addr = pipeInterleaveOffset;
+ // This is to remove /analyze warnings
+ UINT_32 pipeBits = pipe << numPipeInterleaveBits;
+ UINT_32 bankInterleaveBits = bankInterleaveOffset << (numPipeInterleaveBits + numPipeBits);
+ UINT_32 bankBits = bank << (numPipeInterleaveBits + numPipeBits +
+ numBankInterleaveBits);
+ UINT_64 offsetBits = offset << (numPipeInterleaveBits + numPipeBits +
+ numBankInterleaveBits + numBankBits);
+
+ addr |= pipeBits;
+ addr |= bankInterleaveBits;
+ addr |= bankBits;
+ addr |= offsetBits;
+
+ return addr;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceAddrFromCoordMicroTiled
+*
+* @brief
+* Computes the surface address and bit position from a coordinate for 1D tiled
+* (micro tiled)
+* @return
+* The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeSurfaceAddrFromCoordMicroTiled(
+ UINT_32 x, ///< [in] x coordinate
+ UINT_32 y, ///< [in] y coordinate
+ UINT_32 slice, ///< [in] slice index
+ UINT_32 sample, ///< [in] sample index
+ UINT_32 bpp, ///< [in] bits per pixel
+ UINT_32 pitch, ///< [in] pitch, in pixels
+ UINT_32 height, ///< [in] height, in pixels
+ UINT_32 numSamples, ///< [in] number of samples
+ AddrTileMode tileMode, ///< [in] tile mode
+ AddrTileType microTileType, ///< [in] micro tiling type
+ BOOL_32 isDepthSampleOrder, ///< [in] TRUE if depth sample ordering is used
+ UINT_32* pBitPosition ///< [out] bit position, e.g. FMT_1 will use this
+ ) const
+{
+ UINT_64 addr = 0;
+
+ UINT_32 microTileBytes;
+ UINT_64 sliceBytes;
+ UINT_32 microTilesPerRow;
+ UINT_32 microTileIndexX;
+ UINT_32 microTileIndexY;
+ UINT_32 microTileIndexZ;
+ UINT_64 sliceOffset;
+ UINT_64 microTileOffset;
+ UINT_32 sampleOffset;
+ UINT_32 pixelIndex;
+ UINT_32 pixelOffset;
+
+ UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+ //
+ // Compute the micro tile size.
+ //
+ microTileBytes = BITS_TO_BYTES(MicroTilePixels * microTileThickness * bpp * numSamples);
+
+ //
+ // Compute the slice size (covers microTileThickness slices).
+ //
+ sliceBytes =
+ BITS_TO_BYTES(static_cast(pitch) * height * microTileThickness * bpp * numSamples); // NOTE(review): cast target type is missing in this text - confirm
+
+ //
+ // Compute the number of micro tiles per row.
+ //
+ microTilesPerRow = pitch / MicroTileWidth;
+
+ //
+ // Compute the micro tile index.
+ //
+ microTileIndexX = x / MicroTileWidth;
+ microTileIndexY = y / MicroTileHeight;
+ microTileIndexZ = slice / microTileThickness;
+
+ //
+ // Compute the slice offset.
+ //
+ sliceOffset = static_cast(microTileIndexZ) * sliceBytes;
+
+ //
+ // Compute the offset to the micro tile containing the specified coordinate.
+ //
+ microTileOffset = (static_cast(microTileIndexY) * microTilesPerRow + microTileIndexX) *
+ microTileBytes;
+
+ //
+ // Compute the pixel index within the micro tile.
+ //
+ pixelIndex = ComputePixelIndexWithinMicroTile(x,
+ y,
+ slice,
+ bpp,
+ tileMode,
+ microTileType);
+
+ // Compute the sample offset and pixel offset (both in bits).
+ //
+ if (isDepthSampleOrder)
+ {
+ //
+ // For depth surfaces, samples are stored contiguously for each element, so the sample
+ // offset is the sample number times the element size.
+ //
+ sampleOffset = sample * bpp;
+ pixelOffset = pixelIndex * bpp * numSamples;
+ }
+ else
+ {
+ //
+ // For color surfaces, all elements for a particular sample are stored contiguously, so
+ // the sample offset is the sample number times the micro tile size divided by the number
+ // of samples.
+ //
+ sampleOffset = sample * (microTileBytes*8 / numSamples);
+ pixelOffset = pixelIndex * bpp;
+ }
+
+ //
+ // Compute the bit position of the element, then convert the element offset from bits to bytes.
+ //
+
+ UINT_32 elemOffset = sampleOffset + pixelOffset;
+
+ *pBitPosition = elemOffset % 8;
+ elemOffset /= 8;
+
+ //
+ // Combine the slice offset, micro tile offset, sample offset, and pixel offsets.
+ //
+ addr = sliceOffset + microTileOffset + elemOffset;
+
+ return addr;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::HwlComputePixelCoordFromOffset
+*
+* @brief
+* Compute pixel coordinate (x, y, z within the micro tile, sample) from offset inside a micro tile
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::HwlComputePixelCoordFromOffset(
+ UINT_32 offset, ///< [in] offset inside micro tile in bits
+ UINT_32 bpp, ///< [in] bits per pixel
+ UINT_32 numSamples, ///< [in] number of samples
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 tileBase, ///< [in] base offset within a tile
+ UINT_32 compBits, ///< [in] component bits actually needed(for planar surface)
+ UINT_32* pX, ///< [out] x coordinate
+ UINT_32* pY, ///< [out] y coordinate
+ UINT_32* pSlice, ///< [in,out] slice index; z within the micro tile is added to it
+ UINT_32* pSample, ///< [out] sample index
+ AddrTileType microTileType, ///< [in] micro tiling type
+ BOOL_32 isDepthSampleOrder ///< [in] TRUE if depth sample order in microtile is used
+ ) const
+{
+ UINT_32 x = 0;
+ UINT_32 y = 0;
+ UINT_32 z = 0;
+ UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+
+ // For planar surface, we adjust offset according to tile base
+ if ((bpp != compBits) && (compBits != 0) && isDepthSampleOrder)
+ {
+ offset -= tileBase;
+
+ ADDR_ASSERT(microTileType == ADDR_NON_DISPLAYABLE ||
+ microTileType == ADDR_DEPTH_SAMPLE_ORDER);
+
+ bpp = compBits; // from here on the component size is used as the element size
+ }
+
+ UINT_32 sampleTileBits;
+ UINT_32 samplePixelBits;
+ UINT_32 pixelIndex;
+
+ if (isDepthSampleOrder) // samples of one pixel are adjacent
+ {
+ samplePixelBits = bpp * numSamples;
+ pixelIndex = offset / samplePixelBits;
+ *pSample = (offset % samplePixelBits) / bpp;
+ }
+ else // all pixels of one sample are adjacent
+ {
+ sampleTileBits = MicroTilePixels * bpp * thickness;
+ *pSample = offset / sampleTileBits;
+ pixelIndex = (offset % sampleTileBits) / bpp;
+ }
+
+ if (microTileType != ADDR_THICK)
+ {
+ if (microTileType == ADDR_DISPLAYABLE) // displayable
+ {
+ switch (bpp)
+ {
+ case 8:
+ x = pixelIndex & 0x7;
+ y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4));
+ break;
+ case 16:
+ x = pixelIndex & 0x7;
+ y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3));
+ break;
+ case 32:
+ x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0));
+ y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2));
+ break;
+ case 64:
+ x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+ y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
+ break;
+ case 128:
+ x = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,2),_BIT(pixelIndex,1));
+ y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,0));
+ break;
+ default: // other bpp values leave x and y at 0 (no assert here)
+ break;
+ }
+ }
+ else if (microTileType == ADDR_NON_DISPLAYABLE || microTileType == ADDR_DEPTH_SAMPLE_ORDER)
+ {
+ x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+ y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
+ }
+ else if (microTileType == ADDR_ROTATED)
+ {
+ /*
+ 8-Bit Elements
+ element_index[5:0] = { x[2], x[0], x[1], y[2], y[1], y[0] }
+
+ 16-Bit Elements
+ element_index[5:0] = { x[2], x[1], x[0], y[2], y[1], y[0] }
+
+ 32-Bit Elements
+ element_index[5:0] = { x[2], x[1], y[2], x[0], y[1], y[0] }
+
+ 64-Bit Elements
+ element_index[5:0] = { y[2], x[2], x[1], y[1], x[0], y[0] }
+ */
+ switch(bpp)
+ {
+ case 8:
+ x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,3),_BIT(pixelIndex,4));
+ y = pixelIndex & 0x7;
+ break;
+ case 16:
+ x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,3));
+ y = pixelIndex & 0x7;
+ break;
+ case 32:
+ x = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,4),_BIT(pixelIndex,2));
+ y = Bits2Number(3, _BIT(pixelIndex,3),_BIT(pixelIndex,1),_BIT(pixelIndex,0));
+ break;
+ case 64:
+ x = Bits2Number(3, _BIT(pixelIndex,4),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
+ y = Bits2Number(3, _BIT(pixelIndex,5),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+ }
+
+ if (thickness > 1) // thick
+ {
+ z = Bits2Number(3, _BIT(pixelIndex,8),_BIT(pixelIndex,7),_BIT(pixelIndex,6));
+ }
+ }
+ else
+ {
+ ADDR_ASSERT((m_chipFamily >= ADDR_CHIP_FAMILY_CI) && (thickness > 1));
+ /*
+ 8-Bit Elements and 16-Bit Elements
+ element_index[7:0] = { y[2], x[2], z[1], z[0], y[1], x[1], y[0], x[0] }
+
+ 32-Bit Elements
+ element_index[7:0] = { y[2], x[2], z[1], y[1], z[0], x[1], y[0], x[0] }
+
+ 64-Bit Elements and 128-Bit Elements
+ element_index[7:0] = { y[2], x[2], z[1], y[1], x[1], z[0], y[0], x[0] }
+
+ The equation to compute the element index for the extra thick tile:
+ element_index[8] = z[2]
+ */
+ switch (bpp)
+ {
+ case 8:
+ case 16: // fall-through
+ x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+ y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,3),_BIT(pixelIndex,1));
+ z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,4));
+ break;
+ case 32:
+ x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,2),_BIT(pixelIndex,0));
+ y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
+ z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,3));
+ break;
+ case 64:
+ case 128: // fall-through
+ x = Bits2Number(3, _BIT(pixelIndex,6),_BIT(pixelIndex,3),_BIT(pixelIndex,0));
+ y = Bits2Number(3, _BIT(pixelIndex,7),_BIT(pixelIndex,4),_BIT(pixelIndex,1));
+ z = Bits2Number(2, _BIT(pixelIndex,5),_BIT(pixelIndex,2));
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+
+ if (thickness == 8) // extra thick: element_index[8] supplies z[2]
+ {
+ z += Bits2Number(3,_BIT(pixelIndex,8),0,0);
+ }
+ }
+
+ *pX = x;
+ *pY = y;
+ *pSlice += z; // accumulates onto the caller-provided slice value
+}
+
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::DispatchComputeSurfaceCoordFromAddr
+*
+* @brief
+* Compute (x,y,slice,sample) coordinates from surface address, dispatching on tile mode
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::DispatchComputeSurfaceCoordFromAddr(
+ const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
+ ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ UINT_64 addr = pIn->addr;
+ UINT_32 bitPosition = pIn->bitPosition;
+ UINT_32 bpp = pIn->bpp;
+ UINT_32 pitch = pIn->pitch;
+ UINT_32 height = pIn->height;
+ UINT_32 numSlices = pIn->numSlices;
+ UINT_32 numSamples = ((pIn->numSamples == 0) ? 1 : pIn->numSamples); // 0 samples is treated as 1
+ UINT_32 numFrags = ((pIn->numFrags == 0) ? numSamples : pIn->numFrags); // 0 frags defaults to numSamples
+ AddrTileMode tileMode = pIn->tileMode;
+ UINT_32 tileBase = pIn->tileBase;
+ UINT_32 compBits = pIn->compBits;
+ AddrTileType microTileType = pIn->tileType;
+ BOOL_32 ignoreSE = pIn->ignoreSE;
+ BOOL_32 isDepthSampleOrder = pIn->isDepth;
+ ADDR_TILEINFO* pTileInfo = pIn->pTileInfo;
+
+ UINT_32* pX = &pOut->x;
+ UINT_32* pY = &pOut->y;
+ UINT_32* pSlice = &pOut->slice;
+ UINT_32* pSample = &pOut->sample;
+
+ if (microTileType == ADDR_DEPTH_SAMPLE_ORDER) // this tile type implies depth sample ordering
+ {
+ isDepthSampleOrder = TRUE;
+ }
+
+ if (m_chipFamily >= ADDR_CHIP_FAMILY_NI)
+ {
+ if (numFrags != numSamples)
+ {
+ numSamples = numFrags; // the fragment count is what gets passed on as numSamples below
+ }
+
+ /// @note
+ /// 128 bit/thick tiled surface doesn't support display tiling and
+ /// mipmap chain must have the same tileType, so please fill tileType correctly
+ if (!IsLinear(pIn->tileMode))
+ {
+ if (bpp >= 128 || ComputeSurfaceThickness(tileMode) > 1)
+ {
+ ADDR_ASSERT(microTileType != ADDR_DISPLAYABLE);
+ }
+ }
+ }
+
+ switch (tileMode)
+ {
+ case ADDR_TM_LINEAR_GENERAL://fall through
+ case ADDR_TM_LINEAR_ALIGNED:
+ ComputeSurfaceCoordFromAddrLinear(addr,
+ bitPosition,
+ bpp,
+ pitch,
+ height,
+ numSlices,
+ pX,
+ pY,
+ pSlice,
+ pSample);
+ break;
+ case ADDR_TM_1D_TILED_THIN1://fall through
+ case ADDR_TM_1D_TILED_THICK:
+ ComputeSurfaceCoordFromAddrMicroTiled(addr,
+ bitPosition,
+ bpp,
+ pitch,
+ height,
+ numSamples,
+ tileMode,
+ tileBase,
+ compBits,
+ pX,
+ pY,
+ pSlice,
+ pSample,
+ microTileType,
+ isDepthSampleOrder);
+ break;
+ case ADDR_TM_2D_TILED_THIN1: //fall through
+ case ADDR_TM_2D_TILED_THICK: //fall through
+ case ADDR_TM_3D_TILED_THIN1: //fall through
+ case ADDR_TM_3D_TILED_THICK: //fall through
+ case ADDR_TM_2D_TILED_XTHICK: //fall through
+ case ADDR_TM_3D_TILED_XTHICK: //fall through
+ case ADDR_TM_PRT_TILED_THIN1: //fall through
+ case ADDR_TM_PRT_2D_TILED_THIN1://fall through
+ case ADDR_TM_PRT_3D_TILED_THIN1://fall through
+ case ADDR_TM_PRT_TILED_THICK: //fall through
+ case ADDR_TM_PRT_2D_TILED_THICK://fall through
+ case ADDR_TM_PRT_3D_TILED_THICK:
+ UINT_32 pipeSwizzle;
+ UINT_32 bankSwizzle;
+
+ if (m_configFlags.useCombinedSwizzle) // swizzles come packed in pIn->tileSwizzle
+ {
+ ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
+ &bankSwizzle, &pipeSwizzle);
+ }
+ else // swizzles are supplied separately
+ {
+ pipeSwizzle = pIn->pipeSwizzle;
+ bankSwizzle = pIn->bankSwizzle;
+ }
+
+ ComputeSurfaceCoordFromAddrMacroTiled(addr,
+ bitPosition,
+ bpp,
+ pitch,
+ height,
+ numSamples,
+ tileMode,
+ tileBase,
+ compBits,
+ microTileType,
+ ignoreSE,
+ isDepthSampleOrder,
+ pipeSwizzle,
+ bankSwizzle,
+ pTileInfo,
+ pX,
+ pY,
+ pSlice,
+ pSample);
+ break;
+ default: // unsupported tile mode: this function writes nothing to pOut
+ ADDR_ASSERT_ALWAYS();
+ }
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSurfaceCoordFromAddrMacroTiled
+*
+*   @brief
+*       Compute surface coordinates from address for macro tiled surface
+*
+*       The bank and pipe fields are first removed from the bit address to obtain a linear
+*       offset. Macro tile, slice and micro tile indices are derived from that offset, and the
+*       bank/pipe numbers taken from the address are finally handed, together with the running
+*       x/y, to HwlComputeSurfaceCoord2DFromBankPipe.
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ComputeSurfaceCoordFromAddrMacroTiled(
+    UINT_64             addr,               ///< [in] byte address
+    UINT_32             bitPosition,        ///< [in] bit position
+    UINT_32             bpp,                ///< [in] bits per pixel
+    UINT_32             pitch,              ///< [in] pitch in pixels
+    UINT_32             height,             ///< [in] height in pixels
+    UINT_32             numSamples,         ///< [in] number of samples
+    AddrTileMode        tileMode,           ///< [in] tile mode
+    UINT_32             tileBase,           ///< [in] tile base offset
+    UINT_32             compBits,           ///< [in] component bits (for planar surface)
+    AddrTileType        microTileType,      ///< [in] micro tiling type
+    BOOL_32             ignoreSE,           ///< [in] TRUE if shader engines can be ignored
+    BOOL_32             isDepthSampleOrder, ///< [in] TRUE if depth sample order is used
+    UINT_32             pipeSwizzle,        ///< [in] pipe swizzle
+    UINT_32             bankSwizzle,        ///< [in] bank swizzle
+    ADDR_TILEINFO*      pTileInfo,          ///< [in] bank structure.
+                                            ///  **All fields to be valid on entry**
+    UINT_32*            pX,                 ///< [out] X coord
+    UINT_32*            pY,                 ///< [out] Y coord
+    UINT_32*            pSlice,             ///< [out] slice index
+    UINT_32*            pSample             ///< [out] sample index
+    ) const
+{
+    UINT_32 mx;
+    UINT_32 my;
+    UINT_64 tileBits;
+    UINT_64 macroTileBits;
+    UINT_32 slices;
+    UINT_32 tileSlices;
+    UINT_64 elementOffset;
+    UINT_64 macroTileIndex;
+    UINT_32 tileIndex;
+    UINT_64 totalOffset;
+
+    UINT_32 bank;
+    UINT_32 pipe;
+    UINT_32 groupBits = m_pipeInterleaveBytes << 3;
+    UINT_32 pipes = HwlGetPipes(pTileInfo);
+    UINT_32 banks = pTileInfo->banks;
+
+    UINT_32 bankInterleave = m_bankInterleave;
+
+    // Work on a bit address so that bitPosition can be folded in directly.
+    UINT_64 addrBits = BYTES_TO_BITS(addr) + bitPosition;
+
+    //
+    // remove bits for bank and pipe
+    //
+    totalOffset = (addrBits % groupBits) +
+        (((addrBits / groupBits / pipes) % bankInterleave) * groupBits) +
+        (((addrBits / groupBits / pipes) / bankInterleave) / banks) * groupBits * bankInterleave;
+
+    UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+    UINT_32 microTileBits = bpp * microTileThickness * MicroTilePixels * numSamples;
+
+    UINT_32 microTileBytes = BITS_TO_BYTES(microTileBits);
+    //
+    // Determine if tiles need to be split across slices.
+    //
+    // If the size of the micro tile is larger than the tile split size, then the tile will be
+    // split across multiple slices.
+    //
+    UINT_32 slicesPerTile = 1; //_State->TileSlices
+
+    if ((microTileBytes > pTileInfo->tileSplitBytes) && (microTileThickness == 1))
+    {   //don't support for thick mode
+
+        //
+        // Compute the number of slices per tile.
+        //
+        slicesPerTile = microTileBytes / pTileInfo->tileSplitBytes;
+    }
+
+    tileBits = microTileBits / slicesPerTile; // micro tile bits
+
+    // in micro tiles because not MicroTileWidth timed.
+    UINT_32 macroWidth  = pTileInfo->bankWidth * pipes * pTileInfo->macroAspectRatio;
+    // in micro tiles as well
+    UINT_32 macroHeight = pTileInfo->bankHeight * banks / pTileInfo->macroAspectRatio;
+
+    UINT_32 pitchInMacroTiles = pitch / MicroTileWidth / macroWidth;
+
+    macroTileBits = (macroWidth * macroHeight) * tileBits / (banks * pipes);
+
+    macroTileIndex = totalOffset / macroTileBits;
+
+    // pitchMacros * height / heightMacros;  macroTilesPerSlice == _State->SliceMacros
+    UINT_32 macroTilesPerSlice = (pitch / (macroWidth * MicroTileWidth)) * height /
+        (macroHeight * MicroTileWidth);
+
+    // The 64-bit index arithmetic is explicitly narrowed to the 32-bit coordinate types.
+    slices = static_cast<UINT_32>(macroTileIndex / macroTilesPerSlice);
+
+    *pSlice = static_cast<UINT_32>(slices / slicesPerTile * microTileThickness);
+
+    //
+    // calculate element offset and x[2:0], y[2:0], z[1:0] for thick
+    //
+    tileSlices = slices % slicesPerTile;
+
+    elementOffset  = tileSlices * tileBits;
+    elementOffset += totalOffset % tileBits;
+
+    UINT_32 coordZ = 0;
+
+    HwlComputePixelCoordFromOffset(static_cast<UINT_32>(elementOffset),
+                                   bpp,
+                                   numSamples,
+                                   tileMode,
+                                   tileBase,
+                                   compBits,
+                                   pX,
+                                   pY,
+                                   &coordZ,
+                                   pSample,
+                                   microTileType,
+                                   isDepthSampleOrder);
+
+    // Add the origin of the containing macro tile within its slice.
+    macroTileIndex = macroTileIndex % macroTilesPerSlice;
+    *pY += static_cast<UINT_32>(macroTileIndex / pitchInMacroTiles * macroHeight * MicroTileHeight);
+    *pX += static_cast<UINT_32>(macroTileIndex % pitchInMacroTiles * macroWidth * MicroTileWidth);
+
+    *pSlice += coordZ;
+
+    // Add the offset of the micro tile inside the macro tile.
+    tileIndex = static_cast<UINT_32>((totalOffset % macroTileBits) / tileBits);
+
+    my = (tileIndex / pTileInfo->bankWidth) % pTileInfo->bankHeight * MicroTileHeight;
+    mx = (tileIndex % pTileInfo->bankWidth) * pipes * MicroTileWidth;
+
+    *pY += my;
+    *pX += mx;
+
+    // Bank and pipe are read from the original byte address, not from totalOffset.
+    bank = ComputeBankFromAddr(addr, banks, pipes);
+    pipe = ComputePipeFromAddr(addr, pipes);
+
+    HwlComputeSurfaceCoord2DFromBankPipe(tileMode,
+                                         pX,
+                                         pY,
+                                         *pSlice,
+                                         bank,
+                                         pipe,
+                                         bankSwizzle,
+                                         pipeSwizzle,
+                                         tileSlices,
+                                         ignoreSE,
+                                         pTileInfo);
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeSurfaceCoord2DFromBankPipe
+*
+* @brief
+* Compute surface x,y coordinates from bank/pipe info
+*
+* The bank number is first XORed with the tile-split rotation, slice rotation and swizzle
+* terms; individual x/y bits are then derived from the resulting bank bits, selected by
+* pTileInfo->macroAspectRatio and pTileInfo->banks. Combinations not listed in the switches
+* leave the corresponding output bits at 0.
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ComputeSurfaceCoord2DFromBankPipe(
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 x, ///< [in] x coordinate
+ UINT_32 y, ///< [in] y coordinate
+ UINT_32 slice, ///< [in] slice index
+ UINT_32 bank, ///< [in] bank number
+ UINT_32 pipe, ///< [in] pipe number
+ UINT_32 bankSwizzle,///< [in] bank swizzle
+ UINT_32 pipeSwizzle,///< [in] pipe swizzle
+ UINT_32 tileSlices, ///< [in] slices in a micro tile
+ ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. **All fields to be valid on entry**
+ CoordFromBankPipe* pOutput ///< [out] pointer to extracted x/y bits
+ ) const
+{
+ // Every extracted bit defaults to 0; only the bits relevant to the bank count and macro
+ // aspect ratio are overwritten below.
+ UINT_32 yBit3 = 0;
+ UINT_32 yBit4 = 0;
+ UINT_32 yBit5 = 0;
+ UINT_32 yBit6 = 0;
+
+ UINT_32 xBit3 = 0;
+ UINT_32 xBit4 = 0;
+ UINT_32 xBit5 = 0;
+
+ UINT_32 tileSplitRotation;
+
+ UINT_32 numPipes = HwlGetPipes(pTileInfo);
+
+ UINT_32 bankRotation = ComputeBankRotation(tileMode,
+ pTileInfo->banks, numPipes);
+
+ UINT_32 pipeRotation = ComputePipeRotation(tileMode, numPipes);
+
+ // x and y expressed in units of (bankWidth * numPipes) and bankHeight micro tiles.
+ UINT_32 xBit = x / (MicroTileWidth * pTileInfo->bankWidth * numPipes);
+ UINT_32 yBit = y / (MicroTileHeight * pTileInfo->bankHeight);
+
+ //calculate the bank and pipe before rotation and swizzle
+
+ // Only the non-PRT 2D/3D modes get a tile-split rotation here; all other modes use 0.
+ switch (tileMode)
+ {
+ case ADDR_TM_2D_TILED_THIN1: //fall through
+ case ADDR_TM_2D_TILED_THICK: //fall through
+ case ADDR_TM_2D_TILED_XTHICK: //fall through
+ case ADDR_TM_3D_TILED_THIN1: //fall through
+ case ADDR_TM_3D_TILED_THICK: //fall through
+ case ADDR_TM_3D_TILED_XTHICK:
+ tileSplitRotation = ((pTileInfo->banks / 2) + 1);
+ break;
+ default:
+ tileSplitRotation = 0;
+ break;
+ }
+
+ UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+ bank ^= tileSplitRotation * tileSlices;
+ // pipeRotation == 0 means no per-slice pipe rotation: only the bank is rotated per slice.
+ // Note that pipe is updated in both branches but is not read again in this function.
+ if (pipeRotation == 0)
+ {
+ bank ^= bankRotation * (slice / microTileThickness) + bankSwizzle;
+ bank %= pTileInfo->banks;
+ pipe ^= pipeSwizzle;
+ }
+ else
+ {
+ bank ^= bankRotation * (slice / microTileThickness) / numPipes + bankSwizzle;
+ bank %= pTileInfo->banks;
+ pipe ^= pipeRotation * (slice / microTileThickness) + pipeSwizzle;
+ }
+
+ // macroAspectRatio == 1: only y bits are derived from the bank, each paired with an x bit.
+ if (pTileInfo->macroAspectRatio == 1)
+ {
+ switch (pTileInfo->banks)
+ {
+ case 2:
+ yBit3 = _BIT(bank, 0) ^ _BIT(xBit,0);
+ break;
+ case 4:
+ yBit4 = _BIT(bank, 0) ^ _BIT(xBit,0);
+ yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1);
+ break;
+ case 8:
+ yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2);
+ yBit5 = _BIT(bank, 0) ^ _BIT(xBit,0);
+ // yBit4 depends on yBit5, so yBit5 must be computed first.
+ yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ yBit5;
+ break;
+ case 16:
+ yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3);
+ yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2);
+ yBit6 = _BIT(bank, 0) ^ _BIT(xBit, 0);
+ // yBit5 depends on yBit6, so yBit6 must be computed first.
+ yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ yBit6;
+ break;
+ default:
+ break;
+ }
+
+ }
+ // macroAspectRatio == 2: the lowest x bit (xBit3) is also derived from the bank.
+ else if (pTileInfo->macroAspectRatio == 2)
+ {
+ switch (pTileInfo->banks)
+ {
+ case 2: //xBit3 = yBit3^b0
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit,0);
+ break;
+ case 4: //xBit3=yBit4^b0; yBit3=xBit4^b1
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1);
+ yBit3 = _BIT(bank, 1) ^ _BIT(xBit,1);
+ break;
+ case 8: //xBit4, xBit5, yBit5 are known
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2);
+ yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2);
+ yBit4 = _BIT(bank, 1) ^ _BIT(xBit,1) ^ _BIT(yBit, 2);
+ break;
+ case 16://x4,x5,x6,y6 are known
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3); //x3 = y6 ^ b0
+ yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3
+ yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = x5 ^ b2
+ yBit5 = _BIT(bank, 1) ^ _BIT(xBit, 1) ^ _BIT(yBit, 3); //y5=x4^y6^b1
+ break;
+ default:
+ break;
+ }
+ }
+ // macroAspectRatio == 4: xBit3 and xBit4 are derived from the bank (no 2-bank case).
+ else if (pTileInfo->macroAspectRatio == 4)
+ {
+ switch (pTileInfo->banks)
+ {
+ case 4: //yBit3, yBit4
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit,1);
+ xBit4 = _BIT(bank, 1) ^ _BIT(yBit,0);
+ break;
+ case 8: //xBit5, yBit4, yBit5
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2);
+ yBit3 = _BIT(bank, 2) ^ _BIT(xBit,2);
+ xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit,2);
+ break;
+ case 16: //xBit5, xBit6, yBit5, yBit6
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = b0 ^ y6
+ xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = b1 ^ y5 ^ y6;
+ yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = b3 ^ x6;
+ yBit4 = _BIT(bank, 2) ^ _BIT(xBit, 2); //y4 = b2 ^ x5;
+ break;
+ default:
+ break;
+ }
+ }
+ // macroAspectRatio == 8: xBit3..xBit5 are derived from the bank (8 and 16 banks only).
+ else if (pTileInfo->macroAspectRatio == 8)
+ {
+ switch (pTileInfo->banks)
+ {
+ case 8: //yBit3, yBit4, yBit5
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit,2); //x3 = b0 ^ y5;
+ xBit4 = _BIT(bank, 1) ^ _BIT(yBit,1) ^ _BIT(yBit, 2);//x4 = b1 ^ y4 ^ y5;
+ xBit5 = _BIT(bank, 2) ^ _BIT(yBit,0);
+ break;
+ case 16: //xBit6, yBit4, yBit5, yBit6
+ xBit3 = _BIT(bank, 0) ^ _BIT(yBit, 3);//x3 = y6 ^ b0
+ xBit4 = _BIT(bank, 1) ^ _BIT(yBit, 2) ^ _BIT(yBit, 3);//x4 = y5 ^ y6 ^ b1
+ xBit5 = _BIT(bank, 2) ^ _BIT(yBit, 1);//x5 = y4 ^ b2
+ yBit3 = _BIT(bank, 3) ^ _BIT(xBit, 3); //y3 = x6 ^ b3
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Publish the scaled coordinates together with the individually derived bits.
+ pOutput->xBits = xBit;
+ pOutput->yBits = yBit;
+
+ pOutput->xBit3 = xBit3;
+ pOutput->xBit4 = xBit4;
+ pOutput->xBit5 = xBit5;
+ pOutput->yBit3 = yBit3;
+ pOutput->yBit4 = yBit4;
+ pOutput->yBit5 = yBit5;
+ pOutput->yBit6 = yBit6;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlExtractBankPipeSwizzle
+*   @brief
+*       Hardware-layer entry point that forwards pIn->base256b and pIn->pTileInfo to
+*       ExtractBankPipeSwizzle and stores the results in pOut
+*   @return
+*       ADDR_E_RETURNCODE (this implementation always returns ADDR_OK)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlExtractBankPipeSwizzle(
+    const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
+    ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    // Destinations inside the output structure for the two extracted fields.
+    UINT_32* const pBankDst = &pOut->bankSwizzle;
+    UINT_32* const pPipeDst = &pOut->pipeSwizzle;
+
+    ExtractBankPipeSwizzle(pIn->base256b, pIn->pTileInfo, pBankDst, pPipeDst);
+
+    return ADDR_OK;
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlCombineBankPipeSwizzle
+*   @brief
+*       Combine bank/pipe swizzle into a single tile swizzle value via GetBankPipeSwizzle
+*   @return
+*       ADDR_OK on success, ADDR_INVALIDPARAMS when pTileSwizzle is null
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlCombineBankPipeSwizzle(
+    UINT_32         bankSwizzle,    ///< [in] bank swizzle
+    UINT_32         pipeSwizzle,    ///< [in] pipe swizzle
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] tile info
+    UINT_64         baseAddr,       ///< [in] base address
+    UINT_32*        pTileSwizzle    ///< [out] combined swizzle
+    ) const
+{
+    // Without an output location there is nothing to compute.
+    if (!pTileSwizzle)
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    *pTileSwizzle = GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, baseAddr, pTileInfo);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeBaseSwizzle
+*   @brief
+*       Compute base swizzle: derive a bank swizzle (and, for 3D macro tile modes, a pipe
+*       swizzle) from pIn->surfIndex and combine them into pOut->tileSwizzle
+*   @return
+*       ADDR_E_RETURNCODE as returned by HwlCombineBankPipeSwizzle
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeBaseSwizzle(
+    const ADDR_COMPUTE_BASE_SWIZZLE_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_TILEINFO* pTileInfo = pIn->pTileInfo;
+
+    ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
+    ADDR_ASSERT(pIn->pTileInfo);
+
+    /// This is a legacy misreading of h/w doc, use it as it doesn't hurt.
+    static const UINT_8 bankRotationArray[4][16] = {
+        { 0, 0,  0, 0,  0, 0,  0, 0, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_2_BANK
+        { 0, 1,  2, 3,  0, 0,  0, 0, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_4_BANK
+        { 0, 3,  6, 1,  4, 7,  2, 5, 0,  0, 0,  0, 0,  0, 0, 0 }, // ADDR_SURF_8_BANK
+        { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 }, // ADDR_SURF_16_BANK
+    };
+
+    // Fall back to 2 banks when no tile info is supplied.
+    UINT_32 banks = pTileInfo ? pTileInfo->banks : 2;
+
+    // Uses less bank swizzle bits
+    if (pIn->option.reduceBankBit && banks > 2)
+    {
+        banks >>= 1;
+    }
+
+    // Row of bankRotationArray that corresponds to the (possibly reduced) bank count.
+    UINT_32 rotationRow = 0;
+
+    switch (banks)
+    {
+        case 2:
+            rotationRow = 0;
+            break;
+        case 4:
+            rotationRow = 1;
+            break;
+        case 8:
+            rotationRow = 2;
+            break;
+        case 16:
+            rotationRow = 3;
+            break;
+        default:
+            ADDR_ASSERT_ALWAYS();
+            rotationRow = 0;
+            break;
+    }
+
+    // The surface index is wrapped to the bank count in both generation modes.
+    const UINT_32 surfSlot = pIn->surfIndex & (banks - 1);
+
+    UINT_32 bankSwizzle;
+
+    if (pIn->option.genOption != ADDR_SWIZZLE_GEN_LINEAR)
+    {
+        // (pIn->option.genOption == ADDR_SWIZZLE_GEN_DEFAULT)
+        bankSwizzle = bankRotationArray[rotationRow][surfSlot];
+    }
+    else
+    {
+        bankSwizzle = surfSlot;
+    }
+
+    // Pipe swizzle stays 0 unless the tile mode is a 3D macro tile mode.
+    UINT_32 pipeSwizzle = 0;
+
+    if (IsMacro3dTiled(pIn->tileMode))
+    {
+        pipeSwizzle = pIn->surfIndex & (HwlGetPipes(pTileInfo) - 1);
+    }
+
+    return HwlCombineBankPipeSwizzle(bankSwizzle, pipeSwizzle, pTileInfo, 0, &pOut->tileSwizzle);
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ExtractBankPipeSwizzle
+*   @brief
+*       Extract bank/pipe swizzle from base256b
+*
+*       Both outputs are 0 when base256b is 0. Otherwise the pipe swizzle is taken from the
+*       bits directly above the pipe interleave and the bank swizzle from the bits above the
+*       pipe and bank interleave fields.
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ExtractBankPipeSwizzle(
+    UINT_32         base256b,       ///< [in] input base256b register value
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] 2D tile parameters. Client must provide all data
+    UINT_32*        pBankSwizzle,   ///< [out] bank swizzle
+    UINT_32*        pPipeSwizzle    ///< [out] pipe swizzle
+    ) const
+{
+    UINT_32 bankSwizzle = 0;
+    UINT_32 pipeSwizzle = 0;
+
+    if (base256b != 0)
+    {
+        UINT_32 numPipes       = HwlGetPipes(pTileInfo);
+        UINT_32 bankBits       = QLog2(pTileInfo->banks);
+        UINT_32 pipeBits       = QLog2(numPipes);
+        UINT_32 groupBytes     = m_pipeInterleaveBytes;
+        UINT_32 bankInterleave = m_bankInterleave;
+
+        // base256b is in 256-byte units, hence groupBytes >> 8 as the divisor.
+        pipeSwizzle =
+            (base256b / (groupBytes >> 8)) & ((1 << pipeBits) - 1);
+
+        bankSwizzle =
+            (base256b / (groupBytes >> 8) / numPipes / bankInterleave) & ((1 << bankBits) - 1);
+    }
+
+    *pPipeSwizzle = pipeSwizzle;
+    *pBankSwizzle = bankSwizzle;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::GetBankPipeSwizzle
+*   @brief
+*       Combine bank/pipe swizzle
+*
+*       The pipe swizzle occupies the low bits and the bank swizzle is shifted above the pipe
+*       and bank interleave bits. The combined value, scaled by the pipe interleave size, is
+*       XORed into baseAddr and the result is returned in 256-byte units.
+*   @return
+*       Base256b bits (only filled bank/pipe bits)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::GetBankPipeSwizzle(
+    UINT_32         bankSwizzle,    ///< [in] bank swizzle
+    UINT_32         pipeSwizzle,    ///< [in] pipe swizzle
+    UINT_64         baseAddr,       ///< [in] base address
+    ADDR_TILEINFO*  pTileInfo       ///< [in] tile info
+    ) const
+{
+    UINT_32 pipeBits = QLog2(HwlGetPipes(pTileInfo));
+    UINT_32 bankInterleaveBits = QLog2(m_bankInterleave);
+    UINT_32 tileSwizzle = pipeSwizzle + ((bankSwizzle << bankInterleaveBits) << pipeBits);
+
+    baseAddr ^= tileSwizzle * m_pipeInterleaveBytes;
+    baseAddr >>= 8;
+
+    // Explicit narrowing of the 64-bit address to the 32-bit base256b value.
+    return static_cast<UINT_32>(baseAddr);
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeSliceTileSwizzle
+*   @brief
+*       Compute cubemap/3d texture faces/slices tile swizzle by advancing the base bank/pipe
+*       swizzle by the per-slice rotation
+*   @return
+*       Tile swizzle (0 for tile modes that are not macro tiled)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeSliceTileSwizzle(
+    AddrTileMode    tileMode,       ///< [in] Tile mode
+    UINT_32         baseSwizzle,    ///< [in] Base swizzle
+    UINT_32         slice,          ///< [in] Slice index, Cubemap face index, 0 means +X
+    UINT_64         baseAddr,       ///< [in] Base address
+    ADDR_TILEINFO*  pTileInfo       ///< [in] Bank structure
+    ) const
+{
+    // Swizzle only for macro tile mode
+    if (!IsMacroTiled(tileMode))
+    {
+        return 0;
+    }
+
+    const UINT_32 firstSlice = slice / ComputeSurfaceThickness(tileMode);
+
+    const UINT_32 numPipes = HwlGetPipes(pTileInfo);
+    const UINT_32 numBanks = pTileInfo->banks;
+
+    const UINT_32 pipeRotation = ComputePipeRotation(tileMode, numPipes);
+    const UINT_32 bankRotation = ComputeBankRotation(tileMode, numBanks, numPipes);
+
+    // Start from the swizzle encoded in baseSwizzle, or from 0/0 when none is given.
+    UINT_32 bankSwizzle = 0;
+    UINT_32 pipeSwizzle = 0;
+
+    if (baseSwizzle != 0)
+    {
+        ExtractBankPipeSwizzle(baseSwizzle, pTileInfo, &bankSwizzle, &pipeSwizzle);
+    }
+
+    UINT_32 bankStep = firstSlice * bankRotation;
+
+    if (pipeRotation != 0) //3D mode
+    {
+        pipeSwizzle = (pipeSwizzle + firstSlice * pipeRotation) % numPipes;
+        bankStep   /= numPipes;
+    }
+    // else: 2D mode, the pipe swizzle is left untouched
+
+    bankSwizzle = (bankSwizzle + bankStep) % numBanks;
+
+    return GetBankPipeSwizzle(bankSwizzle, pipeSwizzle, baseAddr, pTileInfo);
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeQbStereoRightSwizzle
+*
+*   @brief
+*       Compute right eye swizzle from the bank number at coordinate (0, pInfo->height)
+*   @return
+*       swizzle (0 when the surface is not macro tiled, lacks stereo or tile info, or the
+*       computed bank number is 0)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::HwlComputeQbStereoRightSwizzle(
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo  ///< [in] Surface info, must be valid
+    ) const
+{
+    UINT_32 swizzle = 0;
+
+    // The assumption is default swizzle for left eye is 0
+    if (!IsMacroTiled(pInfo->tileMode) || !pInfo->pStereoInfo || !pInfo->pTileInfo)
+    {
+        return swizzle;
+    }
+
+    const UINT_32 bankBits = ComputeBankFromCoord(0, pInfo->height, 0,
+                                                  pInfo->tileMode, 0, 0, pInfo->pTileInfo);
+
+    if (bankBits != 0)
+    {
+        HwlCombineBankPipeSwizzle(bankBits, 0, pInfo->pTileInfo, 0, &swizzle);
+    }
+
+    return swizzle;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeBankFromCoord
+*
+* @brief
+* Compute bank number from coordinates
+*
+* The raw bank bits are formed by XORing x and y tile-coordinate bits, adjusted by
+* HwlPreAdjustBank, then XORed with the bank swizzle plus slice rotation and with the
+* tile-split rotation, and finally masked to numBanks - 1.
+* @return
+* Bank number
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeBankFromCoord(
+ UINT_32 x, ///< [in] x coordinate
+ UINT_32 y, ///< [in] y coordinate
+ UINT_32 slice, ///< [in] slice index
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 bankSwizzle, ///< [in] bank swizzle
+ UINT_32 tileSplitSlice, ///< [in] If the size of the pixel offset is larger than the
+ /// tile split size, then the pixel will be moved to a separate
+ /// slice. This value equals pixelOffset / tileSplitBytes
+ /// in this case. Otherwise this is 0.
+ ADDR_TILEINFO* pTileInfo ///< [in] tile info
+ ) const
+{
+ UINT_32 pipes = HwlGetPipes(pTileInfo);
+ UINT_32 bankBit0 = 0;
+ UINT_32 bankBit1 = 0;
+ UINT_32 bankBit2 = 0;
+ UINT_32 bankBit3 = 0;
+ UINT_32 sliceRotation;
+ UINT_32 tileSplitRotation;
+ UINT_32 bank;
+ UINT_32 numBanks = pTileInfo->banks;
+ UINT_32 bankWidth = pTileInfo->bankWidth;
+ UINT_32 bankHeight = pTileInfo->bankHeight;
+
+ // Coordinates in units of (bankWidth * pipes) micro tiles in x and bankHeight micro tiles in y.
+ UINT_32 tx = x / MicroTileWidth / (bankWidth * pipes);
+ UINT_32 ty = y / MicroTileHeight / bankHeight;
+
+ // The low four bits of tx/ty are named x3..x6 / y3..y6 in the bank equations below.
+ UINT_32 x3 = _BIT(tx,0);
+ UINT_32 x4 = _BIT(tx,1);
+ UINT_32 x5 = _BIT(tx,2);
+ UINT_32 x6 = _BIT(tx,3);
+ UINT_32 y3 = _BIT(ty,0);
+ UINT_32 y4 = _BIT(ty,1);
+ UINT_32 y5 = _BIT(ty,2);
+ UINT_32 y6 = _BIT(ty,3);
+
+ // Bank bits not assigned for a given bank count stay 0.
+ switch (numBanks)
+ {
+ case 16:
+ bankBit0 = x3 ^ y6;
+ bankBit1 = x4 ^ y5 ^ y6;
+ bankBit2 = x5 ^ y4;
+ bankBit3 = x6 ^ y3;
+ break;
+ case 8:
+ bankBit0 = x3 ^ y5;
+ bankBit1 = x4 ^ y4 ^ y5;
+ bankBit2 = x5 ^ y3;
+ break;
+ case 4:
+ bankBit0 = x3 ^ y4;
+ bankBit1 = x4 ^ y3;
+ break;
+ case 2:
+ bankBit0 = x3 ^ y3;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+
+ bank = bankBit0 | (bankBit1 << 1) | (bankBit2 << 2) | (bankBit3 << 3);
+
+ //Bits2Number(4, bankBit3, bankBit2, bankBit1, bankBit0);
+
+ bank = HwlPreAdjustBank((x / MicroTileWidth), bank, pTileInfo);
+ //
+ // Compute bank rotation for the slice.
+ //
+ UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+ // NOTE(review): the PRT tile modes are not listed in this switch and therefore get a
+ // slice rotation of 0 - confirm that this is intended.
+ switch (tileMode)
+ {
+ case ADDR_TM_2D_TILED_THIN1: // fall through
+ case ADDR_TM_2D_TILED_THICK: // fall through
+ case ADDR_TM_2D_TILED_XTHICK:
+ sliceRotation = ((numBanks / 2) - 1) * (slice / microTileThickness);
+ break;
+ case ADDR_TM_3D_TILED_THIN1: // fall through
+ case ADDR_TM_3D_TILED_THICK: // fall through
+ case ADDR_TM_3D_TILED_XTHICK:
+ sliceRotation =
+ Max(1u, (pipes / 2) - 1) * (slice / microTileThickness) / pipes;
+ break;
+ default:
+ sliceRotation = 0;
+ break;
+ }
+
+
+ //
+ // Compute bank rotation for the tile split slice.
+ //
+ // The sample slice will be non-zero if samples must be split across multiple slices.
+ // This situation arises when the micro tile size multiplied by the number of samples exceeds
+ // the split size (set in GB_ADDR_CONFIG).
+ //
+ switch (tileMode)
+ {
+ case ADDR_TM_2D_TILED_THIN1: //fall through
+ case ADDR_TM_3D_TILED_THIN1: //fall through
+ case ADDR_TM_PRT_2D_TILED_THIN1: //fall through
+ case ADDR_TM_PRT_3D_TILED_THIN1: //fall through
+ tileSplitRotation = ((numBanks / 2) + 1) * tileSplitSlice;
+ break;
+ default:
+ tileSplitRotation = 0;
+ break;
+ }
+
+ //
+ // Apply bank rotation for the slice and tile split slice.
+ //
+ bank ^= bankSwizzle + sliceRotation;
+ bank ^= tileSplitRotation;
+
+ // Keep only the valid bank bits (the mask form assumes numBanks is a power of two).
+ bank &= (numBanks - 1);
+
+ return bank;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeBankFromAddr
+*
+*   @brief
+*       Compute the bank number from an address
+*   @return
+*       Bank number
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeBankFromAddr(
+    UINT_64 addr,       ///< [in] address
+    UINT_32 numBanks,   ///< [in] number of banks
+    UINT_32 numPipes    ///< [in] number of pipes
+    ) const
+{
+    UINT_32 bank;
+
+    //
+    // The LSBs of the address are arranged as follows:
+    //    bank | bankInterleave | pipe | pipeInterleave
+    //
+    // To get the bank number, shift off the pipe interleave, pipe, and bank interleave bits and
+    // mask the bank bits.
+    //
+    // The masked 64-bit value is explicitly narrowed to the 32-bit bank number.
+    bank = static_cast<UINT_32>(
+        (addr >> Log2(m_pipeInterleaveBytes * numPipes * m_bankInterleave)) &
+        (numBanks - 1)
+        );
+
+    return bank;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputePipeRotation
+*
+*   @brief
+*       Compute pipe rotation value; non-zero only for the 3D (including PRT 3D) tile modes
+*   @return
+*       Pipe rotation
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputePipeRotation(
+    AddrTileMode    tileMode,   ///< [in] tile mode
+    UINT_32         numPipes    ///< [in] number of pipes
+    ) const
+{
+    // Every tile mode not listed below has no pipe rotation.
+    UINT_32 pipeRot = 0;
+
+    switch (tileMode)
+    {
+        case ADDR_TM_3D_TILED_THIN1:        //fall through
+        case ADDR_TM_3D_TILED_THICK:        //fall through
+        case ADDR_TM_3D_TILED_XTHICK:       //fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1:    //fall through
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            if (numPipes < 4)
+            {
+                pipeRot = 1;
+            }
+            else
+            {
+                pipeRot = numPipes / 2 - 1;
+            }
+            break;
+        default:
+            break;
+    }
+
+    return pipeRot;
+}
+
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeBankRotation
+*
+*   @brief
+*       Compute bank rotation value; derived from the bank count for 2D tile modes and from
+*       the pipe count for 3D tile modes
+*   @return
+*       Bank rotation
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeBankRotation(
+    AddrTileMode    tileMode,   ///< [in] tile mode
+    UINT_32         numBanks,   ///< [in] number of banks
+    UINT_32         numPipes    ///< [in] number of pipes
+    ) const
+{
+    // Every tile mode not listed below has no bank rotation.
+    UINT_32 bankRot = 0;
+
+    switch (tileMode)
+    {
+        case ADDR_TM_2D_TILED_THIN1:        // fall through
+        case ADDR_TM_2D_TILED_THICK:        // fall through
+        case ADDR_TM_2D_TILED_XTHICK:       // fall through
+        case ADDR_TM_PRT_2D_TILED_THIN1:    // fall through
+        case ADDR_TM_PRT_2D_TILED_THICK:
+            // Rotate banks per Z-slice by 1 for 4-bank or 3 for 8-bank
+            bankRot = numBanks / 2 - 1;
+            break;
+        case ADDR_TM_3D_TILED_THIN1:        // fall through
+        case ADDR_TM_3D_TILED_THICK:        // fall through
+        case ADDR_TM_3D_TILED_XTHICK:       // fall through
+        case ADDR_TM_PRT_3D_TILED_THIN1:    // fall through
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            // rotate pipes & banks
+            if (numPipes < 4)
+            {
+                bankRot = 1;
+            }
+            else
+            {
+                bankRot = numPipes / 2 - 1;
+            }
+            break;
+        default:
+            break;
+    }
+
+    return bankRot;
+}
+
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeHtileBytes
+*
+*   @brief
+*       Compute htile size in bytes
+*
+*       Depending on m_configFlags.useHtileSliceAlign, either each slice or the whole surface
+*       is aligned to HtileCacheLineSize * m_pipes. isLinear and baseAlign are not read by
+*       this implementation.
+*
+*   @return
+*       Htile size in bytes
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeHtileBytes(
+    UINT_32     pitch,          ///< [in] pitch
+    UINT_32     height,         ///< [in] height
+    UINT_32     bpp,            ///< [in] bits per pixel
+    BOOL_32     isLinear,       ///< [in] if it is linear mode
+    UINT_32     numSlices,      ///< [in] number of slices
+    UINT_64*    sliceBytes,     ///< [out] bytes per slice
+    UINT_32     baseAlign       ///< [in] base alignments
+    ) const
+{
+    UINT_64 surfBytes;
+
+    const UINT_64 HtileCacheLineSize = BITS_TO_BYTES(HtileCacheBits);
+
+    // Widen pitch first so the whole product is evaluated in 64 bits and cannot wrap at 32.
+    *sliceBytes = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp / 64);
+
+    if (m_configFlags.useHtileSliceAlign)
+    {
+        // Align the sliceSize to htilecachelinesize * pipes at first
+        *sliceBytes = PowTwoAlign(*sliceBytes, HtileCacheLineSize * m_pipes);
+        surfBytes = *sliceBytes * numSlices;
+    }
+    else
+    {
+        // Align the surfSize to htilecachelinesize * pipes at last
+        surfBytes = *sliceBytes * numSlices;
+        surfBytes = PowTwoAlign(surfBytes, HtileCacheLineSize * m_pipes);
+    }
+
+    return surfBytes;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::DispatchComputeFmaskInfo
+*
+*   @brief
+*       Compute fmask sizes including padded pitch, height, slices and total size in bytes,
+*       along with the alignments. The fmask is described as a regular surface and run
+*       through HwlComputeSurfaceInfo; results are returned through output parameters.
+*
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::DispatchComputeFmaskInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,   ///< [in] input structure
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut)  ///< [out] output structure
+{
+    ADDR_COMPUTE_SURFACE_INFO_INPUT  surfIn  = {0};
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT surfOut = {0};
+
+    // Describe the fmask as a non-displayable surface flagged as fmask.
+    surfIn.tileMode    = pIn->tileMode;
+    surfIn.width       = pIn->pitch;
+    surfIn.height      = pIn->height;
+    surfIn.numSlices   = pIn->numSlices;
+    surfIn.pTileInfo   = pIn->pTileInfo;
+    surfIn.tileType    = ADDR_NON_DISPLAYABLE;
+    surfIn.flags.fmask = 1;
+
+    // The surface computation writes its tile info into the caller's structure.
+    surfOut.pTileInfo = pOut->pTileInfo;
+
+    // Setup hwl specific fields
+    HwlFmaskPreThunkSurfInfo(pIn, pOut, &surfIn, &surfOut);
+
+    surfIn.bpp = HwlComputeFmaskBits(pIn, &surfIn.numSamples);
+
+    // ComputeSurfaceInfo needs numSamples in surfOut as surface routines need adjusted numSamples
+    surfOut.numSamples = surfIn.numSamples;
+
+    const ADDR_E_RETURNCODE status = HwlComputeSurfaceInfo(&surfIn, &surfOut);
+
+    // Save bpp field for surface dump support
+    surfOut.bpp = surfIn.bpp;
+
+    if (status != ADDR_OK)
+    {
+        return status;
+    }
+
+    pOut->bpp         = surfOut.bpp;
+    pOut->pitch       = surfOut.pitch;
+    pOut->height      = surfOut.height;
+    pOut->numSlices   = surfOut.depth;
+    pOut->fmaskBytes  = surfOut.surfSize;
+    pOut->baseAlign   = surfOut.baseAlign;
+    pOut->pitchAlign  = surfOut.pitchAlign;
+    pOut->heightAlign = surfOut.heightAlign;
+
+    // For fmask, expNumSlices is stored in depth.
+    pOut->sliceSize = (surfOut.depth > 1) ? (surfOut.surfSize / surfOut.depth)
+                                          : surfOut.surfSize;
+
+    // Save numSamples field for surface dump support
+    pOut->numSamples = surfOut.numSamples;
+
+    HwlFmaskPostThunkSurfInfo(&surfOut, pOut);
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeFmaskInfo
+*   @brief
+*       Entry of EgBasedAddrLib ComputeFmaskInfo. Supplies a temporary tile info structure
+*       when the caller passes none and refreshes pOut->tileIndex on success.
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskInfo(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT*    pIn,   ///< [in] input structure
+    ADDR_COMPUTE_FMASK_INFO_OUTPUT*         pOut   ///< [out] output structure
+    )
+{
+    // Scratch tile info, used only if pOut does not have a valid pTileInfo
+    ADDR_TILEINFO localTileInfo = {0};
+
+    if (pOut->pTileInfo == NULL)
+    {
+        pOut->pTileInfo = &localTileInfo;
+    }
+
+    const ADDR_E_RETURNCODE status = DispatchComputeFmaskInfo(pIn, pOut);
+
+    if (status == ADDR_OK)
+    {
+        pOut->tileIndex = HwlPostCheckTileIndex(pOut->pTileInfo,
+                                                pIn->tileMode,
+                                                ADDR_NON_DISPLAYABLE,
+                                                pOut->tileIndex);
+    }
+
+    // Never hand the address of the local scratch structure back to the caller.
+    if (pOut->pTileInfo == &localTileInfo)
+    {
+        pOut->pTileInfo = NULL;
+    }
+
+    return status;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeFmaskAddrFromCoord
+*   @brief
+*       Entry of EgBasedAddrLib ComputeFmaskAddrFromCoord. The body is only compiled when
+*       ADDR_AM_BUILD is set; otherwise the function returns ADDR_OK without touching pOut.
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskAddrFromCoord(
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+#if ADDR_AM_BUILD
+    // NOTE(review): x == pitch and y == height pass this range check - confirm that
+    // accepting them is intended.
+    if ((pIn->x <= pIn->pitch) &&
+        (pIn->y <= pIn->height) &&
+        (pIn->numSamples <= m_maxSamples) &&
+        (pIn->sample < m_maxSamples))
+    {
+        pOut->addr = DispatchComputeFmaskAddrFromCoord(pIn, pOut);
+    }
+    else
+    {
+        retCode = ADDR_INVALIDPARAMS;
+    }
+#endif
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeFmaskCoordFromAddr
+*   @brief
+*       Entry of EgBasedAddrLib ComputeFmaskCoordFromAddr. The body is only compiled when
+*       ADDR_AM_BUILD is set; otherwise the function returns ADDR_OK without touching pOut.
+*   @return
+*       ADDR_E_RETURNCODE
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeFmaskCoordFromAddr(
+    const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+#if ADDR_AM_BUILD
+    // A bit position must lie inside one byte and the sample count within the supported maximum.
+    if ((pIn->bitPosition < 8) &&
+        (pIn->numSamples <= m_maxSamples))
+    {
+        DispatchComputeFmaskCoordFromAddr(pIn, pOut);
+    }
+    else
+    {
+        retCode = ADDR_INVALIDPARAMS;
+    }
+#endif
+
+    return retCode;
+}
+
+#if ADDR_AM_BUILD
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::DispatchComputeFmaskAddrFromCoord
+*
+*   @brief
+*       Computes the FMASK address and bit position from a coordinate, dispatching on the
+*       tile mode to the micro tiled or macro tiled routine. Unsupported tile modes yield
+*       address 0 and bit position 0.
+*   @return
+*       The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::DispatchComputeFmaskAddrFromCoord(
+    const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT*   pIn,    ///< [in] input structure
+    ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT*        pOut    ///< [out] output structure
+    ) const
+{
+    const AddrTileMode tileMode   = pIn->tileMode;
+    const UINT_32      numSamples = pIn->numSamples;
+
+    UINT_32* pBitPosition = &pOut->bitPosition;
+    UINT_64  addr         = 0;
+
+    ADDR_ASSERT(numSamples > 1);
+    ADDR_ASSERT(ComputeSurfaceThickness(tileMode) == 1);
+
+    if (tileMode == ADDR_TM_1D_TILED_THIN1)
+    {
+        addr = ComputeFmaskAddrFromCoordMicroTiled(pIn->x,
+                                                   pIn->y,
+                                                   pIn->slice,
+                                                   pIn->sample,
+                                                   pIn->plane,
+                                                   pIn->pitch,
+                                                   pIn->height,
+                                                   numSamples,
+                                                   tileMode,
+                                                   pIn->resolved,
+                                                   pBitPosition);
+    }
+    else if ((tileMode == ADDR_TM_2D_TILED_THIN1) || (tileMode == ADDR_TM_3D_TILED_THIN1))
+    {
+        UINT_32 pipeSwizzle;
+        UINT_32 bankSwizzle;
+
+        // The swizzles arrive either packed in tileSwizzle or as two separate fields.
+        if (m_configFlags.useCombinedSwizzle)
+        {
+            ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
+                                   &bankSwizzle, &pipeSwizzle);
+        }
+        else
+        {
+            pipeSwizzle = pIn->pipeSwizzle;
+            bankSwizzle = pIn->bankSwizzle;
+        }
+
+        addr = ComputeFmaskAddrFromCoordMacroTiled(pIn->x,
+                                                   pIn->y,
+                                                   pIn->slice,
+                                                   pIn->sample,
+                                                   pIn->plane,
+                                                   pIn->pitch,
+                                                   pIn->height,
+                                                   numSamples,
+                                                   tileMode,
+                                                   pipeSwizzle,
+                                                   bankSwizzle,
+                                                   pIn->ignoreSE,
+                                                   pIn->pTileInfo,
+                                                   pIn->resolved,
+                                                   pBitPosition);
+    }
+    else
+    {
+        *pBitPosition = 0;
+    }
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskAddrFromCoordMicroTiled
+*
+*   @brief
+*       Computes the FMASK address and bit position from a coordinate for 1D tiled (micro
+*       tiled) surfaces; numSamples == 2 is handled with the 4-sample layout
+*   @return
+*       The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeFmaskAddrFromCoordMicroTiled(
+    UINT_32         x,              ///< [in] x coordinate
+    UINT_32         y,              ///< [in] y coordinate
+    UINT_32         slice,          ///< [in] slice index
+    UINT_32         sample,         ///< [in] sample number
+    UINT_32         plane,          ///< [in] plane number
+    UINT_32         pitch,          ///< [in] surface pitch in pixels
+    UINT_32         height,         ///< [in] surface height in pixels
+    UINT_32         numSamples,     ///< [in] number of samples
+    AddrTileMode    tileMode,       ///< [in] tile mode
+    BOOL_32         resolved,       ///< [in] TRUE if this is for resolved fmask
+    UINT_32*        pBitPosition    ///< [out] pointer to returned bit position
+    ) const
+{
+    UINT_64 addr = 0;
+    UINT_32 effectiveBpp;
+    UINT_32 effectiveSamples;
+
+    //
+    // 2xAA use the same layout as 4xAA
+    //
+    if (numSamples == 2)
+    {
+        numSamples = 4;
+    }
+
+    //
+    // Unresolved and resolved FMASK are addressed as different equivalent color surfaces.
+    //
+    if (!resolved)
+    {
+        effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
+        effectiveBpp = numSamples;
+
+        //
+        // Compute the address just like a color surface with numSamples bits per element and
+        // numPlanes samples.
+        //
+        addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
+                                                     y,
+                                                     slice,
+                                                     plane, // sample
+                                                     effectiveBpp,
+                                                     pitch,
+                                                     height,
+                                                     effectiveSamples,
+                                                     tileMode,
+                                                     ADDR_NON_DISPLAYABLE,
+                                                     FALSE,
+                                                     pBitPosition);
+
+        //
+        // Compute the real bit position. Each (sample, plane) is stored with one bit per sample.
+        //
+
+        //
+        // Compute the pixel index within the micro tile
+        //
+        UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x % 8,
+                                                              y % 8,
+                                                              slice,
+                                                              1,
+                                                              tileMode,
+                                                              ADDR_NON_DISPLAYABLE);
+
+        *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1);
+
+        UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition;
+
+        addr = bitAddr / 8;
+    }
+    else
+    {
+        effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+        effectiveSamples = 1;
+
+        //
+        // Resolved FMASK: compute the address just like a color surface with effectiveBpp bits
+        // per element and a single sample.
+        //
+        addr = ComputeSurfaceAddrFromCoordMicroTiled(x,
+                                                     y,
+                                                     slice,
+                                                     sample,
+                                                     effectiveBpp,
+                                                     pitch,
+                                                     height,
+                                                     effectiveSamples,
+                                                     tileMode,
+                                                     ADDR_NON_DISPLAYABLE,
+                                                     TRUE,
+                                                     pBitPosition);
+    }
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeFmaskAddrFromCoordMacroTiled
+*
+* @brief
+* Computes the FMASK address and bit position from a coordinate for 2D tiled (macro
+* tiled) surfaces; numSamples == 2 is handled with the 4-sample layout
+* @return
+* The byte address
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::ComputeFmaskAddrFromCoordMacroTiled(
+ UINT_32 x, ///< [in] x coordinate
+ UINT_32 y, ///< [in] y coordinate
+ UINT_32 slice, ///< [in] slice index
+ UINT_32 sample, ///< [in] sample number
+ UINT_32 plane, ///< [in] plane number
+ UINT_32 pitch, ///< [in] surface pitch in pixels
+ UINT_32 height, ///< [in] surface height in pixels
+ UINT_32 numSamples, ///< [in] number of samples
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 pipeSwizzle, ///< [in] pipe swizzle
+ UINT_32 bankSwizzle, ///< [in] bank swizzle
+ BOOL_32 ignoreSE, ///< [in] TRUE if ignore shader engine
+ ADDR_TILEINFO* pTileInfo, ///< [in] bank structure.**All fields to be valid on entry**
+ BOOL_32 resolved, ///< [in] TRUE if this is for resolved fmask
+ UINT_32* pBitPosition ///< [out] pointer to returned bit position
+ ) const
+{
+ UINT_64 addr = 0;
+ UINT_32 effectiveBpp;
+ UINT_32 effectiveSamples;
+
+ //
+ // 2xAA use the same layout as 4xAA
+ //
+ if (numSamples == 2)
+ {
+ numSamples = 4;
+ }
+
+ //
+ // Unresolved and resolved FMASK are addressed as different equivalent color surfaces.
+ //
+ if (!resolved)
+ {
+ effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
+ effectiveBpp = numSamples;
+
+ //
+ // Compute the address just like a color surface with numSamples bits per element and
+ // numPlanes samples.
+ //
+ addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
+ y,
+ slice,
+ plane, // sample
+ effectiveBpp,
+ pitch,
+ height,
+ effectiveSamples,
+ tileMode,
+ ADDR_NON_DISPLAYABLE,// isdisp
+ ignoreSE,// ignore_shader
+ FALSE,// depth_sample_order
+ pipeSwizzle,
+ bankSwizzle,
+ pTileInfo,
+ pBitPosition);
+
+ //
+ // Compute the real bit position. Each (sample, plane) is stored with one bit per sample.
+ //
+
+
+ //
+ // Compute the pixel index within the micro tile
+ //
+ UINT_32 pixelIndex = ComputePixelIndexWithinMicroTile(x ,
+ y ,
+ slice,
+ effectiveBpp,
+ tileMode,
+ ADDR_NON_DISPLAYABLE);
+
+ *pBitPosition = ((pixelIndex * numSamples) + sample) & (BITS_PER_BYTE-1);
+
+ UINT_64 bitAddr = BYTES_TO_BITS(addr) + *pBitPosition; // re-base the address at bit granularity
+
+ addr = bitAddr / 8;
+
+ }
+ else
+ {
+ effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+ effectiveSamples = 1;
+
+ //
+ // Resolved FMASK: compute the address just like a color surface with effectiveBpp bits
+ // per element and a single sample.
+ //
+ addr = ComputeSurfaceAddrFromCoordMacroTiled(x,
+ y,
+ slice,
+ sample,
+ effectiveBpp,
+ pitch,
+ height,
+ effectiveSamples,
+ tileMode,
+ ADDR_NON_DISPLAYABLE,
+ ignoreSE,
+ TRUE,
+ pipeSwizzle,
+ bankSwizzle,
+ pTileInfo,
+ pBitPosition);
+ }
+
+ return addr;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskCoordFromAddrMicroTiled
+*
+*   @brief
+*       Compute (x,y,slice,sample,plane) coordinates from a 1D tiled (micro tiled) fmask address
+*   @return
+*       N/A
+*
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ComputeFmaskCoordFromAddrMicroTiled(
+    UINT_64         addr,       ///< [in] byte address
+    UINT_32         bitPosition,///< [in] bit position
+    UINT_32         pitch,      ///< [in] pitch in pixels
+    UINT_32         height,     ///< [in] height in pixels
+    UINT_32         numSamples, ///< [in] number of samples (of color buffer)
+    AddrTileMode    tileMode,   ///< [in] tile mode
+    BOOL_32         resolved,   ///< [in] TRUE if it is resolved fmask
+    UINT_32*        pX,         ///< [out] X coord
+    UINT_32*        pY,         ///< [out] Y coord
+    UINT_32*        pSlice,     ///< [out] slice index
+    UINT_32*        pSample,    ///< [out] sample index
+    UINT_32*        pPlane      ///< [out] plane index
+    ) const
+{
+    //
+    // 2xAA shares the 4xAA layout, so the rest of the routine works with 4 samples.
+    //
+    const UINT_32 layoutSamples = (numSamples == 2) ? 4 : numSamples;
+
+    UINT_32  surfaceBpp;         // bits per element of the equivalent color surface
+    UINT_32  surfaceSamples;     // sample count of the equivalent color surface
+    UINT_32* pSurfaceSample;     // output that receives the surface "sample" coordinate
+    BOOL_32  depthSampleOrder;   // isDepthSampleOrder argument of the surface routine
+
+    if (resolved)
+    {
+        //
+        // Resolved FMASK: a single-sample surface whose bpp is chosen by the sample count.
+        // The surface sample coordinate is returned directly as the sample index.
+        //
+        surfaceBpp       = ComputeFmaskResolvedBppFromNumSamples(layoutSamples);
+        surfaceSamples   = 1;
+        pSurfaceSample   = pSample;
+        depthSampleOrder = TRUE;
+    }
+    else
+    {
+        //
+        // Unresolved FMASK: each element holds one bit per sample and each plane is
+        // addressed as one surface sample, so the surface sample coordinate is the plane.
+        //
+        surfaceSamples   = ComputeFmaskNumPlanesFromNumSamples(layoutSamples);
+        surfaceBpp       = layoutSamples;
+        pSurfaceSample   = pPlane;
+        depthSampleOrder = FALSE;
+    }
+
+    //
+    // Decode the address as if it belonged to that equivalent color surface.
+    //
+    ComputeSurfaceCoordFromAddrMicroTiled(addr,
+                                          bitPosition,
+                                          surfaceBpp,
+                                          pitch,
+                                          height,
+                                          surfaceSamples,
+                                          tileMode,
+                                          0, // tileBase
+                                          0, // compBits
+                                          pX,
+                                          pY,
+                                          pSlice,
+                                          pSurfaceSample,
+                                          ADDR_NON_DISPLAYABLE, // microTileType
+                                          depthSampleOrder);
+
+    //
+    // For unresolved FMASK the sample index comes from the bit position (optional output).
+    //
+    if (!resolved && pSample)
+    {
+        *pSample = bitPosition % layoutSamples;
+    }
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::ComputeFmaskCoordFromAddrMacroTiled
+*
+* @brief
+* Compute (x,y,slice,sample,plane) coordinates from a 2D/3D tiled (macro tiled)
+* fmask address; numSamples == 2 is handled with the 4-sample layout
+* @return
+* N/A
+*
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::ComputeFmaskCoordFromAddrMacroTiled(
+ UINT_64 addr, ///< [in] byte address
+ UINT_32 bitPosition,///< [in] bit position
+ UINT_32 pitch, ///< [in] pitch in pixels
+ UINT_32 height, ///< [in] height in pixels
+ UINT_32 numSamples, ///< [in] number of samples (of color buffer)
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 pipeSwizzle,///< [in] pipe swizzle
+ UINT_32 bankSwizzle,///< [in] bank swizzle
+ BOOL_32 ignoreSE, ///< [in] TRUE if ignore shader engine
+ ADDR_TILEINFO* pTileInfo, ///< [in] bank structure. **All fields to be valid on entry**
+ BOOL_32 resolved, ///< [in] TRUE if it is resolved fmask
+ UINT_32* pX, ///< [out] X coord
+ UINT_32* pY, ///< [out] Y coord
+ UINT_32* pSlice, ///< [out] slice index
+ UINT_32* pSample, ///< [out] sample index (may be NULL in the unresolved case)
+ UINT_32* pPlane ///< [out] plane index (only passed on in the unresolved case)
+ ) const
+{
+ UINT_32 effectiveBpp;
+ UINT_32 effectiveSamples;
+
+ // 2xAA use the same layout as 4xAA
+ if (numSamples == 2)
+ {
+ numSamples = 4;
+ }
+
+ //
+ // Unresolved and resolved FMASK are decoded as different equivalent color surfaces.
+ //
+ if (!resolved)
+ {
+ effectiveSamples = ComputeFmaskNumPlanesFromNumSamples(numSamples);
+ effectiveBpp = numSamples;
+
+ ComputeSurfaceCoordFromAddrMacroTiled(addr,
+ bitPosition,
+ effectiveBpp,
+ pitch,
+ height,
+ effectiveSamples,
+ tileMode,
+ 0, // No tileBase
+ 0, // No compBits
+ ADDR_NON_DISPLAYABLE,
+ ignoreSE,
+ FALSE,
+ pipeSwizzle,
+ bankSwizzle,
+ pTileInfo,
+ pX,
+ pY,
+ pSlice,
+ pPlane); // the surface "sample" output receives the plane index
+
+ if (pSample) // sample output is optional here
+ {
+ *pSample = bitPosition % numSamples;
+ }
+ }
+ else
+ {
+ effectiveBpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+ effectiveSamples = 1;
+
+ ComputeSurfaceCoordFromAddrMacroTiled(addr,
+ bitPosition,
+ effectiveBpp,
+ pitch,
+ height,
+ effectiveSamples,
+ tileMode,
+ 0, // No tileBase
+ 0, // No compBits
+ ADDR_NON_DISPLAYABLE,
+ ignoreSE,
+ TRUE,
+ pipeSwizzle,
+ bankSwizzle,
+ pTileInfo,
+ pX,
+ pY,
+ pSlice,
+ pSample); // resolved: the sample index is returned directly; pPlane is not written here
+ }
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::DispatchComputeFmaskCoordFromAddr
+*
+* @brief
+* Compute (x,y,slice,sample,plane) coordinates from
+* fmask address, dispatching on pIn->tileMode; unhandled tile modes assert
+* @return
+* N/A
+*
+***************************************************************************************************
+*/
+VOID EgBasedAddrLib::DispatchComputeFmaskCoordFromAddr(
+ const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
+ ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ UINT_64 addr = pIn->addr;
+ UINT_32 bitPosition = pIn->bitPosition;
+ UINT_32 pitch = pIn->pitch;
+ UINT_32 height = pIn->height;
+ UINT_32 numSamples = pIn->numSamples;
+ AddrTileMode tileMode = pIn->tileMode;
+ BOOL_32 ignoreSE = pIn->ignoreSE;
+ ADDR_TILEINFO* pTileInfo = pIn->pTileInfo;
+ BOOL_32 resolved = pIn->resolved;
+
+ UINT_32* pX = &pOut->x; // results are written straight into the output structure
+ UINT_32* pY = &pOut->y;
+ UINT_32* pSlice = &pOut->slice;
+ UINT_32* pSample = &pOut->sample;
+ UINT_32* pPlane = &pOut->plane;
+
+ switch (tileMode)
+ {
+ case ADDR_TM_1D_TILED_THIN1:
+ ComputeFmaskCoordFromAddrMicroTiled(addr,
+ bitPosition,
+ pitch,
+ height,
+ numSamples,
+ tileMode,
+ resolved,
+ pX,
+ pY,
+ pSlice,
+ pSample,
+ pPlane);
+ break;
+ case ADDR_TM_2D_TILED_THIN1://fall through
+ case ADDR_TM_3D_TILED_THIN1:
+ UINT_32 pipeSwizzle;
+ UINT_32 bankSwizzle;
+
+ if (m_configFlags.useCombinedSwizzle) // both swizzles are extracted from pIn->tileSwizzle
+ {
+ ExtractBankPipeSwizzle(pIn->tileSwizzle, pIn->pTileInfo,
+ &bankSwizzle, &pipeSwizzle);
+ }
+ else // otherwise the caller supplies them as separate fields
+ {
+ pipeSwizzle = pIn->pipeSwizzle;
+ bankSwizzle = pIn->bankSwizzle;
+ }
+
+ ComputeFmaskCoordFromAddrMacroTiled(addr,
+ bitPosition,
+ pitch,
+ height,
+ numSamples,
+ tileMode,
+ pipeSwizzle,
+ bankSwizzle,
+ ignoreSE,
+ pTileInfo,
+ resolved,
+ pX,
+ pY,
+ pSlice,
+ pSample,
+ pPlane);
+ break;
+ default: // any other tile mode: nothing is written to pOut
+ ADDR_ASSERT_ALWAYS();
+ break;
+
+ }
+}
+#endif
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskNumPlanesFromNumSamples
+*
+*   @brief
+*       Maps a sample count (2, 4 or 8) to the number of fmask planes (1, 2 or 4)
+*
+*   @return
+*       Number of planes; 0 for any other sample count (flagged as an unhandled case)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeFmaskNumPlanesFromNumSamples(
+    UINT_32 numSamples)     ///< [in] number of samples
+{
+    UINT_32 planeCount = 0;
+
+    //
+    // FMASK is stored such that each micro tile is composed of elements containing N bits,
+    // where N is the number of samples. There is a micro tile for each bit in the FMASK
+    // address; the micro tiles for each address bit (a "plane") are stored sequentially.
+    //   2 samples: looks like a general surface with 2 bits per element
+    //   4 samples: looks like a general surface with 4 bits per element and 2 samples
+    //   8 samples: looks like a general surface with 8 bits per element and 4 samples
+    // R6xx and R7xx only stored 3 planes for 8-sample FMASK surfaces; this was changed
+    // for R8xx to simplify the logic in the CB.
+    //
+    if (numSamples == 2)
+    {
+        planeCount = 1;
+    }
+    else if (numSamples == 4)
+    {
+        planeCount = 2;
+    }
+    else if (numSamples == 8)
+    {
+        planeCount = 4;
+    }
+    else
+    {
+        ADDR_UNHANDLED_CASE();
+    }
+    return planeCount;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::ComputeFmaskResolvedBppFromNumSamples
+*
+*   @brief
+*       Maps a sample count (2, 4 or 8) to the effective bpp of a resolved fmask (8, 8 or 32)
+*
+*   @return
+*       bpp; 0 for any other sample count (flagged as an unhandled case)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::ComputeFmaskResolvedBppFromNumSamples(
+    UINT_32 numSamples)     ///< number of samples
+{
+    UINT_32 resolvedBpp = 0;
+
+    //
+    // Resolved FMASK surfaces are generated by the CB and read by the texture unit
+    // so that the texture unit can read compressed multi-sample color data.
+    // These surfaces store each index value packed per element, and each element
+    // contains at least num_samples * log2(num_samples) bits.
+    //
+    // They are addressed similarly to a single-sample color surface with:
+    //   2-sample    8 bits per element
+    //   4-sample    8 bits per element
+    //   8-sample   32 bits per element
+    // Any other sample count is unhandled and leaves the result at 0.
+    //
+
+    switch (numSamples)
+    {
+        case 2: // fall through
+        case 4:
+            resolvedBpp = 8;
+            break;
+        case 8:
+            resolvedBpp = 32;
+            break;
+        default:
+            ADDR_UNHANDLED_CASE();
+            break;
+    }
+    return resolvedBpp;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::IsTileInfoAllZero
+*
+*   @brief
+*       Returns TRUE when every checked field of the tile info is zero
+*   @note
+*       A NULL pointer is also reported as all zero
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::IsTileInfoAllZero(
+    ADDR_TILEINFO* pTileInfo)
+{
+    //
+    // No tile info at all counts as "all zero".
+    //
+    if (!pTileInfo)
+    {
+        return TRUE;
+    }
+
+    // Checked fields: banks, bank width/height, macro aspect ratio, tile split, pipe config.
+    const BOOL_32 everyFieldZero = (pTileInfo->banks == 0) &&
+                                   (pTileInfo->bankWidth == 0) &&
+                                   (pTileInfo->bankHeight == 0) &&
+                                   (pTileInfo->macroAspectRatio == 0) &&
+                                   (pTileInfo->tileSplitBytes == 0) &&
+                                   (pTileInfo->pipeConfig == 0);
+
+    return everyFieldZero ? TRUE : FALSE;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlTileInfoEqual
+*
+*   @brief
+*       Returns TRUE when banks, bankWidth, bankHeight, macroAspectRatio and tileSplitBytes match
+*   @note
+*       Only takes care of current HWL's data; pipeConfig is not compared here
+***************************************************************************************************
+*/
+BOOL_32 EgBasedAddrLib::HwlTileInfoEqual(
+    const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand
+    const ADDR_TILEINFO* pRight ///<[in] Right compare operand
+    ) const
+{
+    // A single mismatching field is enough to make the two descriptors unequal.
+    if ((pLeft->banks != pRight->banks) ||
+        (pLeft->bankWidth != pRight->bankWidth) ||
+        (pLeft->bankHeight != pRight->bankHeight) ||
+        (pLeft->macroAspectRatio != pRight->macroAspectRatio) ||
+        (pLeft->tileSplitBytes != pRight->tileSplitBytes))
+    {
+        return FALSE;
+    }
+
+    // Every compared field matched.
+    return TRUE;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::HwlConvertTileInfoToHW
+* @brief
+* Converts tile info fields between real values and small encoded indices (pIn->reverse picks the way)
+* @return
+* ADDR_OK, or ADDR_INVALIDPARAMS if a tile info pointer is NULL or a field has an unlisted value
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlConvertTileInfoToHW(
+ const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
+ ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+ ADDR_TILEINFO *pTileInfoIn = pIn->pTileInfo;
+ ADDR_TILEINFO *pTileInfoOut = pOut->pTileInfo;
+
+ if ((pTileInfoIn != NULL) && (pTileInfoOut != NULL))
+ {
+ if (pIn->reverse == FALSE) // forward direction: real values -> encoded indices
+ {
+ switch (pTileInfoIn->banks) // 2/4/8/16 -> 0/1/2/3
+ {
+ case 2:
+ pTileInfoOut->banks = 0;
+ break;
+ case 4:
+ pTileInfoOut->banks = 1;
+ break;
+ case 8:
+ pTileInfoOut->banks = 2;
+ break;
+ case 16:
+ pTileInfoOut->banks = 3;
+ break;
+ default: // unlisted value: report the error but still write a fallback
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->banks = 0;
+ break;
+ }
+
+ switch (pTileInfoIn->bankWidth) // 1/2/4/8 -> 0/1/2/3
+ {
+ case 1:
+ pTileInfoOut->bankWidth = 0;
+ break;
+ case 2:
+ pTileInfoOut->bankWidth = 1;
+ break;
+ case 4:
+ pTileInfoOut->bankWidth = 2;
+ break;
+ case 8:
+ pTileInfoOut->bankWidth = 3;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->bankWidth = 0;
+ break;
+ }
+
+ switch (pTileInfoIn->bankHeight) // 1/2/4/8 -> 0/1/2/3
+ {
+ case 1:
+ pTileInfoOut->bankHeight = 0;
+ break;
+ case 2:
+ pTileInfoOut->bankHeight = 1;
+ break;
+ case 4:
+ pTileInfoOut->bankHeight = 2;
+ break;
+ case 8:
+ pTileInfoOut->bankHeight = 3;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->bankHeight = 0;
+ break;
+ }
+
+ switch (pTileInfoIn->macroAspectRatio) // 1/2/4/8 -> 0/1/2/3
+ {
+ case 1:
+ pTileInfoOut->macroAspectRatio = 0;
+ break;
+ case 2:
+ pTileInfoOut->macroAspectRatio = 1;
+ break;
+ case 4:
+ pTileInfoOut->macroAspectRatio = 2;
+ break;
+ case 8:
+ pTileInfoOut->macroAspectRatio = 3;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->macroAspectRatio = 0;
+ break;
+ }
+
+ switch (pTileInfoIn->tileSplitBytes) // 64..4096 (powers of two) -> 0..6
+ {
+ case 64:
+ pTileInfoOut->tileSplitBytes = 0;
+ break;
+ case 128:
+ pTileInfoOut->tileSplitBytes = 1;
+ break;
+ case 256:
+ pTileInfoOut->tileSplitBytes = 2;
+ break;
+ case 512:
+ pTileInfoOut->tileSplitBytes = 3;
+ break;
+ case 1024:
+ pTileInfoOut->tileSplitBytes = 4;
+ break;
+ case 2048:
+ pTileInfoOut->tileSplitBytes = 5;
+ break;
+ case 4096:
+ pTileInfoOut->tileSplitBytes = 6;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->tileSplitBytes = 0;
+ break;
+ }
+ }
+ else // reverse direction: encoded indices -> real values
+ {
+ switch (pTileInfoIn->banks) // 0/1/2/3 -> 2/4/8/16
+ {
+ case 0:
+ pTileInfoOut->banks = 2;
+ break;
+ case 1:
+ pTileInfoOut->banks = 4;
+ break;
+ case 2:
+ pTileInfoOut->banks = 8;
+ break;
+ case 3:
+ pTileInfoOut->banks = 16;
+ break;
+ default: // unlisted index: report the error but still write a fallback
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->banks = 2;
+ break;
+ }
+
+ switch (pTileInfoIn->bankWidth) // 0/1/2/3 -> 1/2/4/8
+ {
+ case 0:
+ pTileInfoOut->bankWidth = 1;
+ break;
+ case 1:
+ pTileInfoOut->bankWidth = 2;
+ break;
+ case 2:
+ pTileInfoOut->bankWidth = 4;
+ break;
+ case 3:
+ pTileInfoOut->bankWidth = 8;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->bankWidth = 1;
+ break;
+ }
+
+ switch (pTileInfoIn->bankHeight) // 0/1/2/3 -> 1/2/4/8
+ {
+ case 0:
+ pTileInfoOut->bankHeight = 1;
+ break;
+ case 1:
+ pTileInfoOut->bankHeight = 2;
+ break;
+ case 2:
+ pTileInfoOut->bankHeight = 4;
+ break;
+ case 3:
+ pTileInfoOut->bankHeight = 8;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->bankHeight = 1;
+ break;
+ }
+
+ switch (pTileInfoIn->macroAspectRatio) // 0/1/2/3 -> 1/2/4/8
+ {
+ case 0:
+ pTileInfoOut->macroAspectRatio = 1;
+ break;
+ case 1:
+ pTileInfoOut->macroAspectRatio = 2;
+ break;
+ case 2:
+ pTileInfoOut->macroAspectRatio = 4;
+ break;
+ case 3:
+ pTileInfoOut->macroAspectRatio = 8;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->macroAspectRatio = 1;
+ break;
+ }
+
+ switch (pTileInfoIn->tileSplitBytes) // 0..6 -> 64..4096 (powers of two)
+ {
+ case 0:
+ pTileInfoOut->tileSplitBytes = 64;
+ break;
+ case 1:
+ pTileInfoOut->tileSplitBytes = 128;
+ break;
+ case 2:
+ pTileInfoOut->tileSplitBytes = 256;
+ break;
+ case 3:
+ pTileInfoOut->tileSplitBytes = 512;
+ break;
+ case 4:
+ pTileInfoOut->tileSplitBytes = 1024;
+ break;
+ case 5:
+ pTileInfoOut->tileSplitBytes = 2048;
+ break;
+ case 6:
+ pTileInfoOut->tileSplitBytes = 4096;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ pTileInfoOut->tileSplitBytes = 64;
+ break;
+ }
+ }
+
+ if (pTileInfoIn != pTileInfoOut) // pipeConfig is copied through unconverted
+ {
+ pTileInfoOut->pipeConfig = pTileInfoIn->pipeConfig;
+ }
+ }
+ else // one of the tile info pointers is NULL
+ {
+ ADDR_ASSERT_ALWAYS();
+ retCode = ADDR_INVALIDPARAMS;
+ }
+
+ return retCode;
+}
+
+/**
+***************************************************************************************************
+* EgBasedAddrLib::HwlComputeSurfaceInfo
+* @brief
+* Entry of EgBasedAddrLib ComputeSurfaceInfo; also fills pOut->tileIndex and macroModeIndex
+* @return
+* ADDR_OK, or ADDR_INVALIDPARAMS if numSamples < numFrags or DispatchComputeSurfaceInfo fails
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ ADDR_E_RETURNCODE retCode = ADDR_OK;
+
+ if (pIn->numSamples < pIn->numFrags)
+ {
+ retCode = ADDR_INVALIDPARAMS;
+ }
+
+ ADDR_TILEINFO tileInfo = {0}; // scratch tile info, lent to pOut only for the duration of this call
+
+ if (retCode == ADDR_OK)
+ {
+ // Uses internal tile info if pOut does not have a valid pTileInfo
+ if (pOut->pTileInfo == NULL)
+ {
+ pOut->pTileInfo = &tileInfo;
+ }
+
+ if (!DispatchComputeSurfaceInfo(pIn, pOut)) // note: the steps below still run after a failure
+ {
+ retCode = ADDR_INVALIDPARAMS;
+ }
+
+ // Returns an index
+ pOut->tileIndex = HwlPostCheckTileIndex(pOut->pTileInfo,
+ pOut->tileMode,
+ pOut->tileType,
+ pOut->tileIndex);
+
+ if (IsMacroTiled(pOut->tileMode) && (pOut->macroModeIndex == TileIndexInvalid))
+ {
+ pOut->macroModeIndex = HwlComputeMacroModeIndex(pOut->tileIndex,
+ pIn->flags,
+ pIn->bpp,
+ pIn->numSamples,
+ pOut->pTileInfo);
+ }
+
+ // Resets pTileInfo to NULL if the internal tile info is used
+ if (pOut->pTileInfo == &tileInfo)
+ {
+#if DEBUG
+ // Client does not pass in a valid pTileInfo
+ if (IsMacroTiled(pOut->tileMode))
+ {
+ // If a valid index is returned, then no pTileInfo is okay
+ ADDR_ASSERT(!m_configFlags.useTileIndex || pOut->tileIndex != TileIndexInvalid);
+
+ if (!IsTileInfoAllZero(pIn->pTileInfo))
+ {
+ // The initial value of pIn->pTileInfo is copied to tileInfo (not in this function;
+ // presumably during dispatch). We do not expect any of these values to be changed.
+ ADDR_ASSERT(tileInfo.banks == pIn->pTileInfo->banks);
+ ADDR_ASSERT(tileInfo.bankWidth == pIn->pTileInfo->bankWidth);
+ ADDR_ASSERT(tileInfo.bankHeight == pIn->pTileInfo->bankHeight);
+ ADDR_ASSERT(tileInfo.macroAspectRatio == pIn->pTileInfo->macroAspectRatio);
+ ADDR_ASSERT(tileInfo.tileSplitBytes == pIn->pTileInfo->tileSplitBytes);
+ }
+ }
+#endif
+ pOut->pTileInfo = NULL; // never hand the address of the local back to the caller
+ }
+ }
+
+ return retCode;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeSurfaceAddrFromCoord
+*   @brief
+*       Entry of EgBasedAddrLib ComputeSurfaceAddrFromCoord; validates input, then stores pOut->addr
+*   @return
+*       ADDR_INVALIDPARAMS if a checked input is out of range, otherwise ADDR_OK
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceAddrFromCoord(
+    const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut ///< [out] output structure
+    ) const
+{
+    // Overflow test (ALT_TEST) needs out-of-boundary coords, so x/y are not checked there
+#if !ALT_TEST
+    if ((pIn->x > pIn->pitch) ||
+        (pIn->y > pIn->height))
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+#endif
+
+    if (pIn->numSamples > m_maxSamples)
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    pOut->addr = DispatchComputeSurfaceAddrFromCoord(pIn, pOut);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeSurfaceCoordFromAddr
+*   @brief
+*       Entry of EgBasedAddrLib ComputeSurfaceCoordFromAddr; validates input before dispatching
+*   @return
+*       ADDR_INVALIDPARAMS if bitPosition >= 8 or numSamples > m_maxSamples, otherwise ADDR_OK
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSurfaceCoordFromAddr(
+    const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure
+    ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT*      pOut ///< [out] output structure
+    ) const
+{
+    //
+    // A bit position of 8 or more, or more samples than m_maxSamples, is rejected.
+    //
+    if ((pIn->bitPosition >= 8) ||
+        (pIn->numSamples > m_maxSamples))
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    DispatchComputeSurfaceCoordFromAddr(pIn, pOut);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeSliceTileSwizzle
+*   @brief
+*       Entry of EgBasedAddrLib ComputeSliceTileSwizzle; stores the result in pOut->tileSwizzle
+*   @return
+*       ADDR_INVALIDPARAMS if pIn->pTileInfo is NULL or reports no banks, otherwise ADDR_OK
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE EgBasedAddrLib::HwlComputeSliceTileSwizzle(
+    const ADDR_COMPUTE_SLICESWIZZLE_INPUT*  pIn,    ///< [in] input structure
+    ADDR_COMPUTE_SLICESWIZZLE_OUTPUT*       pOut    ///< [out] output structure
+    ) const
+{
+    //
+    // The swizzle can only be computed from tile info that reports at least one bank;
+    // anything else is rejected as an invalid parameter.
+    //
+    if (!pIn->pTileInfo || !(pIn->pTileInfo->banks > 0))
+    {
+        return ADDR_INVALIDPARAMS;
+    }
+
+    // Tile info is usable: compute the swizzle for the requested slice.
+    pOut->tileSwizzle = ComputeSliceTileSwizzle(pIn->tileMode,
+                                                pIn->baseSwizzle,
+                                                pIn->slice,
+                                                pIn->baseAddr,
+                                                pIn->pTileInfo);
+
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeHtileBpp
+*
+*   @brief
+*       Reports the htile bpp; asserts that both block dimensions are 8
+*
+*   @return
+*       Htile bpp (always 32 here)
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::HwlComputeHtileBpp(
+    BOOL_32 isWidth8,   ///< [in] TRUE if block width is 8
+    BOOL_32 isHeight8   ///< [in] TRUE if block height is 8
+    ) const
+{
+    const UINT_32 htileBpp = 32;            // the only value this implementation returns
+    ADDR_ASSERT(isWidth8 && isHeight8);     // only support 8x8 mode
+    return htileBpp;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlComputeHtileBaseAlign
+*
+*   @brief
+*       Compute htile base alignment (isLinear is not consulted by this implementation)
+*
+*   @return
+*       Htile base alignment
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::HwlComputeHtileBaseAlign(
+    BOOL_32         isTcCompatible, ///< [in] if TC compatible
+    BOOL_32         isLinear,       ///< [in] if it is linear mode
+    ADDR_TILEINFO*  pTileInfo       ///< [in] Tile info
+    ) const
+{
+    // Starting point: pipe interleave size times the pipe count reported for pTileInfo.
+    const UINT_32 pipeAlign = m_pipeInterleaveBytes * HwlGetPipes(pTileInfo);
+
+    if (!isTcCompatible)
+    {
+        return pipeAlign;
+    }
+
+    // TC-compatible case: tile info is expected; when present, scale by its bank count.
+    ADDR_ASSERT(pTileInfo != NULL);
+
+    return pTileInfo ? (pipeAlign * pTileInfo->banks) : pipeAlign;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled
+*
+*   @brief
+*       Compute 1D tiled surface pitch alignment from the pipe interleave size, the bits
+*       per pixel, the sample count and the thickness of the tile mode.
+*
+*   @return
+*       pitch alignment
+***************************************************************************************************
+*/
+UINT_32 EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled(
+    AddrTileMode        tileMode,          ///< [in] tile mode
+    UINT_32             bpp,               ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
+    UINT_32             numSamples         ///< [in] number of samples
+    ) const
+{
+    //
+    // Special workaround for depth/stencil buffer: pitch alignment is related to bpp, so
+    // use 8 bpp to meet the larger requirement of the stencil buffer. A depth only buffer
+    // (noStencil set) keeps the bpp it was given.
+    //
+    // Note: this actually does not work for mipmap but mipmap depth texture is not really
+    // sampled with mipmap.
+    //
+    const UINT_32 alignBpp = (flags.depth && !flags.noStencil) ? 8 : bpp;
+
+    //
+    // Pixels held by one micro tile, scaled by the thickness of the tile mode.
+    //
+    const UINT_32 tilePixels = MicroTilePixels * ComputeSurfaceThickness(tileMode);
+
+    //
+    // Pixels, and from that whole micro tiles, that fit in one pipe interleave.
+    //
+    const UINT_32 interleavePixels = BYTES_TO_BITS(m_pipeInterleaveBytes) /
+                                     (alignBpp * numSamples);
+    const UINT_32 interleaveTiles  = interleavePixels / tilePixels;
+
+    //
+    // Align to the micro tiles of one pipe interleave, but never to less than the width
+    // of a single micro tile.
+    //
+    const UINT_32 alignment = Max(MicroTileWidth, interleaveTiles * MicroTileWidth);
+    return alignment;
+}
+
+/**
+***************************************************************************************************
+*   EgBasedAddrLib::HwlGetSizeAdjustmentMicroTiled
+*
+*   @brief
+*       Computes the slice size of a 1D tiled surface; *pPitch and *pHeight are only read here
+*
+*   @return
+*       Logical slice size in bytes
+***************************************************************************************************
+*/
+UINT_64 EgBasedAddrLib::HwlGetSizeAdjustmentMicroTiled(
+    UINT_32             thickness,      ///< [in] thickness
+    UINT_32             bpp,            ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,          ///< [in] surface flags
+    UINT_32             numSamples,     ///< [in] number of samples
+    UINT_32             baseAlign,      ///< [in] base alignment
+    UINT_32             pitchAlign,     ///< [in] pitch alignment
+    UINT_32*            pPitch,         ///< [in/out] pointer to pitch
+    UINT_32*            pHeight         ///< [in/out] pointer to height
+    ) const
+{
+    UINT_64 logicalSliceSize;
+    UINT_64 physicalSliceSize;
+
+    UINT_32 pitch = *pPitch;
+    UINT_32 height = *pHeight;
+
+    // Logical slice: pitch * height * bpp * numSamples, with the product widened to 64 bits
+    logicalSliceSize = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
+
+    // Physical slice: multiplied by thickness
+    physicalSliceSize = logicalSliceSize * thickness;
+
+    //
+    // R800 will always pad physical slice size to baseAlign which is pipe_interleave_bytes
+    //
+    ADDR_ASSERT((physicalSliceSize % baseAlign) == 0);
+
+    return logicalSliceSize;
+}
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h b/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h
new file mode 100644
index 00000000000..84adb66eedc
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/r800/egbaddrlib.h
@@ -0,0 +1,411 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file egbaddrlib.h
+* @brief Contains the EgBasedAddrLib class definition.
+***************************************************************************************************
+*/
+
+#ifndef __EG_BASED_ADDR_LIB_H__
+#define __EG_BASED_ADDR_LIB_H__
+
+#include "addrlib.h"
+
+
+/// Bit-field structure used as the pOutput argument of ComputeSurfaceCoord2DFromBankPipe
+struct CoordFromBankPipe
+{
+ UINT_32 xBits : 3; ///< 3-bit x field
+ UINT_32 yBits : 4; ///< 4-bit y field
+
+ UINT_32 xBit3 : 1; ///< single-bit field; presumably bit 3 of x - TODO confirm against the producer
+ UINT_32 xBit4 : 1; ///< single-bit field; presumably bit 4 of x
+ UINT_32 xBit5 : 1; ///< single-bit field; presumably bit 5 of x
+ UINT_32 yBit3 : 1; ///< single-bit field; presumably bit 3 of y
+ UINT_32 yBit4 : 1; ///< single-bit field; presumably bit 4 of y
+ UINT_32 yBit5 : 1; ///< single-bit field; presumably bit 5 of y
+ UINT_32 yBit6 : 1; ///< single-bit field; presumably bit 6 of y
+};
+
+/**
+***************************************************************************************************
+* @brief This class is the Evergreen based address library; it derives from AddrLib
+* @note Abstract class: several Hwl* hooks are pure virtual and the constructor is protected
+***************************************************************************************************
+*/
+class EgBasedAddrLib : public AddrLib
+{
+protected:
+ EgBasedAddrLib(const AddrClient* pClient);
+ virtual ~EgBasedAddrLib();
+
+public:
+
+ /// Surface info functions
+
+ // NOTE: DispatchComputeSurfaceInfo using TileInfo takes both an input and an output.
+ // On input:
+ // One or more fields may be 0 to be calculated/defaulted - pre-SI h/w.
+ // H/W using tile mode index only accepts none or all 0's - SI and newer h/w.
+ // It then returns the actual tiling configuration used.
+ // Other methods' TileInfo must be valid on entry
+ BOOL_32 DispatchComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ ADDR_E_RETURNCODE DispatchComputeFmaskInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+protected:
+ // Hwl interface (virtual methods declared without "= 0"; definitions are not in this header)
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceAddrFromCoord(
+ const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceCoordFromAddr(
+ const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeSliceTileSwizzle(
+ const ADDR_COMPUTE_SLICESWIZZLE_INPUT* pIn,
+ ADDR_COMPUTE_SLICESWIZZLE_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlExtractBankPipeSwizzle(
+ const ADDR_EXTRACT_BANKPIPE_SWIZZLE_INPUT* pIn,
+ ADDR_EXTRACT_BANKPIPE_SWIZZLE_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlCombineBankPipeSwizzle(
+ UINT_32 bankSwizzle, UINT_32 pipeSwizzle, ADDR_TILEINFO* pTileInfo,
+ UINT_64 baseAddr, UINT_32* pTileSwizzle) const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeBaseSwizzle(
+ const ADDR_COMPUTE_BASE_SWIZZLE_INPUT* pIn,
+ ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
+ const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+ ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
+
+ virtual UINT_32 HwlComputeHtileBpp(
+ BOOL_32 isWidth8, BOOL_32 isHeight8) const;
+
+ virtual UINT_32 HwlComputeHtileBaseAlign(
+ BOOL_32 isTcCompatible, BOOL_32 isLinear, ADDR_TILEINFO* pTileInfo) const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeFmaskInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pOut);
+
+ virtual ADDR_E_RETURNCODE HwlComputeFmaskAddrFromCoord(
+ const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlComputeFmaskCoordFromAddr(
+ const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
+
+ virtual BOOL_32 HwlDegradeBaseLevel(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
+
+ virtual UINT_32 HwlComputeQbStereoRightSwizzle(
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pInfo) const;
+
+ virtual VOID HwlComputePixelCoordFromOffset(
+ UINT_32 offset, UINT_32 bpp, UINT_32 numSamples,
+ AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
+ UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample,
+ AddrTileType microTileType, BOOL_32 isDepthSampleOrder) const;
+
+ /// Return Cmask block max. NOTE(review): declared BOOL_32 although the value is a 14-bit count
+ virtual BOOL_32 HwlGetMaxCmaskBlockMax() const
+ {
+ return 16383; // 14 bits
+ }
+
+ // Sub-hwl interface
+ /// Pure virtual function to setup tile info (indices) if client requests to do so
+ virtual VOID HwlSetupTileInfo(
+ AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
+ UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
+ AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
+
+ /// Pure virtual function to get pitch alignment for linear modes
+ virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const = 0;
+
+ /// Pure virtual function to get size adjustment for linear modes
+ virtual UINT_64 HwlGetSizeAdjustmentLinear(
+ AddrTileMode tileMode,
+ UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign,
+ UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const = 0;
+
+ virtual UINT_32 HwlGetPitchAlignmentMicroTiled( // micro tiled counterpart of the linear hook; not pure
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const;
+
+ virtual UINT_64 HwlGetSizeAdjustmentMicroTiled( // returns the logical slice size in bytes; not pure
+ UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+ UINT_32 baseAlign, UINT_32 pitchAlign,
+ UINT_32 *pPitch, UINT_32 *pHeight) const;
+
+ /// Pure virtual function to do extra sanity check
+ virtual BOOL_32 HwlSanityCheckMacroTiled(
+ ADDR_TILEINFO* pTileInfo) const = 0;
+
+ /// Pure virtual function to check current level to be the last macro tiled one
+ virtual VOID HwlCheckLastMacroTiledLvl(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const = 0;
+
+ /// Adjusts bank before bank is modified by rotation (pure virtual)
+ virtual UINT_32 HwlPreAdjustBank(
+ UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const = 0;
+
+ virtual VOID HwlComputeSurfaceCoord2DFromBankPipe( // pure virtual; pX / pY are pointer parameters
+ AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice,
+ UINT_32 bank, UINT_32 pipe,
+ UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
+ BOOL_32 ignoreSE,
+ ADDR_TILEINFO* pTileInfo) const = 0;
+
+ virtual BOOL_32 HwlTileInfoEqual(
+ const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const;
+
+ virtual AddrTileMode HwlDegradeThickTileMode(
+ AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
+
+ virtual INT_32 HwlPostCheckTileIndex( // default implementation always returns TileIndexInvalid
+ const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
+ INT curIndex = TileIndexInvalid) const
+ {
+ return TileIndexInvalid;
+ }
+
+ virtual VOID HwlFmaskPreThunkSurfInfo( // default implementation is empty
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
+ const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
+ ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const
+ {
+ }
+
+ virtual VOID HwlFmaskPostThunkSurfInfo( // default implementation is empty
+ const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const
+ {
+ }
+
+ /// Virtual function to check if the height needs extra padding
+ /// for stereo right eye offset, to avoid bank pipe swizzle (default: FALSE)
+ virtual BOOL_32 HwlStereoCheckRightOffsetPadding() const
+ {
+ return FALSE;
+ }
+
+ virtual BOOL_32 HwlReduceBankWidthHeight(
+ UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+ UINT_32 bankHeightAlign, UINT_32 pipes,
+ ADDR_TILEINFO* pTileInfo) const;
+
+ // Protected non-virtual functions
+
+ /// Mip level functions
+ AddrTileMode ComputeSurfaceMipLevelTileMode(
+ AddrTileMode baseTileMode, UINT_32 bpp,
+ UINT_32 pitch, UINT_32 height, UINT_32 numSlices, UINT_32 numSamples,
+ UINT_32 pitchAlign, UINT_32 heightAlign,
+ ADDR_TILEINFO* pTileInfo) const;
+
+ /// Swizzle functions
+ VOID ExtractBankPipeSwizzle(
+ UINT_32 base256b, ADDR_TILEINFO* pTileInfo,
+ UINT_32* pBankSwizzle, UINT_32* pPipeSwizzle) const;
+
+ UINT_32 GetBankPipeSwizzle(
+ UINT_32 bankSwizzle, UINT_32 pipeSwizzle,
+ UINT_64 baseAddr, ADDR_TILEINFO* pTileInfo) const;
+
+ UINT_32 ComputeSliceTileSwizzle(
+ AddrTileMode tileMode, UINT_32 baseSwizzle, UINT_32 slice, UINT_64 baseAddr,
+ ADDR_TILEINFO* pTileInfo) const;
+
+ /// Addressing functions
+ UINT_32 ComputeBankFromCoord(
+ UINT_32 x, UINT_32 y, UINT_32 slice,
+ AddrTileMode tileMode, UINT_32 bankSwizzle, UINT_32 tileSpitSlice, // NOTE(review): "Spit" looks like a typo for "Split"
+ ADDR_TILEINFO* pTileInfo) const;
+
+ UINT_32 ComputeBankFromAddr(
+ UINT_64 addr, UINT_32 numBanks, UINT_32 numPipes) const;
+
+ UINT_32 ComputePipeRotation(
+ AddrTileMode tileMode, UINT_32 numPipes) const;
+
+ UINT_32 ComputeBankRotation(
+ AddrTileMode tileMode, UINT_32 numBanks,
+ UINT_32 numPipes) const;
+
+ VOID ComputeSurfaceCoord2DFromBankPipe( // non-virtual overload: x / y by value, result in *pOutput
+ AddrTileMode tileMode, UINT_32 x, UINT_32 y, UINT_32 slice,
+ UINT_32 bank, UINT_32 pipe,
+ UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
+ ADDR_TILEINFO* pTileInfo,
+ CoordFromBankPipe *pOutput) const;
+
+ /// Htile/Cmask functions
+ UINT_64 ComputeHtileBytes(
+ UINT_32 pitch, UINT_32 height, UINT_32 bpp,
+ BOOL_32 isLinear, UINT_32 numSlices, UINT_64* sliceBytes, UINT_32 baseAlign) const;
+
+ // Static functions
+ static BOOL_32 IsTileInfoAllZero(ADDR_TILEINFO* pTileInfo);
+ static UINT_32 ComputeFmaskNumPlanesFromNumSamples(UINT_32 numSamples);
+ static UINT_32 ComputeFmaskResolvedBppFromNumSamples(UINT_32 numSamples);
+
+private:
+
+ BOOL_32 ComputeSurfaceInfoLinear(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
+ UINT_32 padDims) const;
+
+ BOOL_32 ComputeSurfaceInfoMicroTiled(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
+ UINT_32 padDims,
+ AddrTileMode expTileMode) const;
+
+ BOOL_32 ComputeSurfaceInfoMacroTiled(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut,
+ UINT_32 padDims,
+ AddrTileMode expTileMode) const;
+
+ BOOL_32 ComputeSurfaceAlignmentsLinear(
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+ UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
+
+ BOOL_32 ComputeSurfaceAlignmentsMicroTiled(
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+ UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
+
+ BOOL_32 ComputeSurfaceAlignmentsMacroTiled(
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags,
+ UINT_32 mipLevel, UINT_32 numSamples,
+ ADDR_TILEINFO* pTileInfo,
+ UINT_32* pBaseAlign, UINT_32* pPitchAlign, UINT_32* pHeightAlign) const;
+
+ /// Surface addressing functions
+ UINT_64 DispatchComputeSurfaceAddrFromCoord(
+ const ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+ VOID DispatchComputeSurfaceCoordFromAddr(
+ const ADDR_COMPUTE_SURFACE_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_COORDFROMADDR_OUTPUT* pOut) const;
+
+ UINT_64 ComputeSurfaceAddrFromCoordMicroTiled(
+ UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
+ UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ AddrTileMode tileMode,
+ AddrTileType microTileType, BOOL_32 isDepthSampleOrder,
+ UINT_32* pBitPosition) const;
+
+ UINT_64 ComputeSurfaceAddrFromCoordMacroTiled(
+ UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample,
+ UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ AddrTileMode tileMode,
+ AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder,
+ UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
+ ADDR_TILEINFO* pTileInfo,
+ UINT_32* pBitPosition) const;
+
+ VOID ComputeSurfaceCoordFromAddrMacroTiled(
+ UINT_64 addr, UINT_32 bitPosition,
+ UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ AddrTileMode tileMode, UINT_32 tileBase, UINT_32 compBits,
+ AddrTileType microTileType, BOOL_32 ignoreSE, BOOL_32 isDepthSampleOrder,
+ UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
+ ADDR_TILEINFO* pTileInfo,
+ UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample) const;
+
+ /// Fmask functions
+ UINT_64 DispatchComputeFmaskAddrFromCoord(
+ const ADDR_COMPUTE_FMASK_ADDRFROMCOORD_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_ADDRFROMCOORD_OUTPUT* pOut) const;
+
+ VOID DispatchComputeFmaskCoordFromAddr(
+ const ADDR_COMPUTE_FMASK_COORDFROMADDR_INPUT* pIn,
+ ADDR_COMPUTE_FMASK_COORDFROMADDR_OUTPUT* pOut) const;
+
+ // FMASK related methods - private
+ UINT_64 ComputeFmaskAddrFromCoordMicroTiled(
+ UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane,
+ UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode,
+ BOOL_32 resolved, UINT_32* pBitPosition) const;
+
+ VOID ComputeFmaskCoordFromAddrMicroTiled(
+ UINT_64 addr, UINT_32 bitPosition,
+ UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ AddrTileMode tileMode, BOOL_32 resolved,
+ UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const;
+
+ VOID ComputeFmaskCoordFromAddrMacroTiled(
+ UINT_64 addr, UINT_32 bitPosition,
+ UINT_32 pitch, UINT_32 height, UINT_32 numSamples, AddrTileMode tileMode,
+ UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
+ BOOL_32 ignoreSE,
+ ADDR_TILEINFO* pTileInfo,
+ BOOL_32 resolved,
+ UINT_32* pX, UINT_32* pY, UINT_32* pSlice, UINT_32* pSample, UINT_32* pPlane) const;
+
+ UINT_64 ComputeFmaskAddrFromCoordMacroTiled(
+ UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 sample, UINT_32 plane,
+ UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ AddrTileMode tileMode, UINT_32 pipeSwizzle, UINT_32 bankSwizzle,
+ BOOL_32 ignoreSE,
+ ADDR_TILEINFO* pTileInfo,
+ BOOL_32 resolved,
+ UINT_32* pBitPosition) const;
+
+ /// Sanity check functions
+ BOOL_32 SanityCheckMacroTiled(
+ ADDR_TILEINFO* pTileInfo) const;
+
+protected:
+ UINT_32 m_ranks; ///< Number of ranks - MC_ARB_RAMCFG.NOOFRANK
+ UINT_32 m_logicalBanks; ///< Logical banks = m_banks * m_ranks if m_banks != 16
+ UINT_32 m_bankInterleave; ///< Bank interleave, as a multiple of pipe interleave size
+};
+
+#endif
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.cpp b/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.cpp
new file mode 100644
index 00000000000..a858b55b7cf
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.cpp
@@ -0,0 +1,2818 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file siaddrlib.cpp
+* @brief Contains the implementation for the SIAddrLib class.
+***************************************************************************************************
+*/
+
+#include "siaddrlib.h"
+
+#include "si_gb_reg.h"
+
+#include "si_ci_vi_merged_enum.h"
+
+#if BRAHMA_BUILD
+#include "amdgpu_id.h"
+#else
+#include "si_id.h"
+#endif
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+***************************************************************************************************
+* AddrSIHwlInit
+*
+* @brief
+* Creates an SIAddrLib object by forwarding pClient to SIAddrLib::CreateObj.
+*
+* @return
+* Returns whatever SIAddrLib::CreateObj returns, typed as an AddrLib pointer.
+***************************************************************************************************
+*/
+AddrLib* AddrSIHwlInit(const AddrClient* pClient)
+{
+ return SIAddrLib::CreateObj(pClient);
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::SIAddrLib
+*
+* @brief
+* Constructor: forwards pClient to EgBasedAddrLib, starts with m_noOfEntries == 0,
+* sets m_class to SI_ADDRLIB and zero-fills m_settings.
+***************************************************************************************************
+*/
+SIAddrLib::SIAddrLib(const AddrClient* pClient) :
+ EgBasedAddrLib(pClient),
+ m_noOfEntries(0)
+{
+ m_class = SI_ADDRLIB;
+ memset(&m_settings, 0, sizeof(m_settings)); // all settings bytes start cleared
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::~SIAddrLib
+*
+* @brief
+* Destructor (empty body; nothing is released here)
+***************************************************************************************************
+*/
+SIAddrLib::~SIAddrLib()
+{
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::HwlGetPipes
+*
+* @brief
+* Get the number of pipes for a surface from its tile info
+* @return
+* GetPipePerSurf(pTileInfo->pipeConfig); m_pipes (after asserting) when pTileInfo is NULL
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlGetPipes(
+ const ADDR_TILEINFO* pTileInfo ///< [in] Tile info
+ ) const
+{
+ UINT_32 numPipes;
+
+ if (pTileInfo)
+ {
+ numPipes = GetPipePerSurf(pTileInfo->pipeConfig);
+ }
+ else
+ {
+ ADDR_ASSERT_ALWAYS(); // a NULL tile info is treated as a caller error
+ numPipes = m_pipes; // Suppose we should still have a global pipes
+ }
+
+ return numPipes;
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::GetPipePerSurf
+* @brief
+* Get the pipe count implied by a pipe config (e.g. pTileInfo->pipeConfig)
+* @return
+* 2, 4, 8 or 16 for a known config; m_pipes (after asserting) for any other value
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::GetPipePerSurf(
+ AddrPipeCfg pipeConfig ///< [in] pipe config
+ ) const
+{
+ UINT_32 numPipes = 0;
+
+ switch (pipeConfig)
+ {
+ case ADDR_PIPECFG_P2:
+ numPipes = 2;
+ break;
+ case ADDR_PIPECFG_P4_8x16: // all P4_* configs map to 4 pipes
+ case ADDR_PIPECFG_P4_16x16:
+ case ADDR_PIPECFG_P4_16x32:
+ case ADDR_PIPECFG_P4_32x32:
+ numPipes = 4;
+ break;
+ case ADDR_PIPECFG_P8_16x16_8x16: // all P8_* configs map to 8 pipes
+ case ADDR_PIPECFG_P8_16x32_8x16:
+ case ADDR_PIPECFG_P8_32x32_8x16:
+ case ADDR_PIPECFG_P8_16x32_16x16:
+ case ADDR_PIPECFG_P8_32x32_16x16:
+ case ADDR_PIPECFG_P8_32x32_16x32:
+ case ADDR_PIPECFG_P8_32x64_32x32:
+ numPipes = 8;
+ break;
+ case ADDR_PIPECFG_P16_32x32_8x16: // all P16_* configs map to 16 pipes
+ case ADDR_PIPECFG_P16_32x32_16x16:
+ numPipes = 16;
+ break;
+ default:
+ ADDR_ASSERT(!"Invalid pipe config");
+ numPipes = m_pipes; // fall back to the library-wide pipe count
+ }
+ return numPipes;
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::ComputePipeFromCoord
+*
+* @brief
+* Compute pipe number from coordinates: XOR of micro tile coordinate bits, then pipe swizzle
+* @return
+* Pipe number
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::ComputePipeFromCoord(
+ UINT_32 x, ///< [in] x coordinate
+ UINT_32 y, ///< [in] y coordinate
+ UINT_32 slice, ///< [in] slice index
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32 pipeSwizzle, ///< [in] pipe swizzle
+ BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored (not read by this implementation)
+ ADDR_TILEINFO* pTileInfo ///< [in] Tile info (dereferenced without a NULL check)
+ ) const
+{
+ UINT_32 pipe;
+ UINT_32 pipeBit0 = 0;
+ UINT_32 pipeBit1 = 0;
+ UINT_32 pipeBit2 = 0;
+ UINT_32 pipeBit3 = 0;
+ UINT_32 sliceRotation;
+ UINT_32 numPipes = 0;
+
+ UINT_32 tx = x / MicroTileWidth; // coordinates in units of micro tiles
+ UINT_32 ty = y / MicroTileHeight;
+ UINT_32 x3 = _BIT(tx,0); // xN / yN are named after the pixel coordinate bit they presumably
+ UINT_32 x4 = _BIT(tx,1); // represent (assuming 8-pixel micro tiles - TODO confirm)
+ UINT_32 x5 = _BIT(tx,2);
+ UINT_32 x6 = _BIT(tx,3);
+ UINT_32 y3 = _BIT(ty,0);
+ UINT_32 y4 = _BIT(ty,1);
+ UINT_32 y5 = _BIT(ty,2);
+ UINT_32 y6 = _BIT(ty,3);
+
+ switch (pTileInfo->pipeConfig)
+ {
+ case ADDR_PIPECFG_P2:
+ pipeBit0 = x3 ^ y3;
+ numPipes = 2;
+ break;
+ case ADDR_PIPECFG_P4_8x16:
+ pipeBit0 = x4 ^ y3;
+ pipeBit1 = x3 ^ y4;
+ numPipes = 4;
+ break;
+ case ADDR_PIPECFG_P4_16x16:
+ pipeBit0 = x3 ^ y3 ^ x4;
+ pipeBit1 = x4 ^ y4;
+ numPipes = 4;
+ break;
+ case ADDR_PIPECFG_P4_16x32:
+ pipeBit0 = x3 ^ y3 ^ x4;
+ pipeBit1 = x4 ^ y5;
+ numPipes = 4;
+ break;
+ case ADDR_PIPECFG_P4_32x32:
+ pipeBit0 = x3 ^ y3 ^ x5;
+ pipeBit1 = x5 ^ y5;
+ numPipes = 4;
+ break;
+ case ADDR_PIPECFG_P8_16x16_8x16: // NOTE(review): pipeBit2 is never set for this 8-pipe config - confirm
+ pipeBit0 = x4 ^ y3 ^ x5;
+ pipeBit1 = x3 ^ y5;
+ numPipes = 8;
+ break;
+ case ADDR_PIPECFG_P8_16x32_8x16:
+ pipeBit0 = x4 ^ y3 ^ x5;
+ pipeBit1 = x3 ^ y4;
+ pipeBit2 = x4 ^ y5;
+ numPipes = 8;
+ break;
+ case ADDR_PIPECFG_P8_16x32_16x16:
+ pipeBit0 = x3 ^ y3 ^ x4;
+ pipeBit1 = x5 ^ y4;
+ pipeBit2 = x4 ^ y5;
+ numPipes = 8;
+ break;
+ case ADDR_PIPECFG_P8_32x32_8x16:
+ pipeBit0 = x4 ^ y3 ^ x5;
+ pipeBit1 = x3 ^ y4;
+ pipeBit2 = x5 ^ y5;
+ numPipes = 8;
+ break;
+ case ADDR_PIPECFG_P8_32x32_16x16:
+ pipeBit0 = x3 ^ y3 ^ x4;
+ pipeBit1 = x4 ^ y4;
+ pipeBit2 = x5 ^ y5;
+ numPipes = 8;
+ break;
+ case ADDR_PIPECFG_P8_32x32_16x32:
+ pipeBit0 = x3 ^ y3 ^ x4;
+ pipeBit1 = x4 ^ y6;
+ pipeBit2 = x5 ^ y5;
+ numPipes = 8;
+ break;
+ case ADDR_PIPECFG_P8_32x64_32x32:
+ pipeBit0 = x3 ^ y3 ^ x5;
+ pipeBit1 = x6 ^ y5;
+ pipeBit2 = x5 ^ y6;
+ numPipes = 8;
+ break;
+ case ADDR_PIPECFG_P16_32x32_8x16:
+ pipeBit0 = x4 ^ y3;
+ pipeBit1 = x3 ^ y4;
+ pipeBit2 = x5 ^ y6;
+ pipeBit3 = x6 ^ y5;
+ numPipes = 16;
+ break;
+ case ADDR_PIPECFG_P16_32x32_16x16:
+ pipeBit0 = x3 ^ y3 ^ x4;
+ pipeBit1 = x4 ^ y4;
+ pipeBit2 = x5 ^ y6;
+ pipeBit3 = x6 ^ y5;
+ numPipes = 16;
+ break;
+ default:
+ ADDR_UNHANDLED_CASE(); // numPipes stays 0 here, so the mask below becomes all ones
+ break;
+ }
+ pipe = pipeBit0 | (pipeBit1 << 1) | (pipeBit2 << 2) | (pipeBit3 << 3);
+
+ UINT_32 microTileThickness = ComputeSurfaceThickness(tileMode);
+
+ //
+ // Apply pipe rotation for the slice (3D tiled modes only).
+ //
+ switch (tileMode)
+ {
+ case ADDR_TM_3D_TILED_THIN1: //fall through thin
+ case ADDR_TM_3D_TILED_THICK: //fall through thick
+ case ADDR_TM_3D_TILED_XTHICK:
+ sliceRotation = // signed cast so numPipes == 2 yields 0 and is clamped to 1 by Max
+ Max(1, static_cast<INT_32>(numPipes / 2) - 1) * (slice / microTileThickness);
+ break;
+ default:
+ sliceRotation = 0;
+ break;
+ }
+ pipeSwizzle += sliceRotation;
+ pipeSwizzle &= (numPipes - 1); // wrap the swizzle into [0, numPipes)
+
+ pipe = pipe ^ pipeSwizzle;
+
+ return pipe;
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::ComputeTileCoordFromPipeAndElemIdx
+*
+* @brief
+* Compute (x,y) of a tile within a macro tile from the pipe index and per-pipe element index
+* @return
+* N/A (results are written to *pX and *pY; both are left untouched for an unhandled pipeCfg)
+***************************************************************************************************
+*/
+VOID SIAddrLib::ComputeTileCoordFromPipeAndElemIdx(
+ UINT_32 elemIdx, ///< [in] per pipe element index within a macro tile
+ UINT_32 pipe, ///< [in] pipe index
+ AddrPipeCfg pipeCfg, ///< [in] pipe config
+ UINT_32 pitchInMacroTile, ///< [in] surface pitch in macro tile
+ UINT_32 x, ///< [in] x coordinate of the (0,0) tile in a macro tile
+ UINT_32 y, ///< [in] y coordinate of the (0,0) tile in a macro tile
+ UINT_32* pX, ///< [out] x coordinate
+ UINT_32* pY ///< [out] y coordinate
+ ) const
+{
+ UINT_32 pipebit0 = _BIT(pipe,0);
+ UINT_32 pipebit1 = _BIT(pipe,1);
+ UINT_32 pipebit2 = _BIT(pipe,2);
+ UINT_32 pipebit3 = _BIT(pipe,3);
+ UINT_32 elemIdx0 = _BIT(elemIdx,0);
+ UINT_32 elemIdx1 = _BIT(elemIdx,1);
+ UINT_32 elemIdx2 = _BIT(elemIdx,2);
+ UINT_32 x3 = 0; // recovered coordinate bits; each case fills in the ones it needs
+ UINT_32 x4 = 0;
+ UINT_32 x5 = 0;
+ UINT_32 x6 = 0;
+ UINT_32 y3 = 0;
+ UINT_32 y4 = 0;
+ UINT_32 y5 = 0;
+ UINT_32 y6 = 0;
+
+ switch(pipeCfg)
+ {
+ case ADDR_PIPECFG_P2:
+ x4 = elemIdx2;
+ y4 = elemIdx1 ^ x4;
+ y3 = elemIdx0 ^ x4;
+ x3 = pipebit0 ^ y3;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ break;
+ case ADDR_PIPECFG_P4_8x16:
+ x4 = elemIdx1;
+ y4 = elemIdx0 ^ x4;
+ x3 = pipebit1 ^ y4;
+ y3 = pipebit0 ^ x4;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ break;
+ case ADDR_PIPECFG_P4_16x16:
+ x4 = elemIdx1;
+ y3 = elemIdx0 ^ x4;
+ y4 = pipebit1 ^ x4;
+ x3 = pipebit0 ^ y3 ^ x4;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ break;
+ case ADDR_PIPECFG_P4_16x32:
+ x3 = elemIdx0 ^ pipebit0;
+ y5 = _BIT(y,5); // bit 5 comes from the macro tile origin passed in by the caller
+ x4 = pipebit1 ^ y5;
+ y3 = pipebit0 ^ x3 ^ x4;
+ y4 = elemIdx1 ^ x4;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ break;
+ case ADDR_PIPECFG_P4_32x32:
+ x4 = elemIdx2;
+ y3 = elemIdx0 ^ x4;
+ y4 = elemIdx1 ^ x4;
+ if((pitchInMacroTile % 2) == 0)
+ { //even: x5 is derived from the pipe bits and emitted as a third x bit
+ y5 = _BIT(y,5);
+ x5 = pipebit1 ^ y5;
+ x3 = pipebit0 ^ y3 ^ x5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(3, x5, x4, x3);
+ }
+ else
+ { //odd: x5 is taken from the input x and only two x bits are emitted
+ x5 = _BIT(x,5);
+ x3 = pipebit0 ^ y3 ^ x5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ }
+ break;
+ case ADDR_PIPECFG_P8_16x16_8x16:
+ x4 = elemIdx0;
+ y5 = _BIT(y,5);
+ x5 = _BIT(x,5);
+ x3 = pipebit1 ^ y5;
+ y4 = pipebit2 ^ x4;
+ y3 = pipebit0 ^ x5 ^ x4;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ break;
+ case ADDR_PIPECFG_P8_16x32_8x16:
+ x3 = elemIdx0;
+ y4 = pipebit1 ^ x3;
+ y5 = _BIT(y,5);
+ x5 = _BIT(x,5);
+ x4 = pipebit2 ^ y5;
+ y3 = pipebit0 ^ x4 ^ x5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ break;
+ case ADDR_PIPECFG_P8_32x32_8x16:
+ x4 = elemIdx1;
+ y4 = elemIdx0 ^ x4;
+ x3 = pipebit1 ^ y4;
+ if((pitchInMacroTile % 2) == 0)
+ { //even
+ y5 = _BIT(y,5);
+ x5 = _BIT(x,5); // NOTE(review): this value is overwritten on the next line before any use
+ x5 = pipebit2 ^ y5;
+ y3 = pipebit0 ^ x4 ^ x5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(3, x5, x4, x3);
+ }
+ else
+ { //odd
+ x5 = _BIT(x,5);
+ y3 = pipebit0 ^ x4 ^ x5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ }
+ break;
+ case ADDR_PIPECFG_P8_16x32_16x16:
+ x3 = elemIdx0;
+ x5 = _BIT(x,5);
+ y5 = _BIT(y,5);
+ x4 = pipebit2 ^ y5;
+ y4 = pipebit1 ^ x5;
+ y3 = pipebit0 ^ x3 ^ x4;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ break;
+ case ADDR_PIPECFG_P8_32x32_16x16:
+ x4 = elemIdx1;
+ y3 = elemIdx0 ^ x4;
+ x3 = y3^x4^pipebit0;
+ y4 = pipebit1 ^ x4;
+ if((pitchInMacroTile % 2) == 0)
+ { //even
+ y5 = _BIT(y,5);
+ x5 = pipebit2 ^ y5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(3, x5, x4, x3);
+ }
+ else
+ { //odd
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ }
+ break;
+ case ADDR_PIPECFG_P8_32x32_16x32:
+ if((pitchInMacroTile % 2) == 0)
+ { //even
+ y5 = _BIT(y,5);
+ y6 = _BIT(y,6);
+ x4 = pipebit1 ^ y6;
+ y3 = elemIdx0 ^ x4;
+ y4 = elemIdx1 ^ x4;
+ x3 = pipebit0 ^ y3 ^ x4;
+ x5 = pipebit2 ^ y5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(3, x5, x4, x3);
+ }
+ else
+ { //odd
+ y6 = _BIT(y,6);
+ x4 = pipebit1 ^ y6;
+ y3 = elemIdx0 ^ x4;
+ y4 = elemIdx1 ^ x4;
+ x3 = pipebit0 ^ y3 ^ x4;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(2, x4, x3);
+ }
+ break;
+ case ADDR_PIPECFG_P8_32x64_32x32:
+ x4 = elemIdx2;
+ y3 = elemIdx0 ^ x4;
+ y4 = elemIdx1 ^ x4;
+ if((pitchInMacroTile % 4) == 0)
+ { //multiple of 4: four x bits are emitted
+ y5 = _BIT(y,5);
+ y6 = _BIT(y,6);
+ x5 = pipebit2 ^ y6;
+ x6 = pipebit1 ^ y5;
+ x3 = pipebit0 ^ y3 ^ x5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(4, x6, x5, x4, x3);
+ }
+ else
+ {
+ y6 = _BIT(y,6);
+ x5 = pipebit2 ^ y6;
+ x3 = pipebit0 ^ y3 ^ x5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(3, x5, x4, x3);
+ }
+ break;
+ case ADDR_PIPECFG_P16_32x32_8x16:
+ x4 = elemIdx1;
+ y4 = elemIdx0 ^ x4;
+ y3 = pipebit0 ^ x4;
+ x3 = pipebit1 ^ y4;
+ if((pitchInMacroTile % 4) == 0)
+ { //multiple of 4
+ y5 = _BIT(y,5);
+ y6 = _BIT(y,6);
+ x5 = pipebit2 ^ y6;
+ x6 = pipebit3 ^ y5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(4, x6, x5,x4, x3);
+ }
+ else
+ {
+ y6 = _BIT(y,6);
+ x5 = pipebit2 ^ y6;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(3, x5, x4, x3);
+ }
+ break;
+ case ADDR_PIPECFG_P16_32x32_16x16:
+ x4 = elemIdx1;
+ y3 = elemIdx0 ^ x4;
+ y4 = pipebit1 ^ x4;
+ x3 = pipebit0 ^ y3 ^ x4;
+ if((pitchInMacroTile % 4) == 0)
+ { //multiple of 4
+ y5 = _BIT(y,5);
+ y6 = _BIT(y,6);
+ x5 = pipebit2 ^ y6;
+ x6 = pipebit3 ^ y5;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(4, x6, x5, x4, x3);
+ }
+ else
+ {
+ y6 = _BIT(y,6);
+ x5 = pipebit2 ^ y6;
+ *pY = Bits2Number(2, y4, y3);
+ *pX = Bits2Number(3, x5, x4, x3);
+ }
+ break;
+ default:
+ ADDR_UNHANDLED_CASE(); // *pX and *pY are not written on this path
+ }
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::TileCoordToMaskElementIndex
+*
+* @brief
+* Compute element index from the low two bits of each tile coordinate, per pipe config
+* @return
+* Element index (0 for an unhandled pipe config, which also leaves both outputs unwritten)
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::TileCoordToMaskElementIndex(
+ UINT_32 tx, ///< [in] x coord, in Tiles
+ UINT_32 ty, ///< [in] y coord, in Tiles
+ AddrPipeCfg pipeConfig, ///< [in] pipe config
+ UINT_32* macroShift, ///< [out] macro shift
+ UINT_32* elemIdxBits ///< [out] tile offset bits
+ ) const
+{
+ UINT_32 elemIdx = 0;
+ UINT_32 elemIdx0, elemIdx1, elemIdx2; // assigned inside each case before being read
+ UINT_32 tx0, tx1;
+ UINT_32 ty0, ty1;
+
+ tx0 = _BIT(tx,0); // only bits 0 and 1 of tx / ty take part in the result
+ tx1 = _BIT(tx,1);
+ ty0 = _BIT(ty,0);
+ ty1 = _BIT(ty,1);
+
+ switch(pipeConfig)
+ {
+ case ADDR_PIPECFG_P2:
+ *macroShift = 3;
+ *elemIdxBits =3;
+ elemIdx2 = tx1;
+ elemIdx1 = tx1 ^ ty1;
+ elemIdx0 = tx1 ^ ty0;
+ elemIdx = Bits2Number(3,elemIdx2,elemIdx1,elemIdx0);
+ break;
+ case ADDR_PIPECFG_P4_8x16:
+ *macroShift = 2;
+ *elemIdxBits =2;
+ elemIdx1 = tx1;
+ elemIdx0 = tx1 ^ ty1;
+ elemIdx = Bits2Number(2,elemIdx1,elemIdx0);
+ break;
+ case ADDR_PIPECFG_P4_16x16:
+ *macroShift = 2;
+ *elemIdxBits =2;
+ elemIdx0 = tx1^ty0;
+ elemIdx1 = tx1;
+ elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+ break;
+ case ADDR_PIPECFG_P4_16x32:
+ *macroShift = 2;
+ *elemIdxBits =2;
+ elemIdx0 = tx1^ty0;
+ elemIdx1 = tx1^ty1;
+ elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+ break;
+ case ADDR_PIPECFG_P4_32x32:
+ *macroShift = 2;
+ *elemIdxBits =3;
+ elemIdx0 = tx1^ty0;
+ elemIdx1 = tx1^ty1;
+ elemIdx2 = tx1;
+ elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0);
+ break;
+ case ADDR_PIPECFG_P8_16x16_8x16:
+ *macroShift = 1;
+ *elemIdxBits =1;
+ elemIdx0 = tx1;
+ elemIdx = elemIdx0;
+ break;
+ case ADDR_PIPECFG_P8_16x32_8x16:
+ *macroShift = 1;
+ *elemIdxBits =1;
+ elemIdx0 = tx0;
+ elemIdx = elemIdx0;
+ break;
+ case ADDR_PIPECFG_P8_32x32_8x16:
+ *macroShift = 1;
+ *elemIdxBits =2;
+ elemIdx1 = tx1;
+ elemIdx0 = tx1^ty1;
+ elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+ break;
+ case ADDR_PIPECFG_P8_16x32_16x16:
+ *macroShift = 1;
+ *elemIdxBits =1;
+ elemIdx0 = tx0;
+ elemIdx = elemIdx0;
+ break;
+ case ADDR_PIPECFG_P8_32x32_16x16:
+ *macroShift = 1;
+ *elemIdxBits =2;
+ elemIdx0 = tx1^ty0;
+ elemIdx1 = tx1;
+ elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+ break;
+ case ADDR_PIPECFG_P8_32x32_16x32:
+ *macroShift = 1;
+ *elemIdxBits =2;
+ elemIdx0 = tx1^ty0;
+ elemIdx1 = tx1^ty1;
+ elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+ break;
+ case ADDR_PIPECFG_P8_32x64_32x32:
+ *macroShift = 1;
+ *elemIdxBits =3;
+ elemIdx0 = tx1^ty0;
+ elemIdx1 = tx1^ty1;
+ elemIdx2 = tx1;
+ elemIdx = Bits2Number(3, elemIdx2, elemIdx1, elemIdx0);
+ break;
+ case ADDR_PIPECFG_P16_32x32_8x16:
+ *macroShift = 0;
+ *elemIdxBits =2;
+ elemIdx0 = tx1^ty1;
+ elemIdx1 = tx1;
+ elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+ break;
+ case ADDR_PIPECFG_P16_32x32_16x16:
+ *macroShift = 0;
+ *elemIdxBits =2;
+ elemIdx0 = tx1^ty0;
+ elemIdx1 = tx1;
+ elemIdx = Bits2Number(2, elemIdx1, elemIdx0);
+ break;
+ default:
+ ADDR_UNHANDLED_CASE(); // *macroShift and *elemIdxBits are not written on this path
+ break;
+ }
+
+ return elemIdx;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeTileDataWidthAndHeightLinear
+*
+*   @brief
+*       Report the square cache shape used for per-tile data (CMASK and HTILE) in linear layout
+*
+*   @return
+*       N/A
+*
+*   @note
+*       Both outputs are measured in pixels. bpp is accepted but not consulted.
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlComputeTileDataWidthAndHeightLinear(
+    UINT_32*        pMacroWidth,     ///< [out] macro tile width
+    UINT_32*        pMacroHeight,    ///< [out] macro tile height
+    UINT_32         bpp,             ///< [in] bits per pixel
+    ADDR_TILEINFO*  pTileInfo        ///< [in] tile info
+    ) const
+{
+    ADDR_ASSERT(pTileInfo != NULL);
+
+    /// In linear mode, the htile or cmask buffer must be padded out to 4 tiles,
+    /// except for the pipe configs listed below, which must be padded out to 8 tiles.
+    /// Actually there are more pipe configs which need 8-tile padding but SI family
+    /// has a bug which is fixed in CI family
+    UINT_32 tilesPerSide;
+
+    switch (pTileInfo->pipeConfig)
+    {
+        case ADDR_PIPECFG_P8_32x64_32x32:
+        case ADDR_PIPECFG_P16_32x32_8x16:
+        case ADDR_PIPECFG_P8_32x32_16x16:
+            tilesPerSide = 8;
+            break;
+        default:
+            tilesPerSide = 4;
+            break;
+    }
+
+    *pMacroWidth  = tilesPerSide * MicroTileWidth;
+    *pMacroHeight = tilesPerSide * MicroTileHeight;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeHtileBytes
+*
+*   @brief
+*       Compute htile size in bytes by forwarding every argument to ComputeHtileBytes
+*
+*   @return
+*       Htile size in bytes
+***************************************************************************************************
+*/
+UINT_64 SIAddrLib::HwlComputeHtileBytes(
+    UINT_32     pitch,          ///< [in] pitch
+    UINT_32     height,         ///< [in] height
+    UINT_32     bpp,            ///< [in] bits per pixel
+    BOOL_32     isLinear,       ///< [in] if it is linear mode
+    UINT_32     numSlices,      ///< [in] number of slices
+    UINT_64*    pSliceBytes,    ///< [out] bytes per slice
+    UINT_32     baseAlign       ///< [in] base alignments
+    ) const
+{
+    // No SI-specific adjustment is applied: the arguments are passed through unchanged.
+    const UINT_64 htileBytes =
+        ComputeHtileBytes(pitch, height, bpp, isLinear, numSlices, pSliceBytes, baseAlign);
+
+    return htileBytes;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeXmaskAddrFromCoord
+*
+*   @brief
+*       Compute address from coordinates for htile/cmask
+*   @return
+*       Byte address; the bit offset inside that byte is written to *pBitPosition
+*   @note
+*       isWidth8 and isHeight8 are not consulted here: ComputeHtileInfo is always called with
+*       TRUE for both.
+***************************************************************************************************
+*/
+UINT_64 SIAddrLib::HwlComputeXmaskAddrFromCoord(
+    UINT_32        pitch,          ///< [in] pitch
+    UINT_32        height,         ///< [in] height
+    UINT_32        x,              ///< [in] x coord
+    UINT_32        y,              ///< [in] y coord
+    UINT_32        slice,          ///< [in] slice/depth index
+    UINT_32        numSlices,      ///< [in] number of slices
+    UINT_32        factor,         ///< [in] factor that indicates cmask(2) or htile(1)
+    BOOL_32        isLinear,       ///< [in] linear or tiled HTILE layout
+    BOOL_32        isWidth8,       ///< [in] TRUE if width is 8, FALSE means 4. It's register value
+    BOOL_32        isHeight8,      ///< [in] TRUE if height is 8, FALSE means 4. It's register value
+    ADDR_TILEINFO* pTileInfo,      ///< [in] Tile info
+    UINT_32*       pBitPosition    ///< [out] bit position inside a byte
+    ) const
+{
+    UINT_32 tx = x / MicroTileWidth;
+    UINT_32 ty = y / MicroTileHeight;
+    UINT_32 newPitch;
+    UINT_32 newHeight;
+    UINT_64 totalBytes;
+    UINT_32 macroWidth;
+    UINT_32 macroHeight;
+    UINT_64 sliceBytes;     // Filled by ComputeHtileInfo only; not read afterwards
+    UINT_32 baseAlign;      // Filled by ComputeHtileInfo only; not read afterwards
+    UINT_32 tileNumPerPipe;
+    UINT_32 elemBits;
+
+    if (factor == 2) //CMASK
+    {
+        ADDR_CMASK_FLAGS flags = {{0}};
+
+        tileNumPerPipe = 256;
+
+        ComputeCmaskInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &macroWidth,
+                         &macroHeight);
+        elemBits = CmaskElemBits;
+    }
+    else //HTile
+    {
+        ADDR_HTILE_FLAGS flags = {{0}};
+
+        tileNumPerPipe = 512;
+
+        ComputeHtileInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         TRUE,
+                         TRUE,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &macroWidth,
+                         &macroHeight,
+                         &sliceBytes,
+                         &baseAlign);
+        elemBits = 32;
+    }
+
+    const UINT_32 pitchInTile = newPitch / MicroTileWidth;
+    // NOTE(review): the height is divided by MicroTileWidth, not MicroTileHeight - harmless only
+    // if the two constants are equal; confirm.
+    const UINT_32 heightInTile = newHeight / MicroTileWidth;
+    UINT_64 macroOffset; // Per pipe starting offset of the macro tile in which this tile lies.
+    UINT_64 microNumber; // Per pipe element number of this tile, counted from macroOffset.
+    UINT_32 microX;
+    UINT_32 microY;
+    UINT_64 microOffset;
+    UINT_32 microShift;
+    UINT_64 totalOffset;
+    UINT_32 elemIdxBits;
+    UINT_32 elemIdx =
+        TileCoordToMaskElementIndex(tx, ty, pTileInfo->pipeConfig, &microShift, &elemIdxBits);
+
+    UINT_32 numPipes = HwlGetPipes(pTileInfo);
+
+    if (isLinear)
+    {   //linear addressing
+        // Linear addressing is extremely wasteful of memory if slice > 1, since each pipe has
+        // the full slice memory foot print instead of divided by numPipes.
+        microX = tx / 4; // Macro Tile is 4x4
+        microY = ty / 4 ;
+        microNumber = static_cast<UINT_64>(microX + microY * (pitchInTile / 4)) << microShift;
+
+        UINT_32 sliceBits = pitchInTile * heightInTile;
+
+        // do htile single slice alignment if the flag is true
+        if (m_configFlags.useHtileSliceAlign && (factor == 1))  //Htile
+        {
+            sliceBits = PowTwoAlign(sliceBits, BITS_TO_BYTES(HtileCacheBits) * numPipes / elemBits);
+        }
+        macroOffset = slice * (sliceBits / numPipes) * elemBits ;
+    }
+    else
+    {   //tiled addressing
+        const UINT_32 macroWidthInTile = macroWidth / MicroTileWidth; // Now in unit of Tiles
+        const UINT_32 macroHeightInTile = macroHeight / MicroTileHeight;
+        const UINT_32 pitchInCL = pitchInTile / macroWidthInTile;
+        const UINT_32 heightInCL = heightInTile / macroHeightInTile;
+
+        const UINT_32 macroX = x / macroWidth;
+        const UINT_32 macroY = y / macroHeight;
+        const UINT_32 macroNumber = macroX + macroY * pitchInCL + slice * pitchInCL * heightInCL;
+
+        // Per pipe starting offset of the cache line in which this tile lies.
+        microX = (x % macroWidth) / MicroTileWidth / 4; // Macro Tile is 4x4
+        microY = (y % macroHeight) / MicroTileHeight / 4 ;
+        microNumber = static_cast<UINT_64>(microX + microY * (macroWidth / MicroTileWidth / 4))
+                      << microShift;
+
+        macroOffset = macroNumber * tileNumPerPipe * elemBits;
+    }
+
+    if(elemIdxBits == microShift)
+    {
+        microNumber += elemIdx;
+    }
+    else
+    {
+        // Clear the low elemIdxBits bits before adding the element index.
+        microNumber >>= elemIdxBits;
+        microNumber <<= elemIdxBits;
+        microNumber += elemIdx;
+    }
+
+    microOffset = elemBits * microNumber;
+    totalOffset = microOffset + macroOffset;
+
+    // Interleave the per-pipe bit offset across pipes in units of m_pipeInterleaveBytes.
+    UINT_32 pipe = ComputePipeFromCoord(x, y, 0, ADDR_TM_2D_TILED_THIN1, 0, FALSE, pTileInfo);
+    UINT_64 addrInBits = totalOffset % (m_pipeInterleaveBytes * 8) +
+                   pipe * (m_pipeInterleaveBytes * 8) +
+                   totalOffset / (m_pipeInterleaveBytes * 8) * (m_pipeInterleaveBytes * 8) * numPipes;
+    *pBitPosition = static_cast<UINT_32>(addrInBits) % 8;
+    UINT_64 addr = addrInBits / 8;
+
+    return addr;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeXmaskCoordFromAddr
+*
+*   @brief
+*       Compute the coord from an address of a cmask/htile
+*
+*   @return
+*       N/A
+*
+*   @note
+*       This method is reused by htile, so rename to Xmask.
+*       factor == 2 selects the CMASK path; any other value takes the HTILE path.
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlComputeXmaskCoordFromAddr(
+    UINT_64         addr,           ///< [in] address
+    UINT_32         bitPosition,    ///< [in] bitPosition in a byte
+    UINT_32         pitch,          ///< [in] pitch
+    UINT_32         height,         ///< [in] height
+    UINT_32         numSlices,      ///< [in] number of slices
+    UINT_32         factor,         ///< [in] factor that indicates cmask or htile
+    BOOL_32         isLinear,       ///< [in] linear or tiled HTILE layout
+    BOOL_32         isWidth8,       ///< [in] Not used by SI
+    BOOL_32         isHeight8,      ///< [in] Not used by SI
+    ADDR_TILEINFO*  pTileInfo,      ///< [in] Tile info
+    UINT_32*        pX,             ///< [out] x coord
+    UINT_32*        pY,             ///< [out] y coord
+    UINT_32*        pSlice          ///< [out] slice index
+    ) const
+{
+    UINT_32 newPitch;
+    UINT_32 newHeight;
+    UINT_64 totalBytes;
+    UINT_32 clWidth;
+    UINT_32 clHeight;
+    UINT_32 tileNumPerPipe;
+    UINT_64 sliceBytes = 0; // Only filled on the HTILE path; only read on the linear HTILE path
+
+    *pX = 0;
+    *pY = 0;
+    *pSlice = 0;
+
+    if (factor == 2) //CMASK
+    {
+        ADDR_CMASK_FLAGS flags = {{0}};
+
+        tileNumPerPipe = 256;
+
+        ComputeCmaskInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &clWidth,
+                         &clHeight);
+    }
+    else //HTile
+    {
+        ADDR_HTILE_FLAGS flags = {{0}};
+
+        tileNumPerPipe = 512;
+
+        ComputeHtileInfo(flags,
+                         pitch,
+                         height,
+                         numSlices,
+                         isLinear,
+                         TRUE,
+                         TRUE,
+                         pTileInfo,
+                         &newPitch,
+                         &newHeight,
+                         &totalBytes,
+                         &clWidth,
+                         &clHeight,
+                         &sliceBytes);
+    }
+
+    const UINT_32 pitchInTile = newPitch / MicroTileWidth;
+    // NOTE(review): the height is divided by MicroTileWidth, not MicroTileHeight - harmless only
+    // if the two constants are equal; confirm.
+    const UINT_32 heightInTile = newHeight / MicroTileWidth;
+    const UINT_32 pitchInMacroTile = pitchInTile / 4;
+    UINT_32 macroShift;
+    UINT_32 elemIdxBits;
+    // get macroShift and elemIdxBits
+    TileCoordToMaskElementIndex(0, 0, pTileInfo->pipeConfig, &macroShift, &elemIdxBits);
+
+    const UINT_32 numPipes = HwlGetPipes(pTileInfo);
+    const UINT_32 pipe = (UINT_32)((addr / m_pipeInterleaveBytes) % numPipes);
+    // per pipe
+    UINT_64 localOffset = (addr % m_pipeInterleaveBytes) +
+        (addr / m_pipeInterleaveBytes / numPipes)* m_pipeInterleaveBytes;
+
+    UINT_32 tileIndex;
+    if (factor == 2) //CMASK
+    {
+        // Two elements per byte: any non-zero bit position selects the second one.
+        tileIndex = (UINT_32)(localOffset * 2 + (bitPosition != 0));
+    }
+    else
+    {
+        // Four bytes per element.
+        tileIndex = (UINT_32)(localOffset / 4);
+    }
+
+    UINT_32 macroOffset;
+    if (isLinear)
+    {
+        UINT_32 sliceSizeInTile = pitchInTile * heightInTile;
+
+        // do htile single slice alignment if the flag is true
+        if (m_configFlags.useHtileSliceAlign && (factor == 1))  //Htile
+        {
+            sliceSizeInTile = PowTwoAlign(sliceSizeInTile, static_cast<UINT_32>(sliceBytes) / 64);
+        }
+        *pSlice = tileIndex / (sliceSizeInTile / numPipes);
+        macroOffset = tileIndex % (sliceSizeInTile / numPipes);
+    }
+    else
+    {
+        const UINT_32 clWidthInTile = clWidth / MicroTileWidth; // Now in unit of Tiles
+        const UINT_32 clHeightInTile = clHeight / MicroTileHeight;
+        const UINT_32 pitchInCL = pitchInTile / clWidthInTile;
+        const UINT_32 heightInCL = heightInTile / clHeightInTile;
+        const UINT_32 clIndex = tileIndex / tileNumPerPipe;
+
+        UINT_32 clX = clIndex % pitchInCL;
+        UINT_32 clY = (clIndex % (heightInCL * pitchInCL)) / pitchInCL;
+
+        *pX = clX * clWidthInTile * MicroTileWidth;
+        *pY = clY * clHeightInTile * MicroTileHeight;
+        *pSlice = clIndex / (heightInCL * pitchInCL);
+
+        macroOffset = tileIndex % tileNumPerPipe;
+    }
+
+    UINT_32 elemIdx = macroOffset & 7;
+    macroOffset >>= elemIdxBits;
+
+    if (elemIdxBits != macroShift)
+    {
+        macroOffset <<= (elemIdxBits - macroShift);
+
+        UINT_32 pipebit1 = _BIT(pipe,1);
+        UINT_32 pipebit2 = _BIT(pipe,2);
+        UINT_32 pipebit3 = _BIT(pipe,3);
+        if (pitchInMacroTile % 2)
+        {   //odd
+            switch (pTileInfo->pipeConfig)
+            {
+                case ADDR_PIPECFG_P4_32x32:
+                    macroOffset |= pipebit1;
+                    break;
+                case ADDR_PIPECFG_P8_32x32_8x16:
+                case ADDR_PIPECFG_P8_32x32_16x16:
+                case ADDR_PIPECFG_P8_32x32_16x32:
+                    macroOffset |= pipebit2;
+                    break;
+                default:
+                    break;
+            }
+
+        }
+
+        if (pitchInMacroTile % 4)
+        {
+            if (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)
+            {
+                macroOffset |= (pipebit1<<1);
+            }
+            if((pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_8x16) ||
+               (pTileInfo->pipeConfig == ADDR_PIPECFG_P16_32x32_16x16))
+            {
+                macroOffset |= (pipebit3<<1);
+            }
+        }
+    }
+
+    UINT_32 macroX;
+    UINT_32 macroY;
+
+    if (isLinear)
+    {
+        macroX = macroOffset % pitchInMacroTile;
+        macroY = macroOffset / pitchInMacroTile;
+    }
+    else
+    {
+        const UINT_32 clWidthInMacroTile = clWidth / (MicroTileWidth * 4);
+        macroX = macroOffset % clWidthInMacroTile;
+        macroY = macroOffset / clWidthInMacroTile;
+    }
+
+    *pX += macroX * 4 * MicroTileWidth;
+    *pY += macroY * 4 * MicroTileHeight;
+
+    UINT_32 microX;
+    UINT_32 microY;
+    ComputeTileCoordFromPipeAndElemIdx(elemIdx, pipe, pTileInfo->pipeConfig, pitchInMacroTile,
+                                       *pX, *pY, &microX, &microY);
+
+    *pX += microX * MicroTileWidth;
+    // NOTE(review): the y contribution is scaled by MicroTileWidth, not MicroTileHeight -
+    // harmless only if the two constants are equal; confirm.
+    *pY += microY * MicroTileWidth;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetPitchAlignmentLinear
+*   @brief
+*       Get pitch alignment for a linear surface
+*   @return
+*       pitch alignment
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlGetPitchAlignmentLinear(
+    UINT_32             bpp,    ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags   ///< [in] surface flags
+    ) const
+{
+    // Interleaved access requires a 256B aligned pitch, so fall back to pre-SI alignment
+    if (flags.interleaved)
+    {
+        return Max(64u, m_pipeInterleaveBytes / BITS_TO_BYTES(bpp));
+    }
+
+    // Everything else: the larger of 8 and 64 divided by the byte size of one element.
+    return Max(8u, 64 / BITS_TO_BYTES(bpp));
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetSizeAdjustmentLinear
+*
+*   @brief
+*       Adjust linear surface pitch and slice size
+*
+*   @return
+*       Logical slice size in bytes
+*
+*   @note
+*       For ADDR_TM_LINEAR_GENERAL nothing is adjusted and *pHeightAlign is left untouched.
+*       For every other mode *pPitch may grow and *pHeightAlign is written; *pHeight is only
+*       read. baseAlign is not consulted.
+***************************************************************************************************
+*/
+UINT_64 SIAddrLib::HwlGetSizeAdjustmentLinear(
+    AddrTileMode        tileMode,       ///< [in] tile mode
+    UINT_32             bpp,            ///< [in] bits per pixel
+    UINT_32             numSamples,     ///< [in] number of samples
+    UINT_32             baseAlign,      ///< [in] base alignment
+    UINT_32             pitchAlign,     ///< [in] pitch alignment
+    UINT_32*            pPitch,         ///< [in/out] pointer to pitch
+    UINT_32*            pHeight,        ///< [in/out] pointer to height
+    UINT_32*            pHeightAlign    ///< [in/out] pointer to height align
+    ) const
+{
+    UINT_64 sliceSize;
+    if (tileMode == ADDR_TM_LINEAR_GENERAL)
+    {
+        // Widen before multiplying so the product is computed in 64 bits.
+        sliceSize = BITS_TO_BYTES(static_cast<UINT_64>(*pPitch) * (*pHeight) * bpp * numSamples);
+    }
+    else
+    {
+        UINT_32 pitch   = *pPitch;
+        UINT_32 height  = *pHeight;
+
+        UINT_32 pixelsPerPipeInterleave = m_pipeInterleaveBytes / BITS_TO_BYTES(bpp);
+        UINT_32 sliceAlignInPixel = pixelsPerPipeInterleave < 64 ? 64 : pixelsPerPipeInterleave;
+
+        // numSamples should be 1 in real cases (no MSAA for linear but TGL may pass non 1 value)
+        UINT_64 pixelPerSlice = static_cast<UINT_64>(pitch) * height * numSamples;
+
+        // Grow the pitch until one slice holds a whole number of sliceAlignInPixel units.
+        // NOTE(review): this relies on pitchAlign being non-zero, otherwise the loop cannot
+        // make progress - confirm callers guarantee it.
+        while (pixelPerSlice % sliceAlignInPixel)
+        {
+            pitch += pitchAlign;
+            pixelPerSlice = static_cast<UINT_64>(pitch) * height * numSamples;
+        }
+
+        *pPitch = pitch;
+
+        // Smallest row count whose pixel total is a multiple of sliceAlignInPixel.
+        UINT_32 heightAlign = 1;
+
+        while ((pitch * heightAlign) % sliceAlignInPixel)
+        {
+            heightAlign++;
+        }
+
+        *pHeightAlign = heightAlign;
+
+        sliceSize = BITS_TO_BYTES(pixelPerSlice * bpp);
+    }
+
+    return sliceSize;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlPreHandleBaseLvl3xPitch
+*
+*   @brief
+*       Pre-handler of 3x pitch (96 bit) adjustment
+*
+*   @return
+*       Expected pitch; this function hands back expPitch exactly as it was passed in
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlPreHandleBaseLvl3xPitch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
+    UINT_32                                 expPitch    ///< [in] pitch
+    ) const
+{
+    ADDR_ASSERT(pIn->width == expPitch);
+
+    if (pIn->flags.pow2Pad)
+    {
+        // From SI, if pow2Pad is 1 the pitch is expanded 3x first, then padded to pow2, so the
+        // only thing left to do here is to sanity-check the value.
+        ADDR_ASSERT(IsPow2(expPitch));
+    }
+    else
+    {
+        // NOTE(review): the value returned by the base-class handler is not captured, so
+        // expPitch is returned unchanged on this path as well - confirm that this is intended.
+        AddrLib::HwlPreHandleBaseLvl3xPitch(pIn, expPitch);
+    }
+
+    return expPitch;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlPostHandleBaseLvl3xPitch
+*
+*   @brief
+*       Post-handler of 3x pitch adjustment
+*
+*   @return
+*       Expected pitch; this function hands back expPitch exactly as it was passed in
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlPostHandleBaseLvl3xPitch(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT*  pIn,        ///< [in] input
+    UINT_32                                 expPitch    ///< [in] pitch
+    ) const
+{
+    /**
+     * @note The pitch will be divided by 3 in the end so the value will look odd but h/w should
+     *  be able to compute a correct pitch from it as h/w address library is doing the job.
+     */
+    // From SI, the pitch is expanded 3x first, then padded to pow2, so no special handler here
+    if (pIn->flags.pow2Pad)
+    {
+        return expPitch;
+    }
+
+    // NOTE(review): the value returned by the base-class handler is not captured, so expPitch
+    // is returned unchanged on this path as well - confirm that this is intended.
+    AddrLib::HwlPostHandleBaseLvl3xPitch(pIn, expPitch);
+
+    return expPitch;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetPitchAlignmentMicroTiled
+*
+*   @brief
+*       Compute 1D tiled surface pitch alignment
+*
+*   @return
+*       pitch alignment: 8 unless flags.qbStereo is set, in which case the EgBasedAddrLib
+*       implementation decides
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlGetPitchAlignmentMicroTiled(
+    AddrTileMode        tileMode,          ///< [in] tile mode
+    UINT_32             bpp,               ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,             ///< [in] surface flags
+    UINT_32             numSamples         ///< [in] number of samples
+    ) const
+{
+    if (!flags.qbStereo)
+    {
+        return 8;
+    }
+
+    // Quad-buffer stereo surfaces defer to the parent class.
+    return EgBasedAddrLib::HwlGetPitchAlignmentMicroTiled(tileMode, bpp, flags, numSamples);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetSizeAdjustmentMicroTiled
+*
+*   @brief
+*       Adjust 1D tiled surface pitch and slice size
+*
+*   @return
+*       Logical slice size in bytes
+*
+*   @note
+*       *pPitch may be increased in steps of pitchAlign; *pHeight is only read.
+***************************************************************************************************
+*/
+UINT_64 SIAddrLib::HwlGetSizeAdjustmentMicroTiled(
+    UINT_32             thickness,      ///< [in] thickness
+    UINT_32             bpp,            ///< [in] bits per pixel
+    ADDR_SURFACE_FLAGS  flags,          ///< [in] surface flags
+    UINT_32             numSamples,     ///< [in] number of samples
+    UINT_32             baseAlign,      ///< [in] base alignment
+    UINT_32             pitchAlign,     ///< [in] pitch alignment
+    UINT_32*            pPitch,         ///< [in/out] pointer to pitch
+    UINT_32*            pHeight         ///< [in/out] pointer to height
+    ) const
+{
+    UINT_64 logicalSliceSize;
+    UINT_64 physicalSliceSize;
+
+    UINT_32 pitch   = *pPitch;
+    UINT_32 height  = *pHeight;
+
+    // Logical slice: pitch * height * bpp * numSamples (no 1D MSAA so actually numSamples == 1)
+    // The first factor is widened so the whole product is computed in 64 bits.
+    logicalSliceSize = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
+
+    // Physical slice: multiplied by thickness
+    physicalSliceSize =  logicalSliceSize * thickness;
+
+    // Pitch alignment is always 8, so if slice size is not padded to base alignment
+    // (pipe_interleave_size), we need to increase pitch
+    while ((physicalSliceSize % baseAlign) != 0)
+    {
+        pitch += pitchAlign;
+
+        logicalSliceSize = BITS_TO_BYTES(static_cast<UINT_64>(pitch) * height * bpp * numSamples);
+
+        physicalSliceSize =  logicalSliceSize * thickness;
+    }
+
+#if !ALT_TEST
+    //
+    // Special workaround for depth/stencil buffer, use 8 bpp to align depth buffer again since
+    // the stencil plane may have larger pitch if the slice size is smaller than base alignment.
+    //
+    // Note: this actually does not work for mipmap but mipmap depth texture is not really
+    // sampled with mipmap.
+    //
+    if (flags.depth && !flags.noStencil)
+    {
+        ADDR_ASSERT(numSamples == 1);
+
+        UINT_64 logicalSliceSizeStencil = static_cast<UINT_64>(pitch) * height; // 1 byte stencil
+
+        while ((logicalSliceSizeStencil % baseAlign) != 0)
+        {
+            pitch += pitchAlign; // Stencil plane's pitch alignment is the same as depth plane's
+
+            logicalSliceSizeStencil = static_cast<UINT_64>(pitch) * height;
+        }
+
+        if (pitch != *pPitch)
+        {
+            // If this is a mipmap, this padded one cannot be sampled as a whole mipmap!
+            logicalSliceSize = logicalSliceSizeStencil * BITS_TO_BYTES(bpp);
+        }
+    }
+#endif
+    *pPitch = pitch;
+
+    // No adjust for pHeight
+
+    return logicalSliceSize;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlConvertChipFamily
+*
+*   @brief
+*       Convert familyID defined in atiid.h to AddrChipFamily and record in m_settings which
+*       Southern Islands chip the revision identifies
+*   @return
+*       AddrChipFamily; always ADDR_CHIP_FAMILY_SI
+***************************************************************************************************
+*/
+AddrChipFamily SIAddrLib::HwlConvertChipFamily(
+    UINT_32 uChipFamily,        ///< [in] chip family defined in atiid.h
+    UINT_32 uChipRevision)      ///< [in] chip revision defined in "asic_family"_id.h
+{
+    if (uChipFamily == FAMILY_SI)
+    {
+        m_settings.isSouthernIsland = 1;
+
+        // One flag per chip, each derived from the revision id.
+        m_settings.isTahiti     = ASICREV_IS_TAHITI_P(uChipRevision);
+        m_settings.isPitCairn   = ASICREV_IS_PITCAIRN_PM(uChipRevision);
+        m_settings.isCapeVerde  = ASICREV_IS_CAPEVERDE_M(uChipRevision);
+        m_settings.isOland      = ASICREV_IS_OLAND_M(uChipRevision);
+        m_settings.isHainan     = ASICREV_IS_HAINAN_V(uChipRevision);
+    }
+    else
+    {
+        // Any other family is unexpected here; m_settings is left untouched.
+        ADDR_ASSERT(!"This should be a Fusion");
+    }
+
+    return ADDR_CHIP_FAMILY_SI;
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::HwlSetupTileInfo
+*
+* @brief
+* Setup default value of tile info for SI
+*
+* @note
+* When the output tile info is all zero, a tile table index is chosen from tileMode, flags,
+* bpp, numSamples and the surface thickness; the matching m_tileTable entry is then copied to
+* the output tile info and its type to pOut->tileType. Otherwise the caller's tile info is kept
+* (except for the stencil override below) and inTileType is passed through.
+* pOut->tileIndex always receives the chosen index, which stays TileIndexInvalid when no
+* table lookup took place.
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlSetupTileInfo(
+ AddrTileMode tileMode, ///< [in] Tile mode
+ ADDR_SURFACE_FLAGS flags, ///< [in] Surface type flags
+ UINT_32 bpp, ///< [in] Bits per pixel
+ UINT_32 pitch, ///< [in] Pitch in pixels
+ UINT_32 height, ///< [in] Height in pixels
+ UINT_32 numSamples, ///< [in] Number of samples
+ ADDR_TILEINFO* pTileInfoIn, ///< [in] Tile info input: NULL for default
+ ADDR_TILEINFO* pTileInfoOut, ///< [out] Tile info output
+ AddrTileType inTileType, ///< [in] Tile type
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] Output
+ ) const
+{
+ UINT_32 thickness = ComputeSurfaceThickness(tileMode);
+ // pTileInfo is just an alias of the output tile info; it is both inspected and written below
+ ADDR_TILEINFO* pTileInfo = pTileInfoOut;
+ INT index = TileIndexInvalid;
+
+ // Fail-safe code
+ // (overrides the requested tile type for non-linear modes; the depth/stencil check runs last
+ // and therefore wins over the non-displayable override)
+ if (!IsLinear(tileMode))
+ {
+ // 128 bpp/thick tiling must be non-displayable.
+ // Fmask reuse color buffer's entry but bank-height field can be from another entry
+ // To simplify the logic, fmask entry should be picked from non-displayable ones
+ if (bpp == 128 || thickness > 1 || flags.fmask || flags.prt)
+ {
+ inTileType = ADDR_NON_DISPLAYABLE;
+ }
+
+ if (flags.depth || flags.stencil)
+ {
+ inTileType = ADDR_DEPTH_SAMPLE_ORDER;
+ }
+ }
+
+ // Partial valid fields are not allowed for SI.
+ if (IsTileInfoAllZero(pTileInfo))
+ {
+ if (IsMacroTiled(tileMode))
+ {
+ if (flags.prt)
+ {
+ if (numSamples == 1)
+ {
+ if (flags.depth)
+ {
+ switch (bpp)
+ {
+ case 16:
+ index = 3;
+ break;
+ case 32:
+ index = 6;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+ }
+ else
+ {
+ switch (bpp)
+ {
+ case 8:
+ index = 21;
+ break;
+ case 16:
+ index = 22;
+ break;
+ case 32:
+ index = 23;
+ break;
+ case 64:
+ index = 24;
+ break;
+ case 128:
+ index = 25;
+ break;
+ default:
+ break;
+ }
+
+ // Thick PRT entries sit 5 slots after the thin entry of the same bpp.
+ // NOTE(review): the offset is also applied when the switch above fell into
+ // its default case and index is still TileIndexInvalid - confirm this is benign.
+ if (thickness > 1)
+ {
+ ADDR_ASSERT(bpp != 128);
+ index += 5;
+ }
+ }
+ }
+ else
+ {
+ ADDR_ASSERT(numSamples == 4);
+
+ if (flags.depth)
+ {
+ switch (bpp)
+ {
+ case 16:
+ index = 5;
+ break;
+ case 32:
+ index = 7;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+ }
+ else
+ {
+ switch (bpp)
+ {
+ case 8:
+ index = 23;
+ break;
+ case 16:
+ index = 24;
+ break;
+ case 32:
+ index = 25;
+ break;
+ case 64:
+ index = 30;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ break;
+ }
+ }
+ }
+ }//end of PRT part
+ // See table entries 0-7
+ else if (flags.depth || flags.stencil)
+ {
+ if (flags.compressZ)
+ {
+ if (flags.stencil)
+ {
+ index = 0;
+ }
+ else
+ {
+ // optimal tile index for compressed depth/stencil.
+ // (sample counts other than 1, 2, 4 and 8 leave index unchanged)
+ switch (numSamples)
+ {
+ case 1:
+ index = 0;
+ break;
+ case 2:
+ case 4:
+ index = 1;
+ break;
+ case 8:
+ index = 2;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ else // unCompressZ
+ {
+ index = 3;
+ }
+ }
+ else //non PRT & non Depth & non Stencil
+ {
+ // See table entries 9-12
+ if (inTileType == ADDR_DISPLAYABLE)
+ {
+ // 32 bpp and 64 bpp share entry 12; other bpp values leave index unchanged
+ switch (bpp)
+ {
+ case 8:
+ index = 10;
+ break;
+ case 16:
+ index = 11;
+ break;
+ case 32:
+ index = 12;
+ break;
+ case 64:
+ index = 12;
+ break;
+ default:
+ break;
+ }
+ }
+ else
+ {
+ // See table entries 13-17
+ if (thickness == 1)
+ {
+ if (flags.fmask)
+ {
+ // Fmask is looked up by bpp * numSamples rather than by bpp alone
+ UINT_32 fmaskPixelSize = bpp * numSamples;
+
+ switch (fmaskPixelSize)
+ {
+ case 8:
+ index = 14;
+ break;
+ case 16:
+ index = 15;
+ break;
+ case 32:
+ index = 16;
+ break;
+ case 64:
+ index = 17;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS();
+ }
+ }
+ else
+ {
+ // 64 bpp and 128 bpp share entry 17
+ switch (bpp)
+ {
+ case 8:
+ index = 14;
+ break;
+ case 16:
+ index = 15;
+ break;
+ case 32:
+ index = 16;
+ break;
+ case 64:
+ index = 17;
+ break;
+ case 128:
+ index = 17;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ else // thick tiling - entries 18-20
+ {
+ switch (thickness)
+ {
+ case 4:
+ index = 20;
+ break;
+ case 8:
+ index = 19;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ // Not macro tiled: linear modes first, then a fixed entry per surface kind
+ if (tileMode == ADDR_TM_LINEAR_ALIGNED)
+ {
+ index = 8;
+ }
+ else if (tileMode == ADDR_TM_LINEAR_GENERAL)
+ {
+ index = TileIndexLinearGeneral;
+ }
+ else
+ {
+ if (flags.depth || flags.stencil)
+ {
+ index = 4;
+ }
+ else if (inTileType == ADDR_DISPLAYABLE)
+ {
+ index = 9;
+ }
+ else if (thickness == 1)
+ {
+ index = 13;
+ }
+ else
+ {
+ index = 18;
+ }
+ }
+ }
+
+ // Only indices 0..31 are used to read m_tileTable directly
+ if (index >= 0 && index <= 31)
+ {
+ *pTileInfo = m_tileTable[index].info;
+ pOut->tileType = m_tileTable[index].type;
+ }
+
+ // Linear general borrows the contents of entry 8 but keeps its own index value
+ if (index == TileIndexLinearGeneral)
+ {
+ *pTileInfo = m_tileTable[8].info;
+ pOut->tileType = m_tileTable[8].type;
+ }
+ }
+ else
+ {
+ // The caller supplied tile info: keep it, apart from the stencil special case
+ if (pTileInfoIn)
+ {
+ if (flags.stencil && pTileInfoIn->tileSplitBytes == 0)
+ {
+ // Stencil always uses index 0
+ *pTileInfo = m_tileTable[0].info;
+ }
+ }
+ // Pass through tile type
+ pOut->tileType = inTileType;
+ }
+
+ // index is still TileIndexInvalid here when no table lookup was performed above
+ pOut->tileIndex = index;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::DecodeGbRegs
+*
+*   @brief
+*       Decodes GB_ADDR_CONFIG and noOfBanks/noOfRanks into m_pipeInterleaveBytes, m_rowSize,
+*       m_banks, m_ranks and m_logicalBanks
+*
+*   @return
+*       TRUE if all settings are valid
+*
+*   @note
+*       A field with an unrecognized encoding leaves its member untouched and makes the result
+*       FALSE; the remaining fields are still decoded.
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::DecodeGbRegs(
+    const ADDR_REGISTER_VALUE* pRegValue) ///< [in] create input
+{
+    BOOL_32 allFieldsValid = TRUE;
+
+    GB_ADDR_CONFIG gbAddrConfig;
+    gbAddrConfig.val = pRegValue->gbAddrConfig;
+
+    // Pipe interleave size
+    if (gbAddrConfig.f.pipe_interleave_size == ADDR_CONFIG_PIPE_INTERLEAVE_256B)
+    {
+        m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
+    }
+    else if (gbAddrConfig.f.pipe_interleave_size == ADDR_CONFIG_PIPE_INTERLEAVE_512B)
+    {
+        m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
+    }
+    else
+    {
+        allFieldsValid = FALSE;
+        ADDR_UNHANDLED_CASE();
+    }
+
+    // Row size
+    if (gbAddrConfig.f.row_size == ADDR_CONFIG_1KB_ROW)
+    {
+        m_rowSize = ADDR_ROWSIZE_1KB;
+    }
+    else if (gbAddrConfig.f.row_size == ADDR_CONFIG_2KB_ROW)
+    {
+        m_rowSize = ADDR_ROWSIZE_2KB;
+    }
+    else if (gbAddrConfig.f.row_size == ADDR_CONFIG_4KB_ROW)
+    {
+        m_rowSize = ADDR_ROWSIZE_4KB;
+    }
+    else
+    {
+        allFieldsValid = FALSE;
+        ADDR_UNHANDLED_CASE();
+    }
+
+    // Bank count: encodings 0, 1, 2 stand for 4, 8, 16 banks
+    if (pRegValue->noOfBanks == 0)
+    {
+        m_banks = 4;
+    }
+    else if (pRegValue->noOfBanks == 1)
+    {
+        m_banks = 8;
+    }
+    else if (pRegValue->noOfBanks == 2)
+    {
+        m_banks = 16;
+    }
+    else
+    {
+        allFieldsValid = FALSE;
+        ADDR_UNHANDLED_CASE();
+    }
+
+    // Rank count: encodings 0, 1 stand for 1, 2 ranks
+    if (pRegValue->noOfRanks == 0)
+    {
+        m_ranks = 1;
+    }
+    else if (pRegValue->noOfRanks == 1)
+    {
+        m_ranks = 2;
+    }
+    else
+    {
+        allFieldsValid = FALSE;
+        ADDR_UNHANDLED_CASE();
+    }
+
+    m_logicalBanks = m_banks * m_ranks;
+
+    ADDR_ASSERT(m_logicalBanks <= 16);
+
+    return allFieldsValid;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlInitGlobalParams
+*
+*   @brief
+*       Initializes global parameters: decoded register fields, pipe count, the tile setting
+*       table and the maximum sample count
+*
+*   @return
+*       TRUE if all settings are valid
+*
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::HwlInitGlobalParams(
+    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
+{
+    const ADDR_REGISTER_VALUE* pRegValue = &pCreateIn->regValue;
+
+    BOOL_32 valid = DecodeGbRegs(pRegValue);
+
+    if (!valid)
+    {
+        // Register decoding failed: nothing else is initialized.
+        return valid;
+    }
+
+    // Tahiti/Pitcairn are 8-pipe, Cape Verde/Oland are 4-pipe,
+    // Hainan is 2-pipe (m_settings.isHainan == 1)
+    m_pipes = (m_settings.isTahiti || m_settings.isPitCairn) ? 8 :
+              ((m_settings.isCapeVerde || m_settings.isOland) ? 4 : 2);
+
+    valid = InitTileSettingTable(pRegValue->pTileConfig, pRegValue->noOfEntries);
+
+    m_maxSamples = 16;
+
+    return valid;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlConvertTileInfoToHW
+*   @brief
+*       Entry of si's ConvertTileInfoToHW
+*   @return
+*       ADDR_E_RETURNCODE: the EgBasedAddrLib result, or ADDR_INVALIDPARAMS when a forward
+*       conversion is asked for ADDR_PIPECFG_INVALID
+*   @note
+*       On top of the EgBasedAddrLib conversion, pipeConfig is shifted by one: decremented when
+*       pIn->reverse is FALSE, incremented otherwise.
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE SIAddrLib::HwlConvertTileInfoToHW(
+    const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn, ///< [in] input structure
+    ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut      ///< [out] output structure
+    ) const
+{
+    ADDR_E_RETURNCODE retCode   = ADDR_OK;
+
+    retCode = EgBasedAddrLib::HwlConvertTileInfoToHW(pIn, pOut);
+
+    if (retCode == ADDR_OK)
+    {
+        if (pIn->reverse == FALSE)
+        {
+            if (pIn->pTileInfo->pipeConfig == ADDR_PIPECFG_INVALID)
+            {
+                retCode = ADDR_INVALIDPARAMS;
+            }
+            else
+            {
+                // The arithmetic yields an integer, so convert back to the enum explicitly.
+                pOut->pTileInfo->pipeConfig =
+                    static_cast<AddrPipeCfg>(pIn->pTileInfo->pipeConfig - 1);
+            }
+        }
+        else
+        {
+            pOut->pTileInfo->pipeConfig =
+                static_cast<AddrPipeCfg>(pIn->pTileInfo->pipeConfig + 1);
+        }
+    }
+
+    return retCode;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeXmaskCoordYFrom8Pipe
+*
+*   @brief
+*       Compute the Y coord which will be added to Xmask Y
+*       coord. On SI this is only a stub that asserts.
+*   @return
+*       Y coord; always 0 here
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlComputeXmaskCoordYFrom8Pipe(
+    UINT_32         pipe,       ///< [in] pipe id
+    UINT_32         x           ///< [in] tile coord x, which is original x coord / 8
+    ) const
+{
+    // This function should never be called since it is 6xx/8xx specific.
+    // The stub is kept, and asserts, so that any mis-use is noticed.
+    ADDR_ASSERT_ALWAYS();
+
+    const UINT_32 unusedYCoord = 0;
+
+    return unusedYCoord;
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::HwlComputeSurfaceCoord2DFromBankPipe
+*
+* @brief
+* Compute surface x,y coordinates from bank/pipe info; the offsets are added to *pX and *pY
+* @return
+* N/A (results are accumulated into *pX and *pY)
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlComputeSurfaceCoord2DFromBankPipe(
+ AddrTileMode tileMode, ///< [in] tile mode
+ UINT_32* pX, ///< [in/out] x coordinate
+ UINT_32* pY, ///< [in/out] y coordinate
+ UINT_32 slice, ///< [in] slice index
+ UINT_32 bank, ///< [in] bank number
+ UINT_32 pipe, ///< [in] pipe number
+ UINT_32 bankSwizzle,///< [in] bank swizzle
+ UINT_32 pipeSwizzle,///< [in] pipe swizzle
+ UINT_32 tileSlices, ///< [in] slices in a micro tile
+ BOOL_32 ignoreSE, ///< [in] TRUE if shader engines are ignored (not referenced in this body)
+ ADDR_TILEINFO* pTileInfo ///< [in] bank structure. **All fields to be valid on entry**
+ ) const
+{
+ UINT_32 xBit;
+ UINT_32 yBit;
+ UINT_32 yBit3 = 0;
+ UINT_32 yBit4 = 0;
+ UINT_32 yBit5 = 0;
+ UINT_32 yBit6 = 0;
+
+ UINT_32 xBit3 = 0;
+ UINT_32 xBit4 = 0;
+ UINT_32 xBit5 = 0;
+
+ UINT_32 numPipes = GetPipePerSurf(pTileInfo->pipeConfig);
+
+ CoordFromBankPipe xyBits = {0};
+ ComputeSurfaceCoord2DFromBankPipe(tileMode, *pX, *pY, slice, bank, pipe,
+ bankSwizzle, pipeSwizzle, tileSlices, pTileInfo,
+ &xyBits);
+ yBit3 = xyBits.yBit3;
+ yBit4 = xyBits.yBit4;
+ yBit5 = xyBits.yBit5;
+ yBit6 = xyBits.yBit6;
+
+ xBit3 = xyBits.xBit3;
+ xBit4 = xyBits.xBit4;
+ xBit5 = xyBits.xBit5;
+
+ yBit = xyBits.yBits; // only read inside the special case below, then overwritten
+
+ UINT_32 yBitTemp = 0;
+
+ if ((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) ||
+ (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32))
+ {
+ ADDR_ASSERT(pTileInfo->bankWidth == 1 && pTileInfo->macroAspectRatio > 1);
+ UINT_32 yBitToCheck = QLog2(pTileInfo->banks) - 1;
+
+ ADDR_ASSERT(yBitToCheck <= 3);
+
+ yBitTemp = _BIT(yBit, yBitToCheck); // feeds bankBit0 in the matching switch cases
+
+ xBit3 = 0;
+ }
+
+ yBit = Bits2Number(4, yBit6, yBit5, yBit4, yBit3);
+ xBit = Bits2Number(3, xBit5, xBit4, xBit3);
+
+ *pY += yBit * pTileInfo->bankHeight * MicroTileHeight;
+ *pX += xBit * numPipes * pTileInfo->bankWidth * MicroTileWidth;
+
+ //calculate the bank and pipe bits in x, y
+ UINT_32 xTile; //x in micro tile
+ UINT_32 x3 = 0;
+ UINT_32 x4 = 0;
+ UINT_32 x5 = 0;
+ UINT_32 x6 = 0;
+ UINT_32 y = *pY; // y already includes the bank offset added above
+
+ UINT_32 pipeBit0 = _BIT(pipe,0);
+ UINT_32 pipeBit1 = _BIT(pipe,1);
+ UINT_32 pipeBit2 = _BIT(pipe,2);
+
+ UINT_32 y3 = _BIT(y, 3);
+ UINT_32 y4 = _BIT(y, 4);
+ UINT_32 y5 = _BIT(y, 5);
+ UINT_32 y6 = _BIT(y, 6);
+
+ // bankbit0 after ^x4^x5
+ UINT_32 bankBit00 = _BIT(bank,0);
+ UINT_32 bankBit0 = 0;
+
+ switch (pTileInfo->pipeConfig)
+ {
+ case ADDR_PIPECFG_P2:
+ x3 = pipeBit0 ^ y3;
+ break;
+ case ADDR_PIPECFG_P4_8x16:
+ x4 = pipeBit0 ^ y3;
+ x3 = pipeBit0 ^ y4;
+ break;
+ case ADDR_PIPECFG_P4_16x16:
+ x4 = pipeBit1 ^ y4;
+ x3 = pipeBit0 ^ y3 ^ x4;
+ break;
+ case ADDR_PIPECFG_P4_16x32:
+ x4 = pipeBit1 ^ y4;
+ x3 = pipeBit0 ^ y3 ^ x4;
+ break;
+ case ADDR_PIPECFG_P4_32x32:
+ x5 = pipeBit1 ^ y5;
+ x3 = pipeBit0 ^ y3 ^ x5;
+ bankBit0 = yBitTemp ^ x5;
+ x4 = bankBit00 ^ x5 ^ bankBit0;
+ *pX += x5 * 4 * 1 * 8; // x5 * num_pipes * bank_width * 8;
+ break;
+ case ADDR_PIPECFG_P8_16x16_8x16:
+ x3 = pipeBit1 ^ y5;
+ x4 = pipeBit2 ^ y4;
+ x5 = pipeBit0 ^ y3 ^ x4;
+ break;
+ case ADDR_PIPECFG_P8_16x32_8x16:
+ x3 = pipeBit1 ^ y4;
+ x4 = pipeBit2 ^ y5;
+ x5 = pipeBit0 ^ y3 ^ x4;
+ break;
+ case ADDR_PIPECFG_P8_32x32_8x16:
+ x3 = pipeBit1 ^ y4;
+ x5 = pipeBit2 ^ y5;
+ x4 = pipeBit0 ^ y3 ^ x5;
+ break;
+ case ADDR_PIPECFG_P8_16x32_16x16:
+ x4 = pipeBit2 ^ y5;
+ x5 = pipeBit1 ^ y4;
+ x3 = pipeBit0 ^ y3 ^ x4;
+ break;
+ case ADDR_PIPECFG_P8_32x32_16x16:
+ x5 = pipeBit2 ^ y5;
+ x4 = pipeBit1 ^ y4;
+ x3 = pipeBit0 ^ y3 ^ x4;
+ break;
+ case ADDR_PIPECFG_P8_32x32_16x32:
+ x5 = pipeBit2 ^ y5;
+ x4 = pipeBit1 ^ y6;
+ x3 = pipeBit0 ^ y3 ^ x4;
+ break;
+ case ADDR_PIPECFG_P8_32x64_32x32:
+ x6 = pipeBit1 ^ y5;
+ x5 = pipeBit2 ^ y6;
+ x3 = pipeBit0 ^ y3 ^ x5;
+ bankBit0 = yBitTemp ^ x6;
+ x4 = bankBit00 ^ x5 ^ bankBit0;
+ *pX += x6 * 8 * 1 * 8; // x6 * num_pipes * bank_width * 8;
+ break;
+ default:
+ ADDR_ASSERT_ALWAYS(); // pipe config not handled here; x3..x6 stay 0
+ }
+
+ xTile = Bits2Number(3, x5, x4, x3);
+
+ *pX += xTile << 3; // x3..x5 combined into xTile, scaled by 8 via the shift
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlPreAdjustBank
+*
+*   @brief
+*       Adjust bank before calculating address according to bank/pipe
+*   @return
+*       Adjusted bank; unchanged unless pipeConfig is P4_32x32 or P8_32x64_32x32 with bankWidth 1
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlPreAdjustBank(
+    UINT_32         tileX,      ///< [in] x coordinate in unit of tile
+    UINT_32         bank,       ///< [in] bank
+    ADDR_TILEINFO*  pTileInfo   ///< [in] tile info
+    ) const
+{
+    if (((pTileInfo->pipeConfig == ADDR_PIPECFG_P4_32x32) ||
+        (pTileInfo->pipeConfig == ADDR_PIPECFG_P8_32x64_32x32)) && (pTileInfo->bankWidth == 1))
+    {
+        UINT_32 bankBit0 = _BIT(bank, 0);
+        UINT_32 x4 = _BIT(tileX, 1);
+        UINT_32 x5 = _BIT(tileX, 2);
+
+        bankBit0 = bankBit0 ^ x4 ^ x5;
+        bank |= bankBit0; // NOTE(review): OR can set bank bit 0 but never clear it - confirm
+
+        ADDR_ASSERT(pTileInfo->macroAspectRatio > 1); // terminated so assert-style macros build
+    }
+
+    return bank;
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::HwlComputeSurfaceInfo
+*
+* @brief
+* Entry of si's ComputeSurfaceInfo; seeds pOut->tileIndex and defers to EgBasedAddrLib
+* @return
+* ADDR_E_RETURNCODE returned by EgBasedAddrLib::HwlComputeSurfaceInfo
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE SIAddrLib::HwlComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
+ ) const
+{
+ pOut->tileIndex = pIn->tileIndex; // copied before the base implementation runs
+
+ return EgBasedAddrLib::HwlComputeSurfaceInfo(pIn,pOut);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeMipLevel
+*   @brief
+*       Compute MipLevel info (including level 0); sub levels get pIn->width from basePitch
+*   @return
+*       TRUE if HWL's handled (always TRUE here)
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::HwlComputeMipLevel(
+    ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in/out] Input structure
+    ) const
+{
+    // basePitch is calculated from level 0, so only sub levels (mipLevel > 0) are derived
+    // from it
+    const BOOL_32 isSubLevel = (pIn->mipLevel > 0);
+
+    // Expand 3x formats (96 bit) are not checked: their base pitch is padded to pow2 before
+    // being divided by the expandX factor (3) to program the texture pitch, so basePitch is
+    // never pow2 even if the pow2Pad flag is set explicitly.
+    if (isSubLevel && !AddrElemLib::IsExpand3x(pIn->format))
+    {
+        // Sublevel pitches are generated from the base level pitch instead of the width on SI.
+        // If pow2Pad is 0 nothing is asserted, as that is not really used for a mip chain.
+        ADDR_ASSERT(!pIn->flags.pow2Pad || ((pIn->basePitch != 0) && IsPow2(pIn->basePitch)));
+    }
+
+    if (isSubLevel && (pIn->basePitch != 0))
+    {
+        // Width of this level is the base pitch shifted by the level, but at least 1
+        pIn->width = Max(1u, pIn->basePitch >> pIn->mipLevel);
+    }
+
+    // pow2Pad is done in PostComputeMipLevel
+    return TRUE;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlCheckLastMacroTiledLvl
+*
+*   @brief
+*       Sets pOut->last2DLevel to TRUE if the mip level after this one is micro tiled,
+*       i.e. this level is the last macro tiled one. Nothing is done unless pow2Pad is set.
+*
+*   @note
+*       The next level is estimated from the current one:
+*         - pitch:  pOut->pitch >> 1 for the base level or when basePitch is 0, otherwise
+*                   basePitch >> (mipLevel + 1)
+*         - height: pOut->height >> 1, converted to units of 4 (rounded up) for block
+*                   compressed formats, then passed through NextPow2()
+*         - slices: halved (but at least 1) for volumes, unchanged otherwise
+*       These are handed to ComputeSurfaceMipLevelTileMode() together with this level's
+*       alignments and tile info to obtain the next level's tile mode.
+*
+*   @return
+*       N/A
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlCheckLastMacroTiledLvl(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] Input structure
+    ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut      ///< [in/out] Output structure (used as input, too)
+    ) const
+{
+    // pow2Pad covers all mipmap cases; nothing to evaluate without it
+    if (!pIn->flags.pow2Pad)
+    {
+        return;
+    }
+
+    ADDR_ASSERT(IsMacroTiled(pIn->tileMode));
+
+    // Pitch of the next level. The base level, and the fail-safe case of basePitch == 0,
+    // halve this level's computed pitch; sub levels shift the base pitch instead.
+    const BOOL_32 useOutPitch = (pIn->mipLevel == 0) || (pIn->basePitch == 0);
+    const UINT_32 pitchOfNext = useOutPitch ? (pOut->pitch >> 1)
+                                            : (pIn->basePitch >> (pIn->mipLevel + 1));
+
+    // The next height must be shifted from this level's original height rather than a pow2
+    // padded one, which requires the original height to be stored in pOut->height
+    ADDR_ASSERT(pOut->height != 0);
+
+    // Next level's height is just the current level's >> 1 in pixels
+    UINT_32 heightOfNext = pOut->height >> 1;
+
+    // Special formats such as FMT_1 and FMT_32_32_32 can be linear only, so only block
+    // compressed formats are considered here
+    if (AddrElemLib::IsBlockCompressed(pIn->format))
+    {
+        heightOfNext = (heightOfNext + 3) / 4;
+    }
+
+    heightOfNext = NextPow2(heightOfNext);
+
+    // Only volumes lose slices per level; halving a single slice would give 0, hence Max()
+    const UINT_32 slicesOfNext = pIn->flags.volume ? Max(1u, pIn->numSlices >> 1)
+                                                   : pIn->numSlices;
+
+    const AddrTileMode tileModeOfNext = ComputeSurfaceMipLevelTileMode(pIn->tileMode,
+                                                                       pIn->bpp,
+                                                                       pitchOfNext,
+                                                                       heightOfNext,
+                                                                       slicesOfNext,
+                                                                       pIn->numSamples,
+                                                                       pOut->pitchAlign,
+                                                                       pOut->heightAlign,
+                                                                       pOut->pTileInfo);
+
+    // This is the last macro tiled level when the next one is micro tiled
+    pOut->last2DLevel = IsMicroTiled(tileModeOfNext);
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::HwlDegradeThickTileMode
+*
+* @brief
+* Degrades valid tile mode for thick modes if needed; forwards unchanged to EgBasedAddrLib
+*
+* @return
+* Suitable tile mode, as returned by EgBasedAddrLib::HwlDegradeThickTileMode
+***************************************************************************************************
+*/
+AddrTileMode SIAddrLib::HwlDegradeThickTileMode(
+ AddrTileMode baseTileMode, ///< [in] base tile mode
+ UINT_32 numSlices, ///< [in] current number of slices
+ UINT_32* pBytesPerTile ///< [in/out] pointer to bytes per tile, passed through as-is
+ ) const
+{
+ return EgBasedAddrLib::HwlDegradeThickTileMode(baseTileMode, numSlices, pBytesPerTile);
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlTileInfoEqual
+*
+*   @brief
+*       Return TRUE if pipeConfig matches and EgBasedAddrLib::HwlTileInfoEqual agrees
+*   @note
+*       Only takes care of current HWL's data
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::HwlTileInfoEqual(
+    const ADDR_TILEINFO* pLeft, ///<[in] Left compare operand
+    const ADDR_TILEINFO* pRight ///<[in] Right compare operand
+    ) const
+{
+    // Differing pipe configs settle the comparison without consulting the base class
+    if (pLeft->pipeConfig != pRight->pipeConfig)
+    {
+        return FALSE;
+    }
+
+    // The remaining comparison is delegated to the EgBasedAddrLib implementation
+    return EgBasedAddrLib::HwlTileInfoEqual(pLeft, pRight);
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::GetTileSetting
+*
+* @brief
+* Get tile setting infos by index.
+* @return
+* Pointer to the m_tileTable entry at index; index must be below m_noOfEntries.
+***************************************************************************************************
+*/
+const ADDR_TILECONFIG* SIAddrLib::GetTileSetting(
+ UINT_32 index ///< [in] Tile index
+ ) const
+{
+ ADDR_ASSERT(index < m_noOfEntries);
+ return &m_tileTable[index]; // no range check beyond the assert above
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlPostCheckTileIndex
+*
+*   @brief
+*       Map a tile setting to index if curIndex is invalid, otherwise check if curIndex matches
+*       tile mode/type/info and change the index if needed
+*   @return
+*       Tile index, or TileIndexInvalid when no entry of the tile table matches.
+***************************************************************************************************
+*/
+INT_32 SIAddrLib::HwlPostCheckTileIndex(
+    const ADDR_TILEINFO* pInfo,     ///< [in] Tile Info
+    AddrTileMode         mode,      ///< [in] Tile mode
+    AddrTileType         type,      ///< [in] Tile type
+    INT                  curIndex   ///< [in] Current index assigned in HwlSetupTileInfo
+    ) const
+{
+    INT_32 index = curIndex;
+
+    if (mode == ADDR_TM_LINEAR_GENERAL)
+    {
+        index = TileIndexLinearGeneral;
+    }
+    else
+    {
+        BOOL_32 macroTiled = IsMacroTiled(mode);
+
+        // We need to find a new index if any of these is true
+        // 1. curIndex is invalid
+        // 2. tile mode is changed
+        // 3. tile info does not match for macro tiled
+        if ((index == TileIndexInvalid         ||
+            (mode != m_tileTable[index].mode)  ||
+            (macroTiled && !HwlTileInfoEqual(pInfo, &m_tileTable[index].info))))
+        {
+            for (index = 0; index < static_cast<INT_32>(m_noOfEntries); index++)
+            {
+                if (macroTiled)
+                {
+                    // macro tile modes need all to match
+                    if (HwlTileInfoEqual(pInfo, &m_tileTable[index].info) &&
+                        (mode == m_tileTable[index].mode) &&
+                        (type == m_tileTable[index].type))
+                    {
+                        break;
+                    }
+                }
+                else if (mode == ADDR_TM_LINEAR_ALIGNED)
+                {
+                    // linear mode only needs tile mode to match
+                    if (mode == m_tileTable[index].mode)
+                    {
+                        break;
+                    }
+                }
+                else
+                {
+                    // micro tile modes only need tile mode and tile type to match
+                    if (mode == m_tileTable[index].mode &&
+                        type == m_tileTable[index].type)
+                    {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    ADDR_ASSERT(index < static_cast<INT_32>(m_noOfEntries));
+
+    if (index >= static_cast<INT_32>(m_noOfEntries))
+    {
+        index = TileIndexInvalid; // the search above ran off the end of the table
+    }
+
+    return index;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlSetupTileCfg
+*
+*   @brief
+*       Map tile index to tile setting. Does nothing when UseTileIndex(index) is false.
+*   @return
+*       ADDR_OK, or ADDR_INVALIDPARAMS (index out of table / macro tiled entry without pInfo)
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE SIAddrLib::HwlSetupTileCfg(
+    INT_32          index,          ///< [in] Tile index
+    INT_32          macroModeIndex, ///< [in] Index in macro tile mode table(CI), unused here
+    ADDR_TILEINFO*  pInfo,          ///< [out] Tile Info
+    AddrTileMode*   pMode,          ///< [out] Tile mode
+    AddrTileType*   pType           ///< [out] Tile type
+    ) const
+{
+    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+
+    // Global flag to control usage of tileIndex
+    if (UseTileIndex(index))
+    {
+        if (index == TileIndexLinearGeneral)
+        {
+            if (pMode)
+            {
+                *pMode = ADDR_TM_LINEAR_GENERAL;
+            }
+
+            if (pType)
+            {
+                *pType = ADDR_DISPLAYABLE;
+            }
+
+            if (pInfo)
+            {
+                pInfo->banks = 2;
+                pInfo->bankWidth = 1;
+                pInfo->bankHeight = 1;
+                pInfo->macroAspectRatio = 1;
+                pInfo->tileSplitBytes = 64;
+                pInfo->pipeConfig = ADDR_PIPECFG_P2;
+            }
+        }
+        else if (static_cast<UINT_32>(index) >= m_noOfEntries)
+        {
+            returnCode = ADDR_INVALIDPARAMS; // the unsigned cast also rejects negative indices
+        }
+        else
+        {
+            const ADDR_TILECONFIG* pCfgTable = GetTileSetting(index);
+
+            if (pInfo)
+            {
+                *pInfo = pCfgTable->info;
+            }
+            else
+            {
+                if (IsMacroTiled(pCfgTable->mode))
+                {
+                    returnCode = ADDR_INVALIDPARAMS; // macro tiled entries require pInfo
+                }
+            }
+
+            if (pMode)
+            {
+                *pMode = pCfgTable->mode;
+            }
+
+            if (pType)
+            {
+                *pType = pCfgTable->type;
+            }
+        }
+    }
+
+    return returnCode;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::ReadGbTileMode
+*
+*   @brief
+*       Convert GB_TILE_MODE HW value to ADDR_TILECONFIG.
+*   @return
+*       NA.
+***************************************************************************************************
+*/
+VOID SIAddrLib::ReadGbTileMode(
+    UINT_32             regValue,   ///< [in] GB_TILE_MODE register
+    ADDR_TILECONFIG*    pCfg        ///< [out] output structure
+    ) const
+{
+    GB_TILE_MODE gbTileMode;
+    gbTileMode.val = regValue;
+
+    pCfg->type = static_cast<AddrTileType>(gbTileMode.f.micro_tile_mode);
+    pCfg->info.bankHeight = 1 << gbTileMode.f.bank_height;
+    pCfg->info.bankWidth = 1 << gbTileMode.f.bank_width;
+    pCfg->info.banks = 1 << (gbTileMode.f.num_banks + 1);
+    pCfg->info.macroAspectRatio = 1 << gbTileMode.f.macro_tile_aspect;
+    pCfg->info.tileSplitBytes = 64 << gbTileMode.f.tile_split;
+    pCfg->info.pipeConfig = static_cast<AddrPipeCfg>(gbTileMode.f.pipe_config + 1);
+
+    UINT_32 regArrayMode = gbTileMode.f.array_mode;
+
+    pCfg->mode = static_cast<AddrTileMode>(regArrayMode); // default: register value used as-is
+
+    if (regArrayMode == 8) //ARRAY_2D_TILED_XTHICK
+    {
+        pCfg->mode = ADDR_TM_2D_TILED_XTHICK;
+    }
+    else if (regArrayMode >= 14) //ARRAY_3D_TILED_XTHICK
+    {
+        pCfg->mode = static_cast<AddrTileMode>(pCfg->mode + 3);
+    }
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::InitTileSettingTable
+*
+*   @brief
+*       Initialize the ADDR_TILECONFIG table from an array of GB_TILE_MODE register values.
+*   @return
+*       TRUE if tile table is correctly initialized, FALSE when pCfg is NULL
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::InitTileSettingTable(
+    const UINT_32*  pCfg,           ///< [in] Pointer to table of tile configs
+    UINT_32         noOfEntries     ///< [in] Number of entries in the table above (0 = all)
+    )
+{
+    BOOL_32 initOk = TRUE;
+
+    ADDR_ASSERT(noOfEntries <= TileTableSize);
+
+    memset(m_tileTable, 0, sizeof(m_tileTable));
+
+    if ((noOfEntries != 0) && (noOfEntries <= TileTableSize))
+    {
+        m_noOfEntries = noOfEntries;
+    }
+    else
+    {
+        m_noOfEntries = TileTableSize; // 0 = full table; oversized counts clamp to the array
+    }
+
+    if (pCfg) // From Client
+    {
+        for (UINT_32 i = 0; i < m_noOfEntries; i++)
+        {
+            ReadGbTileMode(*(pCfg + i), &m_tileTable[i]);
+        }
+    }
+    else
+    {
+        ADDR_ASSERT_ALWAYS();
+        initOk = FALSE;
+    }
+
+    if (initOk)
+    {
+        ADDR_ASSERT(m_tileTable[TILEINDEX_LINEAR_ALIGNED].mode == ADDR_TM_LINEAR_ALIGNED);
+    }
+
+    return initOk;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlGetTileIndex
+*
+*   @brief
+*       Return the virtual/real index for given mode/type/info
+*   @return
+*       ADDR_OK if successful (this implementation always returns ADDR_OK).
+***************************************************************************************************
+*/
+ADDR_E_RETURNCODE SIAddrLib::HwlGetTileIndex(
+    const ADDR_GET_TILEINDEX_INPUT* pIn,
+    ADDR_GET_TILEINDEX_OUTPUT*      pOut) const
+{
+    // curIndex is left at its default, so the lookup starts from scratch
+    pOut->index = HwlPostCheckTileIndex(pIn->pTileInfo, pIn->tileMode, pIn->tileType);
+
+    // A setting without a table entry is reported through pOut->index, not the return code
+    return ADDR_OK;
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::HwlFmaskPreThunkSurfInfo
+*
+* @brief
+* Some preparation before thunking a ComputeSurfaceInfo call for Fmask
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlFmaskPreThunkSurfInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn, ///< [in] Input of fmask info
+ const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut, ///< [in] Output of fmask info
+ ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn, ///< [out] Input of thunked surface info
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut ///< [out] Output of thunked surface info
+ ) const
+{
+ pSurfIn->tileIndex = pFmaskIn->tileIndex; // only field set here; pFmaskOut/pSurfOut unused
+}
+
+/**
+***************************************************************************************************
+* SIAddrLib::HwlFmaskPostThunkSurfInfo
+*
+* @brief
+* Copy hwl extra field after calling thunked ComputeSurfaceInfo
+* @return
+* N/A
+***************************************************************************************************
+*/
+VOID SIAddrLib::HwlFmaskPostThunkSurfInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut, ///< [in] Output of surface info
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut ///< [out] Output of fmask info
+ ) const
+{
+ pFmaskOut->macroModeIndex = TileIndexInvalid; // always reported as invalid by this HWL
+ pFmaskOut->tileIndex = pSurfOut->tileIndex;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlComputeFmaskBits
+*   @brief
+*       Computes fmask bits and the sample count to use with them (written to *pNumSamples)
+*   @return
+*       Fmask bits
+***************************************************************************************************
+*/
+UINT_32 SIAddrLib::HwlComputeFmaskBits(
+    const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,   ///< [in] numSamples, numFrags, resolved
+    UINT_32* pNumSamples                        ///< [out] adjusted sample count, via SafeAssign
+    ) const
+{
+    UINT_32 numSamples = pIn->numSamples;
+    UINT_32 numFrags = GetNumFragments(numSamples, pIn->numFrags);
+    UINT_32 bpp;
+
+    if (numFrags != numSamples) // EQAA
+    {
+        ADDR_ASSERT(numFrags <= 8);
+
+        if (!pIn->resolved)
+        {
+            if (numFrags == 1)
+            {
+                bpp = 1;
+                numSamples = numSamples == 16 ? 16 : 8;
+            }
+            else if (numFrags == 2)
+            {
+                ADDR_ASSERT(numSamples >= 4);
+
+                bpp = 2;
+                // numSamples is passed through unchanged
+            }
+            else if (numFrags == 4)
+            {
+                ADDR_ASSERT(numSamples >= 4);
+
+                bpp = 4;
+                // numSamples is passed through unchanged
+            }
+            else // numFrags == 8
+            {
+                ADDR_ASSERT(numSamples == 16);
+
+                bpp = 4;
+                // numSamples is passed through unchanged
+            }
+        }
+        else
+        {
+            if (numFrags == 1)
+            {
+                bpp = (numSamples == 16) ? 16 : 8;
+                numSamples = 1;
+            }
+            else if (numFrags == 2)
+            {
+                ADDR_ASSERT(numSamples >= 4);
+
+                bpp = numSamples*2;
+                numSamples = 1;
+            }
+            else if (numFrags == 4)
+            {
+                ADDR_ASSERT(numSamples >= 4);
+
+                bpp = numSamples*4;
+                numSamples = 1;
+            }
+            else // numFrags == 8
+            {
+                ADDR_ASSERT(numSamples >= 16);
+
+                bpp = 16*4;
+                numSamples = 1;
+            }
+        }
+    }
+    else // Normal AA
+    {
+        if (!pIn->resolved)
+        {
+            bpp = ComputeFmaskNumPlanesFromNumSamples(numSamples);
+            numSamples = numSamples == 2 ? 8 : numSamples;
+        }
+        else
+        {
+            // The same as 8XX
+            bpp = ComputeFmaskResolvedBppFromNumSamples(numSamples);
+            numSamples = 1; // 1x sample
+        }
+    }
+
+    SafeAssign(pNumSamples, numSamples);
+
+    return bpp;
+}
+
+/**
+***************************************************************************************************
+*   SIAddrLib::HwlOverrideTileMode
+*
+*   @brief
+*       Override tile modes (for PRT only, so a client cannot pass in an invalid PRT mode for SI)
+*
+*   @return
+*       TRUE if *pTileMode was replaced, FALSE if it was left as passed in
+*
+***************************************************************************************************
+*/
+BOOL_32 SIAddrLib::HwlOverrideTileMode(
+    const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,     ///< [in] input structure
+    AddrTileMode*                          pTileMode, ///< [in/out] pointer to the tile mode
+    AddrTileType*                          pTileType  ///< [in/out] pointer to the tile type
+    ) const
+{
+    const AddrTileMode requested = *pTileMode;
+    AddrTileMode       replacement = requested;
+
+    // Each PRT mode is replaced by a non-PRT 2D/3D tiled mode
+    switch (requested)
+    {
+        case ADDR_TM_PRT_TILED_THIN1:
+            replacement = ADDR_TM_2D_TILED_THIN1;
+            break;
+
+        case ADDR_TM_PRT_TILED_THICK:
+        case ADDR_TM_PRT_2D_TILED_THICK:
+            replacement = ADDR_TM_2D_TILED_THICK;
+            break;
+
+        case ADDR_TM_PRT_3D_TILED_THICK:
+            replacement = ADDR_TM_3D_TILED_THICK;
+            break;
+
+        default:
+            // every other mode is left as requested
+            break;
+    }
+
+    if (replacement == requested)
+    {
+        return FALSE;
+    }
+
+    *pTileMode = replacement;
+    ADDR_ASSERT(pIn->flags.prt == TRUE);
+
+    return TRUE;
+}
+
diff --git a/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.h b/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.h
new file mode 100644
index 00000000000..897beb1bb92
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/addrlib/r800/siaddrlib.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+***************************************************************************************************
+* @file siaddrlib.h
+* @brief Contains the SIAddrLib class definition.
+***************************************************************************************************
+*/
+
+#ifndef __SI_ADDR_LIB_H__
+#define __SI_ADDR_LIB_H__
+
+#include "addrlib.h"
+#include "egbaddrlib.h"
+
+/**
+***************************************************************************************************
+* @brief Describes the information in tile mode table (one entry of SIAddrLib::m_tileTable)
+***************************************************************************************************
+*/
+struct ADDR_TILECONFIG
+{
+ AddrTileMode mode; ///< Tile mode of this entry
+ AddrTileType type; ///< Tile type of this entry
+ ADDR_TILEINFO info; ///< Tile info (banks, bank width/height, pipe config, ...)
+};
+
+/**
+***************************************************************************************************
+* @brief SI specific settings structure: one-bit flags held in an anonymous struct.
+***************************************************************************************************
+*/
+struct SIChipSettings
+{
+ struct
+ {
+ UINT_32 isSouthernIsland : 1;
+ UINT_32 isTahiti : 1;
+ UINT_32 isPitCairn : 1;
+ UINT_32 isCapeVerde : 1;
+ /// Oland/Hainan are of GFXIP 6.0, similar with SI
+ UINT_32 isOland : 1;
+ UINT_32 isHainan : 1;
+ };
+};
+
+/**
+***************************************************************************************************
+* @brief This class is the SI specific address library
+* function set. It derives from EgBasedAddrLib and owns the decoded tile mode table.
+***************************************************************************************************
+*/
+class SIAddrLib : public EgBasedAddrLib
+{
+public:
+ /// Creates SIAddrLib object
+ static AddrLib* CreateObj(const AddrClient* pClient)
+ {
+ return new(pClient) SIAddrLib(pClient);
+ }
+
+protected:
+ SIAddrLib(const AddrClient* pClient);
+ virtual ~SIAddrLib();
+
+ // Hwl interface - defined in AddrLib
+ virtual ADDR_E_RETURNCODE HwlComputeSurfaceInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ virtual ADDR_E_RETURNCODE HwlConvertTileInfoToHW(
+ const ADDR_CONVERT_TILEINFOTOHW_INPUT* pIn,
+ ADDR_CONVERT_TILEINFOTOHW_OUTPUT* pOut) const;
+
+ virtual UINT_64 HwlComputeXmaskAddrFromCoord(
+ UINT_32 pitch, UINT_32 height, UINT_32 x, UINT_32 y, UINT_32 slice, UINT_32 numSlices,
+ UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
+ ADDR_TILEINFO* pTileInfo, UINT_32* pBitPosition) const;
+
+ virtual VOID HwlComputeXmaskCoordFromAddr(
+ UINT_64 addr, UINT_32 bitPosition, UINT_32 pitch, UINT_32 height, UINT_32 numSlices,
+ UINT_32 factor, BOOL_32 isLinear, BOOL_32 isWidth8, BOOL_32 isHeight8,
+ ADDR_TILEINFO* pTileInfo, UINT_32* pX, UINT_32* pY, UINT_32* pSlice) const;
+
+ virtual ADDR_E_RETURNCODE HwlGetTileIndex(
+ const ADDR_GET_TILEINDEX_INPUT* pIn,
+ ADDR_GET_TILEINDEX_OUTPUT* pOut) const;
+
+ virtual BOOL_32 HwlComputeMipLevel(
+ ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn) const;
+
+ virtual AddrChipFamily HwlConvertChipFamily(
+ UINT_32 uChipFamily, UINT_32 uChipRevision);
+
+ virtual BOOL_32 HwlInitGlobalParams(
+ const ADDR_CREATE_INPUT* pCreateIn);
+
+ virtual ADDR_E_RETURNCODE HwlSetupTileCfg(
+ INT_32 index, INT_32 macroModeIndex,
+ ADDR_TILEINFO* pInfo, AddrTileMode* pMode = 0, AddrTileType* pType = 0) const;
+
+ virtual VOID HwlComputeTileDataWidthAndHeightLinear(
+ UINT_32* pMacroWidth, UINT_32* pMacroHeight,
+ UINT_32 bpp, ADDR_TILEINFO* pTileInfo) const;
+
+ virtual UINT_64 HwlComputeHtileBytes(
+ UINT_32 pitch, UINT_32 height, UINT_32 bpp,
+ BOOL_32 isLinear, UINT_32 numSlices, UINT_64* pSliceBytes, UINT_32 baseAlign) const;
+
+ virtual UINT_32 ComputePipeFromCoord(
+ UINT_32 x, UINT_32 y, UINT_32 slice,
+ AddrTileMode tileMode, UINT_32 pipeSwizzle, BOOL_32 ignoreSE,
+ ADDR_TILEINFO* pTileInfo) const;
+
+ virtual UINT_32 HwlGetPipes(const ADDR_TILEINFO* pTileInfo) const;
+
+ /// Pre-handler of 3x pitch (96 bit) adjustment
+ virtual UINT_32 HwlPreHandleBaseLvl3xPitch(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
+ /// Post-handler of 3x pitch adjustment
+ virtual UINT_32 HwlPostHandleBaseLvl3xPitch(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, UINT_32 expPitch) const;
+
+ /// Dummy function to finalize the inheritance
+ virtual UINT_32 HwlComputeXmaskCoordYFrom8Pipe(
+ UINT_32 pipe, UINT_32 x) const;
+
+ // Sub-hwl interface - defined in EgBasedAddrLib
+ virtual VOID HwlSetupTileInfo(
+ AddrTileMode tileMode, ADDR_SURFACE_FLAGS flags,
+ UINT_32 bpp, UINT_32 pitch, UINT_32 height, UINT_32 numSamples,
+ ADDR_TILEINFO* inputTileInfo, ADDR_TILEINFO* outputTileInfo,
+ AddrTileType inTileType, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ virtual UINT_32 HwlGetPitchAlignmentMicroTiled(
+ AddrTileMode tileMode, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples) const;
+
+ virtual UINT_64 HwlGetSizeAdjustmentMicroTiled(
+ UINT_32 thickness, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+ UINT_32 baseAlign, UINT_32 pitchAlign,
+ UINT_32 *pPitch, UINT_32 *pHeight) const;
+
+ virtual VOID HwlCheckLastMacroTiledLvl(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn, ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pOut) const;
+
+ virtual BOOL_32 HwlTileInfoEqual(
+ const ADDR_TILEINFO* pLeft, const ADDR_TILEINFO* pRight) const;
+
+ virtual AddrTileMode HwlDegradeThickTileMode(
+ AddrTileMode baseTileMode, UINT_32 numSlices, UINT_32* pBytesPerTile) const;
+
+ virtual BOOL_32 HwlOverrideTileMode(
+ const ADDR_COMPUTE_SURFACE_INFO_INPUT* pIn,
+ AddrTileMode* pTileMode,
+ AddrTileType* pTileType) const;
+
+ virtual BOOL_32 HwlSanityCheckMacroTiled(
+ ADDR_TILEINFO* pTileInfo) const
+ {
+ return TRUE; // no SI-specific check: every tile info is accepted
+ }
+
+ virtual UINT_32 HwlGetPitchAlignmentLinear(UINT_32 bpp, ADDR_SURFACE_FLAGS flags) const;
+
+ virtual UINT_64 HwlGetSizeAdjustmentLinear(
+ AddrTileMode tileMode,
+ UINT_32 bpp, UINT_32 numSamples, UINT_32 baseAlign, UINT_32 pitchAlign,
+ UINT_32 *pPitch, UINT_32 *pHeight, UINT_32 *pHeightAlign) const;
+
+ virtual VOID HwlComputeSurfaceCoord2DFromBankPipe(
+ AddrTileMode tileMode, UINT_32* pX, UINT_32* pY, UINT_32 slice,
+ UINT_32 bank, UINT_32 pipe,
+ UINT_32 bankSwizzle, UINT_32 pipeSwizzle, UINT_32 tileSlices,
+ BOOL_32 ignoreSE,
+ ADDR_TILEINFO* pTileInfo) const;
+
+ virtual UINT_32 HwlPreAdjustBank(
+ UINT_32 tileX, UINT_32 bank, ADDR_TILEINFO* pTileInfo) const;
+
+ virtual INT_32 HwlPostCheckTileIndex(
+ const ADDR_TILEINFO* pInfo, AddrTileMode mode, AddrTileType type,
+ INT curIndex = TileIndexInvalid) const;
+
+ virtual VOID HwlFmaskPreThunkSurfInfo(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pFmaskIn,
+ const ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut,
+ ADDR_COMPUTE_SURFACE_INFO_INPUT* pSurfIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut) const;
+
+ virtual VOID HwlFmaskPostThunkSurfInfo(
+ const ADDR_COMPUTE_SURFACE_INFO_OUTPUT* pSurfOut,
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT* pFmaskOut) const;
+
+ virtual UINT_32 HwlComputeFmaskBits(
+ const ADDR_COMPUTE_FMASK_INFO_INPUT* pIn,
+ UINT_32* pNumSamples) const;
+
+ virtual BOOL_32 HwlReduceBankWidthHeight(
+ UINT_32 tileSize, UINT_32 bpp, ADDR_SURFACE_FLAGS flags, UINT_32 numSamples,
+ UINT_32 bankHeightAlign, UINT_32 pipes,
+ ADDR_TILEINFO* pTileInfo) const
+ {
+ return TRUE; // pTileInfo is left untouched
+ }
+
+ // Protected non-virtual functions
+ VOID ComputeTileCoordFromPipeAndElemIdx(
+ UINT_32 elemIdx, UINT_32 pipe, AddrPipeCfg pipeCfg, UINT_32 pitchInMacroTile,
+ UINT_32 x, UINT_32 y, UINT_32* pX, UINT_32* pY) const;
+
+ UINT_32 TileCoordToMaskElementIndex(
+ UINT_32 tx, UINT_32 ty, AddrPipeCfg pipeConfig,
+ UINT_32 *macroShift, UINT_32 *elemIdxBits) const;
+
+ BOOL_32 DecodeGbRegs(
+ const ADDR_REGISTER_VALUE* pRegValue);
+
+ const ADDR_TILECONFIG* GetTileSetting(
+ UINT_32 index) const;
+
+ static const UINT_32 TileTableSize = 32; ///< Capacity of m_tileTable
+ ADDR_TILECONFIG m_tileTable[TileTableSize]; ///< Filled by InitTileSettingTable
+ UINT_32 m_noOfEntries; ///< Number of m_tileTable entries in use
+
+private:
+
+ UINT_32 GetPipePerSurf(AddrPipeCfg pipeConfig) const;
+
+ VOID ReadGbTileMode(
+ UINT_32 regValue, ADDR_TILECONFIG* pCfg) const;
+ BOOL_32 InitTileSettingTable(
+ const UINT_32 *pSetting, UINT_32 noOfEntries);
+
+ SIChipSettings m_settings; ///< SI chip flags
+};
+
+#endif
+
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
new file mode 100644
index 00000000000..50c42e3599a
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -0,0 +1,781 @@
+/*
+ * Copyright © 2011 Marek Olšák
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Marek Olšák
+ */
+
+#include "amdgpu_cs.h"
+
+#include "os/os_time.h"
+#include "state_tracker/drm_driver.h"
+#include
+#include
+#include
+
+static const struct pb_vtbl amdgpu_winsys_bo_vtbl;
+
+/**
+ * Downcast a generic pb_buffer to the amdgpu buffer object.
+ *
+ * The assert checks that the buffer really uses amdgpu_winsys_bo_vtbl,
+ * i.e. that it was created by this winsys.
+ */
+static inline struct amdgpu_winsys_bo *amdgpu_winsys_bo(struct pb_buffer *bo)
+{
+   struct amdgpu_winsys_bo *wbo = (struct amdgpu_winsys_bo *)bo;
+
+   assert(bo->vtbl == &amdgpu_winsys_bo_vtbl);
+   return wbo;
+}
+
+/* Buffer manager used by this winsys: the generic pb_manager interface
+ * followed by a pointer to the winsys it allocates for. */
+struct amdgpu_bomgr {
+ struct pb_manager base; /* keep first: get_winsys() casts pb_manager* to amdgpu_bomgr* */
+ struct amdgpu_winsys *rws; /* filled in by amdgpu_bomgr_create() */
+};
+
+/* Return the winsys that the given buffer manager allocates for. */
+static struct amdgpu_winsys *get_winsys(struct pb_manager *mgr)
+{
+   struct amdgpu_bomgr *bomgr = (struct amdgpu_bomgr *)mgr;
+
+   return bomgr->rws;
+}
+
+/**
+ * Resolve a pb_buffer to the amdgpu buffer object behind it.
+ *
+ * Returns the buffer itself when it uses amdgpu_winsys_bo_vtbl, otherwise
+ * its base buffer when that one does, otherwise NULL.
+ */
+static struct amdgpu_winsys_bo *get_amdgpu_winsys_bo(struct pb_buffer *_buf)
+{
+   struct pb_buffer *base_buf;
+   pb_size offset;
+
+   /* The buffer is already one of ours. */
+   if (_buf->vtbl == &amdgpu_winsys_bo_vtbl)
+      return amdgpu_winsys_bo(_buf);
+
+   /* Otherwise look through the wrapper at its base buffer. */
+   pb_get_base_buffer(_buf, &base_buf, &offset);
+   if (base_buf->vtbl != &amdgpu_winsys_bo_vtbl)
+      return NULL;
+
+   return amdgpu_winsys_bo(base_buf);
+}
+
+/**
+ * Wait until the GPU is done with a buffer, or until the timeout expires.
+ *
+ * \param _buf     buffer to wait for (resolved via get_amdgpu_winsys_bo)
+ * \param timeout  relative timeout; 0 means "poll, never block"
+ * \param usage    not consulted by this implementation
+ * \return true if the buffer is idle, false if it is still busy
+ *
+ * Three paths:
+ *  - shared buffers ask the kernel through amdgpu_bo_wait_for_idle;
+ *  - timeout == 0 polls the per-ring fences while holding bo_fence_lock;
+ *  - otherwise the fences are referenced under the lock, waited on
+ *    without the lock, and the idle ones are released afterwards.
+ */
+static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
+ enum radeon_bo_usage usage)
+{
+ struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+ struct amdgpu_winsys *ws = bo->rws;
+ int i;
+
+ if (bo->is_shared) {
+ /* We can't use user fences for shared buffers, because user fences
+ * are local to this process only. If we want to wait for all buffer
+ * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
+ */
+ bool buffer_busy = true;
+ int r;
+
+ r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
+ if (r)
+ fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
+ r);
+ /* On failure the error is only logged; buffer_busy was initialized to
+ * true, so the buffer is reported busy unless the call overwrote it. */
+ return !buffer_busy;
+ }
+
+ if (timeout == 0) {
+ /* Timeout == 0 is quite simple. */
+ pipe_mutex_lock(ws->bo_fence_lock);
+ for (i = 0; i < RING_LAST; i++)
+ if (bo->fence[i]) {
+ if (amdgpu_fence_wait(bo->fence[i], 0, false)) {
+ /* Release the idle fence to avoid checking it again later. */
+ amdgpu_fence_reference(&bo->fence[i], NULL);
+ } else {
+ /* First busy fence ends the poll; drop the lock before returning. */
+ pipe_mutex_unlock(ws->bo_fence_lock);
+ return false;
+ }
+ }
+ pipe_mutex_unlock(ws->bo_fence_lock);
+ return true;
+
+ } else {
+ struct pipe_fence_handle *fence[RING_LAST] = {};
+ bool fence_idle[RING_LAST] = {};
+ bool buffer_idle = true;
+ /* One absolute deadline is shared by all per-ring waits below. */
+ int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
+
+ /* Take references to all fences, so that we can wait for them
+ * without the lock. */
+ pipe_mutex_lock(ws->bo_fence_lock);
+ for (i = 0; i < RING_LAST; i++)
+ amdgpu_fence_reference(&fence[i], bo->fence[i]);
+ pipe_mutex_unlock(ws->bo_fence_lock);
+
+ /* Now wait for the fences. */
+ for (i = 0; i < RING_LAST; i++) {
+ if (fence[i]) {
+ if (amdgpu_fence_wait(fence[i], abs_timeout, true))
+ fence_idle[i] = true;
+ else
+ buffer_idle = false;
+ }
+ }
+
+ /* Release idle fences to avoid checking them again later. */
+ pipe_mutex_lock(ws->bo_fence_lock);
+ for (i = 0; i < RING_LAST; i++) {
+ /* Only clear the BO's fence if it is still the one we waited on;
+ * it may have been replaced while the lock was dropped. */
+ if (fence[i] == bo->fence[i] && fence_idle[i])
+ amdgpu_fence_reference(&bo->fence[i], NULL);
+
+ /* Always drop the local reference taken above. */
+ amdgpu_fence_reference(&fence[i], NULL);
+ }
+ pipe_mutex_unlock(ws->bo_fence_lock);
+
+ return buffer_idle;
+ }
+}
+
+/* Report the memory domain(s) recorded for the buffer when it was created
+ * or imported. */
+static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
+      struct radeon_winsys_cs_handle *buf)
+{
+   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo *)buf;
+
+   return bo->initial_domain;
+}
+
+/**
+ * pb_vtbl destroy hook: unmap the buffer from the GPU address space, free
+ * its VA range and kernel BO, drop the per-ring fences, undo the
+ * allocation accounting and free the wrapper itself.
+ */
+static void amdgpu_bo_destroy(struct pb_buffer *_buf)
+{
+   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
+   struct amdgpu_winsys *ws;
+   int ring;
+
+   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
+   amdgpu_va_range_free(bo->va_handle);
+   amdgpu_bo_free(bo->bo);
+
+   for (ring = 0; ring < RING_LAST; ring++)
+      amdgpu_fence_reference(&bo->fence[ring], NULL);
+
+   /* Mirror the accounting done at creation: VRAM takes precedence when
+    * both domain bits are set. */
+   ws = bo->rws;
+   if (bo->initial_domain & RADEON_DOMAIN_VRAM) {
+      ws->allocated_vram -= align(bo->base.size, ws->gart_page_size);
+   } else if (bo->initial_domain & RADEON_DOMAIN_GTT) {
+      ws->allocated_gtt -= align(bo->base.size, ws->gart_page_size);
+   }
+
+   FREE(bo);
+}
+
+/* Tell whether the not-yet-flushed command stream "cs" uses "bo" in a way
+ * that conflicts with the requested CPU access.
+ *
+ * A CPU read only conflicts with GPU writes; a CPU write conflicts with
+ * any GPU use. A NULL cs never conflicts.
+ */
+static bool amdgpu_bo_map_conflicts_with_cs(struct amdgpu_cs *cs,
+                                            struct amdgpu_winsys_bo *bo,
+                                            bool cpu_write)
+{
+   if (!cs)
+      return false;
+
+   if (cpu_write)
+      return amdgpu_bo_is_referenced_by_cs(cs, bo);
+
+   return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
+                                                   RADEON_USAGE_WRITE);
+}
+
+/**
+ * Map a buffer for CPU access.
+ *
+ * \param buf    buffer handle (an amdgpu_winsys_bo)
+ * \param rcs    command stream that may still reference the buffer, or NULL
+ * \param usage  PIPE_TRANSFER_* flags
+ * \return CPU pointer, or NULL if the mapping failed or, with
+ *         PIPE_TRANSFER_DONTBLOCK, if the buffer is busy
+ *
+ * Unless PIPE_TRANSFER_UNSYNCHRONIZED is set, the function first makes
+ * sure the GPU is done with the buffer: it flushes "rcs" if it references
+ * the buffer and then waits (or, with DONTBLOCK, only polls).
+ */
+static void *amdgpu_bo_map(struct radeon_winsys_cs_handle *buf,
+                           struct radeon_winsys_cs *rcs,
+                           enum pipe_transfer_usage usage)
+{
+   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
+   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
+   void *cpu = NULL;
+
+   /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED, so it is only
+    * looked at on the synchronized path. */
+   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+      const bool cpu_write = (usage & PIPE_TRANSFER_WRITE) != 0;
+      /* Mapping for read: GPU reads are harmless because neither side
+       * changes the data, so only GPU writes have to be waited for. */
+      const enum radeon_bo_usage gpu_usage =
+         cpu_write ? RADEON_USAGE_READWRITE : RADEON_USAGE_WRITE;
+
+      if (usage & PIPE_TRANSFER_DONTBLOCK) {
+         if (amdgpu_bo_map_conflicts_with_cs(cs, bo, cpu_write)) {
+            /* Kick off the work asynchronously and report "busy". */
+            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
+            return NULL;
+         }
+
+         if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0, gpu_usage))
+            return NULL;
+      } else {
+         uint64_t start = os_time_get_nano();
+
+         if (amdgpu_bo_map_conflicts_with_cs(cs, bo, cpu_write))
+            cs->flush_cs(cs->flush_data, 0, NULL);
+
+         amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
+                        gpu_usage);
+
+         /* Account the time spent flushing and waiting. */
+         bo->rws->buffer_wait_time += os_time_get_nano() - start;
+      }
+   }
+
+   /* Buffers created from user memory hand back the user pointer. */
+   if (bo->user_ptr)
+      return bo->user_ptr;
+
+   if (amdgpu_bo_cpu_map(bo->bo, &cpu))
+      return NULL;
+
+   return cpu;
+}
+
+/* Undo a CPU mapping by forwarding to amdgpu_bo_cpu_unmap on the
+ * underlying libdrm buffer handle. */
+static void amdgpu_bo_unmap(struct radeon_winsys_cs_handle *buf)
+{
+   amdgpu_bo_handle handle = ((struct amdgpu_winsys_bo*)buf)->bo;
+
+   amdgpu_bo_cpu_unmap(handle);
+}
+
+/* pb_vtbl get_base_buffer hook: the buffer reports itself as its own base
+ * buffer, at offset 0. */
+static void amdgpu_bo_get_base_buffer(struct pb_buffer *buf,
+                                      struct pb_buffer **base_buf,
+                                      unsigned *offset)
+{
+   *offset = 0;
+   *base_buf = buf;
+}
+
+/* pb_vtbl validate hook. Buffers are always pinned, so there is nothing
+ * to validate and the call always succeeds. */
+static enum pipe_error amdgpu_bo_validate(struct pb_buffer *_buf,
+                                          struct pb_validate *vl,
+                                          unsigned flags)
+{
+   (void)_buf;
+   (void)vl;
+   (void)flags;
+
+   return PIPE_OK;
+}
+
+/* pb_vtbl fence hook: intentionally empty. */
+static void amdgpu_bo_fence(struct pb_buffer *buf,
+                            struct pipe_fence_handle *fence)
+{
+   (void)buf;
+   (void)fence;
+}
+
+/* Function table stored in bo->base.vtbl for every buffer this file
+ * creates; its address also serves as the type tag checked by
+ * amdgpu_winsys_bo() and get_amdgpu_winsys_bo().
+ *
+ * Entries are positional.
+ * NOTE(review): the two NULL slots are presumably the pb_vtbl map/unmap
+ * hooks (mapping goes through amdgpu_bo_map instead) - confirm against
+ * the pb_vtbl definition. */
+static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
+ amdgpu_bo_destroy,
+ NULL, /* never called */
+ NULL, /* never called */
+ amdgpu_bo_validate,
+ amdgpu_bo_fence,
+ amdgpu_bo_get_base_buffer,
+};
+
+/**
+ * pb_manager create_buffer hook: allocate a kernel BO, reserve a GPU
+ * virtual address range for it and map it there.
+ *
+ * \param _mgr  the amdgpu_bomgr
+ * \param size  allocation size, also used for the VA range and mapping
+ * \param desc  really a struct amdgpu_bo_desc (cast below); provides
+ *              alignment, usage, initial_domain and flags
+ * \return the new buffer with a reference count of 1, or NULL on failure
+ *
+ * Failures unwind in reverse order through the error_* labels.
+ */
+static struct pb_buffer *amdgpu_bomgr_create_bo(struct pb_manager *_mgr,
+ pb_size size,
+ const struct pb_desc *desc)
+{
+ struct amdgpu_winsys *rws = get_winsys(_mgr);
+ struct amdgpu_bo_desc *rdesc = (struct amdgpu_bo_desc*)desc;
+ struct amdgpu_bo_alloc_request request = {0};
+ amdgpu_bo_handle buf_handle;
+ uint64_t va = 0;
+ struct amdgpu_winsys_bo *bo;
+ amdgpu_va_handle va_handle;
+ int r;
+
+ assert(rdesc->initial_domain & RADEON_DOMAIN_VRAM_GTT);
+ bo = CALLOC_STRUCT(amdgpu_winsys_bo);
+ if (!bo) {
+ return NULL;
+ }
+
+ request.alloc_size = size;
+ request.phys_alignment = desc->alignment;
+
+ /* Translate the radeon domains/flags into the libdrm request. The
+ * CPU_ACCESS flag is only honored for VRAM and GTT_WC only for GTT. */
+ if (rdesc->initial_domain & RADEON_DOMAIN_VRAM) {
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
+ if (rdesc->flags & RADEON_FLAG_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ }
+ if (rdesc->initial_domain & RADEON_DOMAIN_GTT) {
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+ if (rdesc->flags & RADEON_FLAG_GTT_WC)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ }
+
+ r = amdgpu_bo_alloc(rws->dev, &request, &buf_handle);
+ if (r) {
+ fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
+ fprintf(stderr, "amdgpu: size : %d bytes\n", size);
+ fprintf(stderr, "amdgpu: alignment : %d bytes\n", desc->alignment);
+ fprintf(stderr, "amdgpu: domains : %d\n", rdesc->initial_domain);
+ goto error_bo_alloc;
+ }
+
+ r = amdgpu_va_range_alloc(rws->dev, amdgpu_gpu_va_range_general,
+ size, desc->alignment, 0, &va, &va_handle, 0);
+ if (r)
+ goto error_va_alloc;
+
+ r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
+ if (r)
+ goto error_va_map;
+
+ pipe_reference_init(&bo->base.reference, 1);
+ bo->base.alignment = desc->alignment;
+ bo->base.usage = desc->usage;
+ bo->base.size = size;
+ bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
+ bo->rws = rws;
+ bo->bo = buf_handle;
+ bo->va = va;
+ bo->va_handle = va_handle;
+ bo->initial_domain = rdesc->initial_domain;
+ bo->unique_id = __sync_fetch_and_add(&rws->next_bo_unique_id, 1);
+
+ /* Allocation accounting; a VRAM|GTT buffer is counted as VRAM only.
+ * amdgpu_bo_destroy subtracts the same amount. */
+ if (rdesc->initial_domain & RADEON_DOMAIN_VRAM)
+ rws->allocated_vram += align(size, rws->gart_page_size);
+ else if (rdesc->initial_domain & RADEON_DOMAIN_GTT)
+ rws->allocated_gtt += align(size, rws->gart_page_size);
+
+ return &bo->base;
+
+/* Each label releases what was acquired before the failing step and falls
+ * through to the next one. */
+error_va_map:
+ amdgpu_va_range_free(va_handle);
+
+error_va_alloc:
+ amdgpu_bo_free(buf_handle);
+
+error_bo_alloc:
+ FREE(bo);
+ return NULL;
+}
+
+/* pb_manager flush hook: this manager has nothing to flush. */
+static void amdgpu_bomgr_flush(struct pb_manager *mgr)
+{
+   (void)mgr;
+}
+
+/* Busy query for the cache bufmgr.
+ *
+ * A buffer counts as busy while any command stream still references it,
+ * or while a non-blocking wait reports that the GPU has not finished
+ * with it. */
+static boolean amdgpu_bomgr_is_buffer_busy(struct pb_manager *_mgr,
+                                           struct pb_buffer *_buf)
+{
+   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
+
+   if (amdgpu_bo_is_referenced_by_any_cs(bo))
+      return TRUE;
+
+   /* Timeout 0: just poll. */
+   return amdgpu_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)
+             ? FALSE : TRUE;
+}
+
+/* pb_manager destroy hook: the manager owns nothing besides its own
+ * allocation, so freeing it is all that is needed. */
+static void amdgpu_bomgr_destroy(struct pb_manager *mgr)
+{
+   struct amdgpu_bomgr *bomgr = (struct amdgpu_bomgr *)mgr;
+
+   FREE(bomgr);
+}
+
+/**
+ * Create the buffer manager that allocates kernel buffers for "rws".
+ *
+ * \return the manager's pb_manager interface, or NULL if out of memory
+ */
+struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws)
+{
+   struct amdgpu_bomgr *bomgr = CALLOC_STRUCT(amdgpu_bomgr);
+
+   if (bomgr == NULL)
+      return NULL;
+
+   bomgr->rws = rws;
+
+   /* Hook up the pb_manager interface. */
+   bomgr->base.create_buffer = amdgpu_bomgr_create_bo;
+   bomgr->base.is_buffer_busy = amdgpu_bomgr_is_buffer_busy;
+   bomgr->base.flush = amdgpu_bomgr_flush;
+   bomgr->base.destroy = amdgpu_bomgr_destroy;
+
+   return &bomgr->base;
+}
+
+/* Decode a TILE_SPLIT field value into a size: encodings 0..6 map to
+ * 64, 128, ..., 4096; any other encoding yields 1024. */
+static unsigned eg_tile_split(unsigned tile_split)
+{
+   if (tile_split > 6)
+      return 1024;
+
+   return 64u << tile_split;
+}
+
+/* Inverse of eg_tile_split(): encode a size of 64, 128, ..., 4096 as
+ * 0..6. Any other size is encoded as 4 (the code for 1024). */
+static unsigned eg_tile_split_rev(unsigned eg_tile_split)
+{
+   unsigned code;
+
+   for (code = 0; code <= 6; code++) {
+      if (eg_tile_split == (64u << code))
+         return code;
+   }
+
+   return 4;
+}
+
+/**
+ * Read the tiling description stored in a buffer's kernel metadata.
+ *
+ * \param _buf                buffer to query
+ * \param microtiled          out: RADEON_LAYOUT_TILED for 1D_TILED_THIN1
+ * \param macrotiled          out: RADEON_LAYOUT_TILED for 2D_TILED_THIN1
+ * \param bankw, bankh        out (optional): bank width / height
+ * \param tile_split          out (optional): tile split size
+ * \param stencil_tile_split  not written by this implementation
+ * \param mtilea              out (optional): macro tile aspect
+ * \param scanout             out (optional): true for display micro tiling
+ *
+ * bankw, bankh, tile_split and mtilea are only written when all four
+ * pointers are non-NULL. If the metadata query fails, no output is
+ * written at all.
+ */
+static void amdgpu_bo_get_tiling(struct pb_buffer *_buf,
+                                 enum radeon_bo_layout *microtiled,
+                                 enum radeon_bo_layout *macrotiled,
+                                 unsigned *bankw, unsigned *bankh,
+                                 unsigned *tile_split,
+                                 unsigned *stencil_tile_split,
+                                 unsigned *mtilea,
+                                 bool *scanout)
+{
+   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+   struct amdgpu_bo_info info = {0};
+   uint32_t tiling_flags;
+   int r;
+
+   r = amdgpu_bo_query_info(bo->bo, &info);
+   if (r)
+      return;
+
+   tiling_flags = info.metadata.tiling_info;
+
+   *microtiled = RADEON_LAYOUT_LINEAR;
+   *macrotiled = RADEON_LAYOUT_LINEAR;
+
+   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
+      *macrotiled = RADEON_LAYOUT_TILED;
+   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
+      *microtiled = RADEON_LAYOUT_TILED;
+
+   /* Every pointer dereferenced below must be checked. The previous
+    * condition tested tile_split twice and never bankh, so a NULL bankh
+    * was dereferenced. */
+   if (bankw && bankh && tile_split && mtilea) {
+      *bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+      *bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+      *tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
+      *mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+   }
+   if (scanout)
+      *scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
+}
+
+/**
+ * Store a tiling description in a buffer's kernel metadata.
+ *
+ * The parameters are packed into the AMDGPU_TILING_* bitfields and handed
+ * to amdgpu_bo_set_metadata. rcs, stencil_tile_split and pitch are not
+ * used, and a tile_split of 0 leaves the TILE_SPLIT field at zero.
+ */
+static void amdgpu_bo_set_tiling(struct pb_buffer *_buf,
+                                 struct radeon_winsys_cs *rcs,
+                                 enum radeon_bo_layout microtiled,
+                                 enum radeon_bo_layout macrotiled,
+                                 unsigned pipe_config,
+                                 unsigned bankw, unsigned bankh,
+                                 unsigned tile_split,
+                                 unsigned stencil_tile_split,
+                                 unsigned mtilea, unsigned num_banks,
+                                 uint32_t pitch,
+                                 bool scanout)
+{
+   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+   struct amdgpu_bo_metadata metadata = {0};
+   uint32_t tiling_flags = 0;
+
+   /* Array mode: macro tiling takes precedence over micro tiling. */
+   if (macrotiled == RADEON_LAYOUT_TILED) {
+      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
+   } else if (microtiled == RADEON_LAYOUT_TILED) {
+      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
+   } else {
+      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
+   }
+
+   /* Micro tile mode depends only on whether the surface is scanned out. */
+   if (!scanout) {
+      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
+   } else {
+      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
+   }
+
+   /* Pipe/bank layout; the sizes are stored as log2 values. */
+   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, pipe_config);
+   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(num_banks)-1);
+   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(bankw));
+   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(bankh));
+   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(mtilea));
+
+   if (tile_split) {
+      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT,
+                                        eg_tile_split_rev(tile_split));
+   }
+
+   metadata.tiling_info = tiling_flags;
+
+   amdgpu_bo_set_metadata(bo->bo, &metadata);
+}
+
+/* The command-stream handle of a buffer is simply a pointer to its
+ * amdgpu_winsys_bo (NULL if the buffer is not backed by one). */
+static struct radeon_winsys_cs_handle *amdgpu_get_cs_handle(struct pb_buffer *_buf)
+{
+   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(_buf);
+
+   return (struct radeon_winsys_cs_handle*)bo;
+}
+
+/**
+ * Create a buffer through one of the winsys buffer managers.
+ *
+ * \param rws                the winsys
+ * \param size               requested size; rounded up to the GART page size
+ * \param alignment          required alignment
+ * \param use_reusable_pool  allocate through ws->cman instead of ws->kman
+ * \param domain             initial memory domain(s)
+ * \param flags              RADEON_FLAG_* allocation flags
+ * \return the new buffer, or NULL on failure
+ */
+static struct pb_buffer *
+amdgpu_bo_create(struct radeon_winsys *rws,
+                 unsigned size,
+                 unsigned alignment,
+                 boolean use_reusable_pool,
+                 enum radeon_bo_domain domain,
+                 enum radeon_bo_flag flags)
+{
+   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+   struct amdgpu_bo_desc desc;
+   struct pb_manager *provider;
+
+   /* Don't use VRAM if the GPU doesn't have much. This is only the initial
+    * domain. The kernel is free to move the buffer if it wants to.
+    *
+    * 64MB means no VRAM by today's standards.
+    */
+   if ((domain & RADEON_DOMAIN_VRAM) &&
+       ws->info.vram_size <= 64*1024*1024) {
+      domain = RADEON_DOMAIN_GTT;
+      flags = RADEON_FLAG_GTT_WC;
+   }
+
+   /* Align size to page size. This is the minimum alignment for normal
+    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
+    * like constant/uniform buffers, can benefit from better and more reuse.
+    */
+   size = align(size, ws->gart_page_size);
+
+   memset(&desc, 0, sizeof(desc));
+   desc.base.alignment = alignment;
+   desc.initial_domain = domain;
+   desc.flags = flags;
+
+   /* Only set one usage bit each for domains and flags, or the cache manager
+    * might consider different sets of domains / flags compatible
+    */
+   if (domain == RADEON_DOMAIN_VRAM_GTT) {
+      desc.base.usage = 1 << 2;
+   } else {
+      desc.base.usage = domain >> 1;
+   }
+   assert(flags < sizeof(desc.base.usage) * 8 - 3);
+   desc.base.usage |= 1 << (flags + 3);
+
+   /* Pick the buffer manager: caching or plain kernel allocation. */
+   provider = use_reusable_pool ? ws->cman : ws->kman;
+
+   return provider->create_buffer(provider, size, &desc.base);
+}
+
+/**
+ * Import a buffer from a winsys handle (GEM flink name or dma-buf fd).
+ *
+ * \param rws      the winsys
+ * \param whandle  handle to import; whandle->type selects the import type
+ * \param stride   out (optional): receives whandle->stride
+ * \return a new buffer marked as shared, or NULL on failure or for an
+ *         unsupported handle type
+ *
+ * The imported BO gets its own GPU virtual address range and is counted
+ * in the allocation statistics according to its preferred heap.
+ */
+static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
+                                               struct winsys_handle *whandle,
+                                               unsigned *stride)
+{
+   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+   struct amdgpu_winsys_bo *bo;
+   enum amdgpu_bo_handle_type type;
+   struct amdgpu_bo_import_result result = {0};
+   uint64_t va;
+   amdgpu_va_handle va_handle;
+   struct amdgpu_bo_info info = {0};
+   enum radeon_bo_domain initial = 0;
+   int r;
+
+   /* Validate the handle type before allocating anything. The wrapper
+    * used to be allocated first and was leaked on the "default" path. */
+   switch (whandle->type) {
+   case DRM_API_HANDLE_TYPE_SHARED:
+      type = amdgpu_bo_handle_type_gem_flink_name;
+      break;
+   case DRM_API_HANDLE_TYPE_FD:
+      type = amdgpu_bo_handle_type_dma_buf_fd;
+      break;
+   default:
+      return NULL;
+   }
+
+   /* Initialize the structure. */
+   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
+   if (!bo) {
+      return NULL;
+   }
+
+   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
+   if (r)
+      goto error;
+
+   /* Get initial domains. */
+   r = amdgpu_bo_query_info(result.buf_handle, &info);
+   if (r)
+      goto error_query;
+
+   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
+   if (r)
+      goto error_query;
+
+   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0,
+                       AMDGPU_VA_OP_MAP);
+   if (r)
+      goto error_va_map;
+
+   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
+      initial |= RADEON_DOMAIN_VRAM;
+   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
+      initial |= RADEON_DOMAIN_GTT;
+
+   pipe_reference_init(&bo->base.reference, 1);
+   bo->base.alignment = info.phys_alignment;
+   bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
+   bo->bo = result.buf_handle;
+   bo->base.size = result.alloc_size;
+   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
+   bo->rws = ws;
+   bo->va = va;
+   bo->va_handle = va_handle;
+   bo->initial_domain = initial;
+   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+   /* Imported buffers are shared by definition; amdgpu_bo_wait then uses
+    * the kernel wait instead of process-local user fences. */
+   bo->is_shared = true;
+
+   if (stride)
+      *stride = whandle->stride;
+
+   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
+      ws->allocated_vram += align(bo->base.size, ws->gart_page_size);
+   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
+      ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+
+   return &bo->base;
+
+error_va_map:
+   amdgpu_va_range_free(va_handle);
+
+error_query:
+   amdgpu_bo_free(result.buf_handle);
+
+error:
+   FREE(bo);
+   return NULL;
+}
+
+/**
+ * Export a buffer as a winsys handle of the type requested in
+ * whandle->type (GEM flink name, dma-buf fd or KMS handle).
+ *
+ * On success the handle and stride are stored in whandle and the buffer
+ * is marked as shared. Returns FALSE for an unknown handle type or when
+ * the export fails.
+ */
+static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
+                                    unsigned stride,
+                                    struct winsys_handle *whandle)
+{
+   struct amdgpu_winsys_bo *bo = get_amdgpu_winsys_bo(buffer);
+   enum amdgpu_bo_handle_type export_type;
+
+   switch (whandle->type) {
+   case DRM_API_HANDLE_TYPE_KMS:
+      export_type = amdgpu_bo_handle_type_kms;
+      break;
+   case DRM_API_HANDLE_TYPE_FD:
+      export_type = amdgpu_bo_handle_type_dma_buf_fd;
+      break;
+   case DRM_API_HANDLE_TYPE_SHARED:
+      export_type = amdgpu_bo_handle_type_gem_flink_name;
+      break;
+   default:
+      return FALSE;
+   }
+
+   if (amdgpu_bo_export(bo->bo, export_type, &whandle->handle) != 0)
+      return FALSE;
+
+   whandle->stride = stride;
+   bo->is_shared = true;
+   return TRUE;
+}
+
+/**
+ * Wrap user memory in a buffer object.
+ *
+ * The memory at "pointer" (of "size" bytes) is turned into a kernel BO,
+ * given a GPU virtual address range and accounted as GTT. The user
+ * pointer is remembered so that amdgpu_bo_map can return it directly.
+ *
+ * \return the new buffer, or NULL on failure
+ */
+static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
+                                            void *pointer, unsigned size)
+{
+   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
+   struct amdgpu_winsys_bo *bo;
+   amdgpu_bo_handle user_bo;
+   amdgpu_va_handle va_handle;
+   uint64_t va;
+
+   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
+   if (bo == NULL)
+      return NULL;
+
+   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &user_bo) != 0)
+      goto fail_free_wrapper;
+
+   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
+                             size, 1 << 12, 0, &va, &va_handle, 0) != 0)
+      goto fail_free_bo;
+
+   if (amdgpu_bo_va_op(user_bo, 0, size, va, 0, AMDGPU_VA_OP_MAP) != 0)
+      goto fail_free_va;
+
+   /* Generic pb_buffer part. */
+   pipe_reference_init(&bo->base.reference, 1);
+   bo->base.alignment = 0;
+   bo->base.usage = PB_USAGE_GPU_WRITE | PB_USAGE_GPU_READ;
+   bo->base.size = size;
+   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
+
+   /* amdgpu-specific part. */
+   bo->rws = ws;
+   bo->bo = user_bo;
+   bo->user_ptr = pointer;
+   bo->va = va;
+   bo->va_handle = va_handle;
+   bo->initial_domain = RADEON_DOMAIN_GTT;
+   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
+
+   ws->allocated_gtt += align(bo->base.size, ws->gart_page_size);
+
+   return (struct pb_buffer*)bo;
+
+fail_free_va:
+   amdgpu_va_range_free(va_handle);
+
+fail_free_bo:
+   amdgpu_bo_free(user_bo);
+
+fail_free_wrapper:
+   FREE(bo);
+   return NULL;
+}
+
+/* Return the GPU virtual address the buffer was mapped at on creation. */
+static uint64_t amdgpu_bo_get_va(struct radeon_winsys_cs_handle *buf)
+{
+   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
+
+   return bo->va;
+}
+
+/* Install the buffer-related entry points of this file into the
+ * radeon_winsys function table of "ws". */
+void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws)
+{
+   /* Creation and import/export. */
+   ws->base.buffer_create = amdgpu_bo_create;
+   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
+   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
+   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
+
+   /* CPU access and synchronization. */
+   ws->base.buffer_map = amdgpu_bo_map;
+   ws->base.buffer_unmap = amdgpu_bo_unmap;
+   ws->base.buffer_wait = amdgpu_bo_wait;
+
+   /* Tiling metadata. */
+   ws->base.buffer_set_tiling = amdgpu_bo_set_tiling;
+   ws->base.buffer_get_tiling = amdgpu_bo_get_tiling;
+
+   /* Queries. */
+   ws->base.buffer_get_cs_handle = amdgpu_get_cs_handle;
+   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
+   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
+}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
new file mode 100644
index 00000000000..3739fd1366e
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * Copyright © 2011 Marek Olšák
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Marek Olšák
+ */
+
+#ifndef AMDGPU_BO_H
+#define AMDGPU_BO_H
+
+#include "amdgpu_winsys.h"
+#include "pipebuffer/pb_bufmgr.h"
+
+/* Buffer description passed to the buffer managers: the generic pb_desc
+ * extended with the amdgpu-specific placement request. A pointer to
+ * "base" is what is handed to pb_manager::create_buffer and cast back. */
+struct amdgpu_bo_desc {
+ struct pb_desc base; /* keep first so the pb_desc pointer can be cast back */
+
+ enum radeon_bo_domain initial_domain; /* requested initial memory domain(s) */
+ unsigned flags; /* RADEON_FLAG_* bits */
+};
+
+/* An amdgpu buffer object as seen by the winsys: a pb_buffer backed by a
+ * libdrm BO handle that is mapped into the GPU virtual address space. */
+struct amdgpu_winsys_bo {
+ struct pb_buffer base; /* keep first: pb_buffer pointers are cast to this struct */
+
+ struct amdgpu_winsys *rws; /* winsys this buffer belongs to */
+ void *user_ptr; /* from buffer_from_ptr */
+
+ amdgpu_bo_handle bo; /* libdrm buffer handle */
+ uint32_t unique_id; /* taken from the winsys' next_bo_unique_id counter */
+ amdgpu_va_handle va_handle; /* handle of the VA range, needed to free it */
+ uint64_t va; /* GPU virtual address of the buffer */
+ enum radeon_bo_domain initial_domain; /* also decides VRAM vs GTT accounting */
+
+ /* how many command streams is this bo referenced in? */
+ int num_cs_references;
+
+ /* whether buffer_get_handle or buffer_from_handle was called,
+ * it can only transition from false to true
+ */
+ volatile int is_shared; /* bool (int for atomicity) */
+
+ /* Fences for buffer synchronization. */
+ struct pipe_fence_handle *fence[RING_LAST]; /* one slot per ring type */
+};
+
+struct pb_manager *amdgpu_bomgr_create(struct amdgpu_winsys *rws);
+void amdgpu_bomgr_init_functions(struct amdgpu_winsys *ws);
+
+/* Typed wrapper around pb_reference() for amdgpu buffer objects: makes
+ * *dst refer to src, with pb_reference doing the reference counting. */
+static inline
+void amdgpu_winsys_bo_reference(struct amdgpu_winsys_bo **dst,
+                                struct amdgpu_winsys_bo *src)
+{
+   struct pb_buffer **dst_base = (struct pb_buffer**)dst;
+   struct pb_buffer *src_base = (struct pb_buffer*)src;
+
+   pb_reference(dst_base, src_base);
+}
+
+#endif
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
new file mode 100644
index 00000000000..0f42298c2ad
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -0,0 +1,704 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * Copyright © 2010 Marek Olšák
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Marek Olšák
+ */
+
+#include "amdgpu_cs.h"
+#include "os/os_time.h"
+#include
+#include
+
+
+/* FENCES */
+
+/**
+ * Create a fence for a submission on the given context and ring.
+ *
+ * The fence starts with a reference count of 1 and takes a reference on
+ * "ctx". The sequence number is filled in later by
+ * amdgpu_fence_submitted().
+ *
+ * \return the new fence, or NULL if the allocation failed
+ */
+static struct pipe_fence_handle *
+amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
+                    unsigned ip_instance, unsigned ring)
+{
+   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
+
+   /* Previously the allocation result was dereferenced unchecked. */
+   if (!fence)
+      return NULL;
+
+   fence->reference.count = 1;
+   fence->ctx = ctx;
+   fence->fence.context = ctx->ctx;
+   fence->fence.ip_type = ip_type;
+   fence->fence.ip_instance = ip_instance;
+   fence->fence.ring = ring;
+   p_atomic_inc(&ctx->refcount);
+   return (struct pipe_fence_handle *)fence;
+}
+
+/* Record what a fence needs once its submission went through: the
+ * sequence number from the request and the address of the user fence. */
+static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
+                                   struct amdgpu_cs_request* request,
+                                   uint64_t *user_fence_cpu_address)
+{
+   struct amdgpu_fence *afence = (struct amdgpu_fence*)fence;
+
+   afence->user_fence_cpu_address = user_fence_cpu_address;
+   afence->fence.fence = request->seq_no;
+}
+
+/* Mark a fence as signalled so that later waits return immediately. */
+static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)
+{
+   ((struct amdgpu_fence*)fence)->signalled = true;
+}
+
+/**
+ * Wait for a fence.
+ *
+ * \param fence     fence to wait for
+ * \param timeout   timeout value
+ * \param absolute  whether "timeout" is already an absolute time; if
+ *                  not, it is converted with os_time_get_absolute_timeout
+ * \return true if the fence has signalled, false on timeout or error
+ *
+ * Cheap checks come first: the cached "signalled" flag, then the user
+ * fence value; only then is the kernel queried through libdrm.
+ */
+bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
+                       bool absolute)
+{
+   struct amdgpu_fence *afence = (struct amdgpu_fence*)fence;
+   uint64_t *user_fence;
+   int64_t deadline;
+   uint32_t expired;
+
+   if (afence->signalled)
+      return true;
+
+   deadline = absolute ? (int64_t)timeout
+                       : os_time_get_absolute_timeout(timeout);
+
+   /* The user fence holds the last completed sequence number. */
+   user_fence = afence->user_fence_cpu_address;
+   if (user_fence && *user_fence >= afence->fence.fence) {
+      afence->signalled = true;
+      return true;
+   }
+
+   /* Now use the libdrm query. */
+   if (amdgpu_cs_query_fence_status(&afence->fence,
+                                    deadline,
+                                    AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE,
+                                    &expired)) {
+      fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
+      return false;
+   }
+
+   if (!expired)
+      return false;
+
+   /* This variable can only transition from false to true, so it doesn't
+    * matter if threads race for it. */
+   afence->signalled = true;
+   return true;
+}
+
+/* Fence wait entry point taking a relative timeout; "rws" is unused. */
+static bool amdgpu_fence_wait_rel_timeout(struct radeon_winsys *rws,
+                                          struct pipe_fence_handle *fence,
+                                          uint64_t timeout)
+{
+   const bool timeout_is_absolute = false;
+
+   (void)rws;
+   return amdgpu_fence_wait(fence, timeout, timeout_is_absolute);
+}
+
+/* CONTEXTS */
+
+/**
+ * Create a submission context.
+ *
+ * Besides the libdrm context, a 4 KB GTT buffer is allocated, mapped for
+ * CPU access and zeroed; it is stored as the context's user fence buffer.
+ *
+ * \return the new context with a reference count of 1, or NULL on failure
+ *         (everything acquired up to that point is released again)
+ */
+static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws)
+{
+   struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx);
+   struct amdgpu_bo_alloc_request alloc_buffer = {0};
+   amdgpu_bo_handle buf_handle;
+   int r;
+
+   /* Previously the allocation result was dereferenced unchecked. */
+   if (!ctx)
+      return NULL;
+
+   ctx->ws = amdgpu_winsys(ws);
+   ctx->refcount = 1;
+
+   r = amdgpu_cs_ctx_create(ctx->ws->dev, &ctx->ctx);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
+      goto error_ctx_create;
+   }
+
+   alloc_buffer.alloc_size = 4 * 1024;
+   alloc_buffer.phys_alignment = 4 * 1024;
+   alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
+
+   r = amdgpu_bo_alloc(ctx->ws->dev, &alloc_buffer, &buf_handle);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_bo_alloc failed. (%i)\n", r);
+      goto error_bo_alloc;
+   }
+
+   r = amdgpu_bo_cpu_map(buf_handle, (void**)&ctx->user_fence_cpu_address_base);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_bo_cpu_map failed. (%i)\n", r);
+      goto error_bo_map;
+   }
+
+   memset(ctx->user_fence_cpu_address_base, 0, alloc_buffer.alloc_size);
+   ctx->user_fence_bo = buf_handle;
+
+   return (struct radeon_winsys_ctx*)ctx;
+
+error_bo_map:
+   amdgpu_bo_free(buf_handle);
+
+error_bo_alloc:
+   amdgpu_cs_ctx_free(ctx->ctx);
+
+error_ctx_create:
+   FREE(ctx);
+   return NULL;
+}
+
+/* Destroying a context only drops one reference through
+ * amdgpu_ctx_unref(). */
+static void amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
+{
+   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
+
+   amdgpu_ctx_unref(ctx);
+}
+
+/**
+ * Query whether the context was affected by a GPU reset and translate the
+ * libdrm answer into a pipe_reset_status.
+ *
+ * A failing query is logged and reported as PIPE_NO_RESET, as is any
+ * state value that is not recognized.
+ */
+static enum pipe_reset_status
+amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
+{
+   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
+   uint32_t reset_state, hang_count;
+   int err;
+
+   err = amdgpu_cs_query_reset_state(ctx->ctx, &reset_state, &hang_count);
+   if (err) {
+      fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state failed. (%i)\n", err);
+      return PIPE_NO_RESET;
+   }
+
+   if (reset_state == AMDGPU_CTX_GUILTY_RESET)
+      return PIPE_GUILTY_CONTEXT_RESET;
+   if (reset_state == AMDGPU_CTX_INNOCENT_RESET)
+      return PIPE_INNOCENT_CONTEXT_RESET;
+   if (reset_state == AMDGPU_CTX_UNKNOWN_RESET)
+      return PIPE_UNKNOWN_CONTEXT_RESET;
+
+   /* AMDGPU_CTX_NO_RESET and anything unexpected. */
+   return PIPE_NO_RESET;
+}
+
+/* COMMAND SUBMISSION */
+
+/**
+ * Point the command stream at fresh IB space.
+ *
+ * IBs are carved out of one large mapped buffer (cs->big_ib_buffer),
+ * starting at cs->used_ib_space. A new big buffer is created when there
+ * is none yet or when fewer than min_ib_size bytes remain in the current
+ * one.
+ *
+ * \return true on success; false if the big buffer could not be created
+ *         or mapped (cs->base.buf is then NULL)
+ */
+static bool amdgpu_get_new_ib(struct amdgpu_cs *cs)
+{
+ /* The maximum size is 4MB - 1B, which is unaligned.
+ * Use an aligned size instead. Note that the expression below evaluates
+ * to 4MB - 64B (the 16 is counted in dwords), not 4MB - 16B. */
+ const unsigned max_ib_size = (1024 * 1024 - 16) * 4;
+ const unsigned min_ib_size = 24 * 1024 * 4;
+
+ /* Start from an empty IB; buf stays NULL if anything below fails. */
+ cs->base.cdw = 0;
+ cs->base.buf = NULL;
+
+ /* Allocate a new buffer for IBs if the current buffer is all used. */
+ if (!cs->big_ib_buffer ||
+ cs->used_ib_space + min_ib_size > cs->big_ib_buffer->size) {
+ struct radeon_winsys *ws = &cs->ctx->ws->base;
+ struct radeon_winsys_cs_handle *winsys_bo;
+
+ /* Drop the old buffer and reset all state derived from it. */
+ pb_reference(&cs->big_ib_buffer, NULL);
+ cs->big_ib_winsys_buffer = NULL;
+ cs->ib_mapped = NULL;
+ cs->used_ib_space = 0;
+
+ cs->big_ib_buffer = ws->buffer_create(ws, max_ib_size,
+ 4096, true,
+ RADEON_DOMAIN_GTT,
+ RADEON_FLAG_CPU_ACCESS);
+ if (!cs->big_ib_buffer)
+ return false;
+
+ winsys_bo = ws->buffer_get_cs_handle(cs->big_ib_buffer);
+
+ cs->ib_mapped = ws->buffer_map(winsys_bo, NULL, PIPE_TRANSFER_WRITE);
+ if (!cs->ib_mapped) {
+ /* Unusable without a CPU mapping: release the buffer again. */
+ pb_reference(&cs->big_ib_buffer, NULL);
+ return false;
+ }
+
+ cs->big_ib_winsys_buffer = (struct amdgpu_winsys_bo*)winsys_bo;
+ }
+
+ /* GPU address, CPU pointer and capacity (in dwords) of the new IB, all
+ * relative to the space already used in the big buffer. */
+ cs->ib.ib_mc_address = cs->big_ib_winsys_buffer->va + cs->used_ib_space;
+ cs->base.buf = (uint32_t*)(cs->ib_mapped + cs->used_ib_space);
+ cs->base.max_dw = (cs->big_ib_buffer->size - cs->used_ib_space) / 4;
+ return true;
+}
+/* Choose the kernel HW IP type for ring_type and allocate the reloc arrays of cs; returns FALSE if an allocation fails. */
+static boolean amdgpu_init_cs_context(struct amdgpu_cs *cs,
+ enum ring_type ring_type)
+{
+ int i;
+ /* Map the ring type to a HW IP type; anything unrecognized is treated like RING_GFX. */
+ switch (ring_type) {
+ case RING_DMA:
+ cs->request.ip_type = AMDGPU_HW_IP_DMA;
+ break;
+
+ case RING_UVD:
+ cs->request.ip_type = AMDGPU_HW_IP_UVD;
+ break;
+
+ case RING_VCE:
+ cs->request.ip_type = AMDGPU_HW_IP_VCE;
+ break;
+
+ case RING_COMPUTE:
+ cs->request.ip_type = AMDGPU_HW_IP_COMPUTE;
+ break;
+
+ default:
+ case RING_GFX:
+ cs->request.ip_type = AMDGPU_HW_IP_GFX;
+ break;
+ }
+ /* Every request carries exactly one IB, the one described by cs->ib. */
+ cs->request.number_of_ibs = 1;
+ cs->request.ibs = &cs->ib;
+ /* Initial capacity of the parallel reloc arrays (buffers, handles, flags). */
+ cs->max_num_buffers = 512;
+ cs->buffers = (struct amdgpu_cs_buffer*)
+ CALLOC(1, cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer));
+ if (!cs->buffers) {
+ return FALSE;
+ }
+
+ cs->handles = CALLOC(1, cs->max_num_buffers * sizeof(amdgpu_bo_handle));
+ if (!cs->handles) {
+ FREE(cs->buffers);
+ return FALSE;
+ }
+
+ cs->flags = CALLOC(1, cs->max_num_buffers);
+ if (!cs->flags) {
+ FREE(cs->handles);
+ FREE(cs->buffers);
+ return FALSE;
+ }
+ /* -1 marks a hash slot that caches no reloc index. */
+ for (i = 0; i < Elements(cs->buffer_indices_hashlist); i++) {
+ cs->buffer_indices_hashlist[i] = -1;
+ }
+ return TRUE;
+}
+
+static void amdgpu_cs_context_cleanup(struct amdgpu_cs *cs)
+{
+   unsigned idx, slot;
+
+   /* Release each reloc: drop its CS count, then our reference to the BO. */
+   for (idx = 0; idx < cs->num_buffers; idx++) {
+      struct amdgpu_cs_buffer *entry = &cs->buffers[idx];
+      p_atomic_dec(&entry->bo->num_cs_references);
+      amdgpu_winsys_bo_reference(&entry->bo, NULL);
+      cs->flags[idx] = 0;
+      cs->handles[idx] = NULL;
+   }
+   /* Mark every lookup slot empty again, then zero the counters. */
+   for (slot = 0; slot < Elements(cs->buffer_indices_hashlist); slot++)
+      cs->buffer_indices_hashlist[slot] = -1;
+   cs->used_vram = 0;
+   cs->used_gart = 0;
+   cs->num_buffers = 0;
+}
+
+static void amdgpu_destroy_cs_context(struct amdgpu_cs *cs)
+{
+   amdgpu_cs_context_cleanup(cs); /* runs first: it still reads the arrays freed below */
+   FREE(cs->request.dependencies);
+   FREE(cs->handles);
+   FREE(cs->buffers);
+   FREE(cs->flags);
+}
+
+
+static struct radeon_winsys_cs *
+amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
+                 enum ring_type ring_type,
+                 void (*flush)(void *ctx, unsigned flags,
+                               struct pipe_fence_handle **fence),
+                 void *flush_ctx,
+                 struct radeon_winsys_cs_handle *trace_buf)
+{
+   /* Create a command stream for ring_type on context rwctx; NULL on failure.
+    * trace_buf is part of the winsys signature but is not used here. */
+   struct amdgpu_cs *new_cs = CALLOC_STRUCT(amdgpu_cs);
+
+   if (new_cs == NULL)
+      return NULL;
+
+   new_cs->base.ring_type = ring_type;
+   new_cs->flush_data = flush_ctx;
+   new_cs->flush_cs = flush;
+   new_cs->ctx = (struct amdgpu_ctx*)rwctx;
+
+   if (!amdgpu_init_cs_context(new_cs, ring_type))
+      goto fail_free;
+   if (!amdgpu_get_new_ib(new_cs))
+      goto fail_context;
+
+   /* Count this CS in the winsys-wide total of live command streams. */
+   p_atomic_inc(&new_cs->ctx->ws->num_cs);
+   return &new_cs->base;
+
+fail_context:
+   amdgpu_destroy_cs_context(new_cs);
+fail_free:
+   FREE(new_cs);
+   return NULL;
+}
+/* Append one dword to a command stream and advance cdw; "cs" is evaluated twice. */
+#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
+/* Return the index of bo in cs->buffers, or -1 if the CS has no reloc for it. */
+int amdgpu_get_reloc(struct amdgpu_cs *cs, struct amdgpu_winsys_bo *bo)
+{
+ unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
+ int i = cs->buffer_indices_hashlist[hash];
+
+ /* not found (empty slot, -1) or found (the slot caches this BO's index) */
+ if (i == -1 || cs->buffers[i].bo == bo)
+ return i;
+
+ /* Hash collision, look for the BO in the list of relocs linearly. */
+ for (i = cs->num_buffers - 1; i >= 0; i--) {
+ if (cs->buffers[i].bo == bo) {
+ /* Put this reloc in the hash list.
+ * This will prevent additional hash collisions if there are
+ * several consecutive get_reloc calls for the same buffer.
+ *
+ * Example: Assuming buffers A,B,C collide in the hash list,
+ * the following sequence of relocs:
+ * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
+ * will collide here: ^ and here: ^,
+ * meaning that we should get very few collisions in the end. */
+ cs->buffer_indices_hashlist[hash] = i;
+ return i;
+ }
+ }
+ return -1;
+}
+/* Add bo to the reloc list of cs, or merge usage/domains/priority into its existing entry; returns the reloc index. */
+static unsigned amdgpu_add_reloc(struct amdgpu_cs *cs,
+ struct amdgpu_winsys_bo *bo,
+ enum radeon_bo_usage usage,
+ enum radeon_bo_domain domains,
+ unsigned priority,
+ enum radeon_bo_domain *added_domains)
+{
+ struct amdgpu_cs_buffer *reloc;
+ unsigned hash = bo->unique_id & (Elements(cs->buffer_indices_hashlist)-1);
+ int i = -1;
+ /* Priorities are clamped to 15 before being stored in cs->flags. */
+ priority = MIN2(priority, 15);
+ *added_domains = 0;
+
+ i = amdgpu_get_reloc(cs, bo);
+ /* Already in the list: report only the domains that were not set before and keep the highest priority. */
+ if (i >= 0) {
+ reloc = &cs->buffers[i];
+ reloc->usage |= usage;
+ *added_domains = domains & ~reloc->domains;
+ reloc->domains |= domains;
+ cs->flags[i] = MAX2(cs->flags[i], priority);
+ return i;
+ }
+
+ /* New relocation, check if the backing array is large enough. */
+ if (cs->num_buffers >= cs->max_num_buffers) {
+ uint32_t size;
+ cs->max_num_buffers += 10;
+ /* NOTE(review): the realloc results below are not checked; on failure the old block leaks and NULL is dereferenced further down. */
+ size = cs->max_num_buffers * sizeof(struct amdgpu_cs_buffer);
+ cs->buffers = realloc(cs->buffers, size);
+
+ size = cs->max_num_buffers * sizeof(amdgpu_bo_handle);
+ cs->handles = realloc(cs->handles, size);
+
+ cs->flags = realloc(cs->flags, cs->max_num_buffers);
+ }
+
+ /* Initialize the new relocation. */
+ cs->buffers[cs->num_buffers].bo = NULL;
+ amdgpu_winsys_bo_reference(&cs->buffers[cs->num_buffers].bo, bo);
+ cs->handles[cs->num_buffers] = bo->bo;
+ cs->flags[cs->num_buffers] = priority;
+ p_atomic_inc(&bo->num_cs_references);
+ reloc = &cs->buffers[cs->num_buffers];
+ reloc->bo = bo;
+ reloc->usage = usage;
+ reloc->domains = domains;
+
+ cs->buffer_indices_hashlist[hash] = cs->num_buffers;
+
+ *added_domains = domains;
+ return cs->num_buffers++;
+}
+
+static unsigned amdgpu_cs_add_reloc(struct radeon_winsys_cs *rcs,
+                                    struct radeon_winsys_cs_handle *buf,
+                                    enum radeon_bo_usage usage,
+                                    enum radeon_bo_domain domains,
+                                    enum radeon_bo_priority priority)
+{
+   struct amdgpu_winsys_bo *winsys_bo = (struct amdgpu_winsys_bo*)buf;
+   struct amdgpu_cs *acs = amdgpu_cs(rcs);
+   enum radeon_bo_domain new_domains;
+   unsigned reloc_index;
+
+   /* "domains" is deliberately ignored: amdgpu doesn't support changing the
+    * buffer placement during command submission, so initial_domain is used. */
+   reloc_index = amdgpu_add_reloc(acs, winsys_bo, usage, winsys_bo->initial_domain,
+                                  priority, &new_domains);
+
+   if (new_domains & RADEON_DOMAIN_VRAM)
+      acs->used_vram += winsys_bo->base.size;
+   if (new_domains & RADEON_DOMAIN_GTT)
+      acs->used_gart += winsys_bo->base.size;
+   return reloc_index;
+}
+
+static int amdgpu_cs_get_reloc(struct radeon_winsys_cs *rcs,
+                               struct radeon_winsys_cs_handle *buf)
+{
+   /* Winsys entry point: unwrap both handles and defer to amdgpu_get_reloc. */
+   struct amdgpu_winsys_bo *winsys_bo = (struct amdgpu_winsys_bo*)buf;
+   return amdgpu_get_reloc(amdgpu_cs(rcs), winsys_bo);
+}
+/* Winsys cs_validate hook; no validation is performed here and TRUE is always returned. */
+static boolean amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
+{
+ return TRUE;
+}
+
+static boolean amdgpu_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
+{
+   /* TRUE while the CS footprint plus the request stays under 70% of each heap. */
+   struct amdgpu_cs *acs = amdgpu_cs(rcs);
+
+   if (!((acs->used_gart + gtt) < acs->ctx->ws->info.gart_size * 0.7))
+      return FALSE;
+   return (acs->used_vram + vram) < acs->ctx->ws->info.vram_size * 0.7;
+}
+/* Submit cs->request to the kernel with manual fence dependencies; on success the new fence is stored in every BO of the CS. */
+static void amdgpu_cs_do_submission(struct amdgpu_cs *cs,
+ struct pipe_fence_handle **out_fence)
+{
+ struct amdgpu_winsys *ws = cs->ctx->ws;
+ struct pipe_fence_handle *fence;
+ int i, j, r;
+
+ /* Create a fence. */
+ fence = amdgpu_fence_create(cs->ctx,
+ cs->request.ip_type,
+ cs->request.ip_instance,
+ cs->request.ring);
+ if (out_fence)
+ amdgpu_fence_reference(out_fence, fence);
+
+ cs->request.number_of_dependencies = 0;
+
+ /* Since the kernel driver doesn't synchronize execution between different
+ * rings automatically, we have to add fence dependencies manually. */
+ pipe_mutex_lock(ws->bo_fence_lock);
+ for (i = 0; i < cs->num_buffers; i++) {
+ for (j = 0; j < RING_LAST; j++) {
+ struct amdgpu_cs_fence *dep;
+ unsigned idx;
+
+ struct amdgpu_fence *bo_fence = (void *)cs->buffers[i].bo->fence[j];
+ if (!bo_fence)
+ continue;
+ /* Fences from this same context, IP type/instance and ring are not added as dependencies. */
+ if (bo_fence->ctx == cs->ctx &&
+ bo_fence->fence.ip_type == cs->request.ip_type &&
+ bo_fence->fence.ip_instance == cs->request.ip_instance &&
+ bo_fence->fence.ring == cs->request.ring)
+ continue;
+ /* Skip fences for which a zero-timeout wait already returns true. */
+ if (amdgpu_fence_wait((void *)bo_fence, 0, false))
+ continue;
+ /* NOTE(review): the realloc below is unchecked; on failure the old array leaks and the memcpy writes through NULL. */
+ idx = cs->request.number_of_dependencies++;
+ if (idx >= cs->max_dependencies) {
+ unsigned size;
+
+ cs->max_dependencies = idx + 8;
+ size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
+ cs->request.dependencies = realloc(cs->request.dependencies, size);
+ }
+
+ dep = &cs->request.dependencies[idx];
+ memcpy(dep, &bo_fence->fence, sizeof(*dep));
+ }
+ }
+ /* UVD and VCE submissions get no user fence; other rings use the user fence BO at an offset equal to the ring type. */
+ cs->request.fence_info.handle = NULL;
+ if (cs->request.ip_type != AMDGPU_HW_IP_UVD && cs->request.ip_type != AMDGPU_HW_IP_VCE) {
+ cs->request.fence_info.handle = cs->ctx->user_fence_bo;
+ cs->request.fence_info.offset = cs->base.ring_type;
+ }
+
+ r = amdgpu_cs_submit(cs->ctx->ctx, 0, &cs->request, 1);
+ if (r) {
+ if (r == -ENOMEM)
+ fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
+ else
+ fprintf(stderr, "amdgpu: The CS has been rejected, "
+ "see dmesg for more information.\n");
+
+ amdgpu_fence_signalled(fence);
+ } else {
+ /* Success. */
+ uint64_t *user_fence = NULL;
+ if (cs->request.ip_type != AMDGPU_HW_IP_UVD && cs->request.ip_type != AMDGPU_HW_IP_VCE)
+ user_fence = cs->ctx->user_fence_cpu_address_base +
+ cs->request.fence_info.offset;
+ amdgpu_fence_submitted(fence, &cs->request, user_fence);
+
+ for (i = 0; i < cs->num_buffers; i++)
+ amdgpu_fence_reference(&cs->buffers[i].bo->fence[cs->base.ring_type],
+ fence);
+ }
+ pipe_mutex_unlock(ws->bo_fence_lock);
+ amdgpu_fence_reference(&fence, NULL);
+}
+/* Winsys cs_sync_flush hook; amdgpu_cs_flush() calls amdgpu_cs_do_submission() directly, so the body is empty. */
+static void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
+{
+ /* no-op */
+}
+
+DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
+/* Winsys cs_flush hook: pad the IB for its ring, submit it unless it is empty, overflowed or RADEON_NOOP is set, then reset the CS and start a new IB. */
+static void amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
+ unsigned flags,
+ struct pipe_fence_handle **fence,
+ uint32_t cs_trace_id)
+{
+ struct amdgpu_cs *cs = amdgpu_cs(rcs);
+ struct amdgpu_winsys *ws = cs->ctx->ws;
+
+ switch (cs->base.ring_type) {
+ case RING_DMA:
+ /* pad DMA ring to 8 DWs */
+ if (ws->info.chip_class <= SI) {
+ while (rcs->cdw & 7)
+ OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
+ } else {
+ while (rcs->cdw & 7)
+ OUT_CS(&cs->base, 0x00000000); /* NOP packet */
+ }
+ break;
+ case RING_GFX:
+ /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
+ * r6xx, requires at least 4 dw alignment to avoid a hw bug.
+ */
+ if (ws->info.chip_class <= SI) {
+ while (rcs->cdw & 7)
+ OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
+ } else {
+ while (rcs->cdw & 7)
+ OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
+ }
+ break;
+ case RING_UVD:
+ while (rcs->cdw & 15)
+ OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
+ break;
+ default:
+ break;
+ }
+
+ if (rcs->cdw > rcs->max_dw) {
+ fprintf(stderr, "amdgpu: command stream overflowed\n");
+ }
+ /* Reference the IB's own backing buffer so that it is part of the BO list created below. */
+ amdgpu_cs_add_reloc(rcs, (void*)cs->big_ib_winsys_buffer,
+ RADEON_USAGE_READ, 0, RADEON_PRIO_MIN);
+
+ /* Submit only if the CS is neither empty nor overflowed and RADEON_NOOP is not set. */
+ if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
+ int r;
+
+ r = amdgpu_bo_list_create(ws->dev, cs->num_buffers,
+ cs->handles, cs->flags,
+ &cs->request.resources);
+
+ if (r) {
+ fprintf(stderr, "amdgpu: resource list creation failed (%d)\n", r);
+ cs->request.resources = NULL;
+ goto cleanup;
+ }
+
+ cs->ib.size = cs->base.cdw;
+ cs->used_ib_space += cs->base.cdw * 4;
+
+ amdgpu_cs_do_submission(cs, fence);
+
+ /* Cleanup. */
+ if (cs->request.resources)
+ amdgpu_bo_list_destroy(cs->request.resources);
+ }
+ /* NOTE(review): the result of amdgpu_get_new_ib() below is ignored; if it fails, cs->base.buf stays NULL. */
+cleanup:
+ amdgpu_cs_context_cleanup(cs);
+ amdgpu_get_new_ib(cs);
+
+ ws->num_cs_flushes++;
+}
+
+static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
+{
+   struct amdgpu_cs *acs = amdgpu_cs(rcs);
+   struct amdgpu_winsys *winsys = acs->ctx->ws;
+   amdgpu_destroy_cs_context(acs);   /* releases every reloc and the reloc arrays */
+   p_atomic_dec(&winsys->num_cs);
+   pb_reference(&acs->big_ib_buffer, NULL);
+   FREE(acs);
+}
+
+static boolean amdgpu_bo_is_referenced(struct radeon_winsys_cs *rcs,
+                                       struct radeon_winsys_cs_handle *_buf,
+                                       enum radeon_bo_usage usage)
+{
+   /* Winsys hook: TRUE if this CS references the buffer with any bit of
+    * "usage"; the lookup itself is the inline helper from amdgpu_cs.h. */
+   return amdgpu_bo_is_referenced_by_cs_with_usage(amdgpu_cs(rcs),
+                                                   (struct amdgpu_winsys_bo*)_buf, usage);
+}
+/* Install the amdgpu context, command-stream and fence callbacks into ws->base. */
+void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
+{
+ ws->base.ctx_create = amdgpu_ctx_create;
+ ws->base.ctx_destroy = amdgpu_ctx_destroy;
+ ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
+ ws->base.cs_create = amdgpu_cs_create;
+ ws->base.cs_destroy = amdgpu_cs_destroy;
+ ws->base.cs_add_reloc = amdgpu_cs_add_reloc;
+ ws->base.cs_get_reloc = amdgpu_cs_get_reloc;
+ ws->base.cs_validate = amdgpu_cs_validate;
+ ws->base.cs_memory_below_limit = amdgpu_cs_memory_below_limit;
+ ws->base.cs_flush = amdgpu_cs_flush;
+ ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
+ ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
+ ws->base.fence_wait = amdgpu_fence_wait_rel_timeout;
+ ws->base.fence_reference = amdgpu_fence_reference;
+}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
new file mode 100644
index 00000000000..0842259044b
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2011 Marek Olšák
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Marek Olšák
+ */
+
+#ifndef AMDGPU_CS_H
+#define AMDGPU_CS_H
+
+#include "amdgpu_bo.h"
+#include "util/u_memory.h"
+
+struct amdgpu_ctx {
+ struct amdgpu_winsys *ws;
+ amdgpu_context_handle ctx;
+ amdgpu_bo_handle user_fence_bo;
+ uint64_t *user_fence_cpu_address_base;
+ int refcount;
+};
+
+struct amdgpu_cs_buffer {
+ struct amdgpu_winsys_bo *bo;
+ enum radeon_bo_usage usage;
+ enum radeon_bo_domain domains;
+};
+
+
+struct amdgpu_cs {
+   struct radeon_winsys_cs base; /* must stay first: amdgpu_cs() casts the base pointer back */
+   struct amdgpu_ctx *ctx;
+
+   /* Flush CS. */
+   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
+   void *flush_data;
+
+   /* A buffer out of which new IBs are allocated. */
+   struct pb_buffer *big_ib_buffer; /* for holding the reference */
+   struct amdgpu_winsys_bo *big_ib_winsys_buffer;
+   uint8_t *ib_mapped;
+   unsigned used_ib_space; /* bytes of big_ib_buffer already handed out */
+
+   /* amdgpu_cs_submit parameters */
+   struct amdgpu_cs_request    request;
+   struct amdgpu_cs_ib_info    ib;
+
+   /* Relocs: three parallel arrays indexed by reloc index. */
+   unsigned                    max_num_buffers;
+   unsigned                    num_buffers;
+   amdgpu_bo_handle            *handles;
+   uint8_t                     *flags;
+   struct amdgpu_cs_buffer     *buffers;
+   /* Cache of reloc indices keyed by the low bits of bo->unique_id; -1 = empty. */
+   int                         buffer_indices_hashlist[512];
+   /* Bytes referenced per domain; 64-bit so totals of 4 GiB or more cannot wrap. */
+   uint64_t                    used_vram;
+   uint64_t                    used_gart;
+   /* Capacity of request.dependencies, in elements. */
+   unsigned                    max_dependencies;
+};
+
+struct amdgpu_fence {
+ struct pipe_reference reference;
+
+ struct amdgpu_ctx *ctx; /* submission context */
+ struct amdgpu_cs_fence fence;
+ uint64_t *user_fence_cpu_address;
+
+ volatile int signalled; /* bool (int for atomicity) */
+};
+/* Drop one reference to ctx; the last one frees the kernel context, the user fence BO and ctx itself. */
+static inline void amdgpu_ctx_unref(struct amdgpu_ctx *ctx)
+{
+ if (p_atomic_dec_zero(&ctx->refcount)) {
+ amdgpu_cs_ctx_free(ctx->ctx);
+ amdgpu_bo_free(ctx->user_fence_bo);
+ FREE(ctx);
+ }
+}
+/* Make *dst refer to src; if pipe_reference() reports that the old fence must go, its context is unreferenced and it is freed. */
+static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
+ struct pipe_fence_handle *src)
+{
+ struct amdgpu_fence **rdst = (struct amdgpu_fence **)dst;
+ struct amdgpu_fence *rsrc = (struct amdgpu_fence *)src;
+ /* NOTE(review): *rdst and rsrc may presumably be NULL here (callers pass NULL as src) — confirm pipe_reference() accepts that. */
+ if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
+ amdgpu_ctx_unref((*rdst)->ctx);
+ FREE(*rdst);
+ }
+ *rdst = rsrc;
+}
+
+int amdgpu_get_reloc(struct amdgpu_cs *csc, struct amdgpu_winsys_bo *bo);
+/* Downcast a winsys CS pointer to the amdgpu CS that embeds it as its first member. */
+static inline struct amdgpu_cs *
+amdgpu_cs(struct radeon_winsys_cs *base)
+{
+ return (struct amdgpu_cs*)base;
+}
+/* TRUE if bo's CS reference count equals the winsys CS count, or it is non-zero and bo is in this CS's reloc list. */
+static inline boolean
+amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
+ struct amdgpu_winsys_bo *bo)
+{
+ int num_refs = bo->num_cs_references;
+ return num_refs == bo->rws->num_cs ||
+ (num_refs && amdgpu_get_reloc(cs, bo) != -1);
+}
+
+static inline boolean
+amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
+                                         struct amdgpu_winsys_bo *bo,
+                                         enum radeon_bo_usage usage)
+{
+   /* TRUE only if bo is in this CS's reloc list with at least one of the
+    * requested usage bits. The num_cs_references test is an early-out
+    * that skips the lookup for buffers no CS references. */
+   int reloc = -1;
+
+   if (bo->num_cs_references != 0)
+      reloc = amdgpu_get_reloc(cs, bo);
+   if (reloc == -1)
+      return FALSE;
+   return (cs->buffers[reloc].usage & usage) != 0;
+}
+/* TRUE if bo's CS reference count is non-zero. */
+static inline boolean
+amdgpu_bo_is_referenced_by_any_cs(struct amdgpu_winsys_bo *bo)
+{
+ return bo->num_cs_references != 0;
+}
+
+bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
+ bool absolute);
+void amdgpu_cs_init_functions(struct amdgpu_winsys *ws);
+
+#endif
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
new file mode 100644
index 00000000000..8882c418e12
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/**
+ * This file is included by addrlib. It adds GPU family definitions and
+ * macros compatible with addrlib.
+ */
+
+#ifndef AMDGPU_ID_H
+#define AMDGPU_ID_H
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_LITTLE_ENDIAN)
+#define LITTLEENDIAN_CPU
+#elif defined(PIPE_ARCH_BIG_ENDIAN)
+#define BIGENDIAN_CPU
+#endif
+
+enum {
+ FAMILY_UNKNOWN,
+ FAMILY_SI,
+ FAMILY_CI,
+ FAMILY_KV,
+ FAMILY_VI,
+ FAMILY_CZ,
+ FAMILY_PI,
+ FAMILY_LAST,
+};
+
+/* SI specific rev IDs */
+enum {
+ SI_TAHITI_P_A11 = 1,
+ SI_TAHITI_P_A0 = SI_TAHITI_P_A11, /*A0 is alias of A11*/
+ SI_TAHITI_P_A21 = 5,
+ SI_TAHITI_P_B0 = SI_TAHITI_P_A21, /*B0 is alias of A21*/
+ SI_TAHITI_P_A22 = 6,
+ SI_TAHITI_P_B1 = SI_TAHITI_P_A22, /*B1 is alias of A22*/
+
+ SI_PITCAIRN_PM_A11 = 20,
+ SI_PITCAIRN_PM_A0 = SI_PITCAIRN_PM_A11, /*A0 is alias of A11*/
+ SI_PITCAIRN_PM_A12 = 21,
+ SI_PITCAIRN_PM_A1 = SI_PITCAIRN_PM_A12, /*A1 is alias of A12*/
+
+ SI_CAPEVERDE_M_A11 = 40,
+ SI_CAPEVERDE_M_A0 = SI_CAPEVERDE_M_A11, /*A0 is alias of A11*/
+ SI_CAPEVERDE_M_A12 = 41,
+ SI_CAPEVERDE_M_A1 = SI_CAPEVERDE_M_A12, /*A1 is alias of A12*/
+
+ SI_OLAND_M_A0 = 60,
+
+ SI_HAINAN_V_A0 = 70,
+
+ SI_UNKNOWN = 0xFF
+};
+
+
+#define ASICREV_IS_TAHITI_P(eChipRev) \
+ (eChipRev < SI_PITCAIRN_PM_A11)
+#define ASICREV_IS_PITCAIRN_PM(eChipRev) \
+ ((eChipRev >= SI_PITCAIRN_PM_A11) && (eChipRev < SI_CAPEVERDE_M_A11))
+#define ASICREV_IS_CAPEVERDE_M(eChipRev) \
+ ((eChipRev >= SI_CAPEVERDE_M_A11) && (eChipRev < SI_OLAND_M_A0))
+#define ASICREV_IS_OLAND_M(eChipRev) \
+ ((eChipRev >= SI_OLAND_M_A0) && (eChipRev < SI_HAINAN_V_A0))
+#define ASICREV_IS_HAINAN_V(eChipRev) \
+(eChipRev >= SI_HAINAN_V_A0)
+
+/* CI specific revIDs */
+enum {
+ CI_BONAIRE_M_A0 = 20,
+ CI_BONAIRE_M_A1 = 21,
+
+ CI_HAWAII_P_A0 = 40,
+
+ CI_UNKNOWN = 0xFF
+};
+
+#define ASICREV_IS_BONAIRE_M(eChipRev) \
+ ((eChipRev >= CI_BONAIRE_M_A0) && (eChipRev < CI_HAWAII_P_A0))
+#define ASICREV_IS_HAWAII_P(eChipRev) \
+ (eChipRev >= CI_HAWAII_P_A0)
+
+/* KV specific rev IDs */
+enum {
+ KV_SPECTRE_A0 = 0x01, /* KV1 with Spectre GFX core, 8-8-1-2 (CU-Pix-Primitive-RB) */
+ KV_SPOOKY_A0 = 0x41, /* KV2 with Spooky GFX core, including downgraded from Spectre core, 3-4-1-1 (CU-Pix-Primitive-RB) */
+ KB_KALINDI_A0 = 0x81, /* KB with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+ KB_KALINDI_A1 = 0x82, /* KB with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+ BV_KALINDI_A2 = 0x85, /* BV with Kalindi GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+ ML_GODAVARI_A0 = 0xa1, /* ML with Godavari GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+ ML_GODAVARI_A1 = 0xa2, /* ML with Godavari GFX core, 2-4-1-1 (CU-Pix-Primitive-RB) */
+ KV_UNKNOWN = 0xFF
+};
+
+#define ASICREV_IS_SPECTRE(eChipRev) \
+ ((eChipRev >= KV_SPECTRE_A0) && (eChipRev < KV_SPOOKY_A0)) /* identify all versions of SPECTRE and supported features set */
+#define ASICREV_IS_SPOOKY(eChipRev) \
+ ((eChipRev >= KV_SPOOKY_A0) && (eChipRev < KB_KALINDI_A0)) /* identify all versions of SPOOKY and supported features set */
+#define ASICREV_IS_KALINDI(eChipRev) \
+ ((eChipRev >= KB_KALINDI_A0) && (eChipRev < KV_UNKNOWN)) /* identify all versions of KALINDI and supported features set */
+
+/* Following macros are subset of ASICREV_IS_KALINDI macro */
+#define ASICREV_IS_KALINDI_BHAVANI(eChipRev) \
+ ((eChipRev >= BV_KALINDI_A2) && (eChipRev < ML_GODAVARI_A0)) /* identify all versions of BHAVANI and supported features set */
+#define ASICREV_IS_KALINDI_GODAVARI(eChipRev) \
+ ((eChipRev >= ML_GODAVARI_A0) && (eChipRev < KV_UNKNOWN)) /* identify all versions of GODAVARI and supported features set */
+
+/* VI specific rev IDs */
+enum {
+ VI_ICELAND_M_A0 = 1,
+
+ VI_TONGA_P_A0 = 20,
+ VI_TONGA_P_A1 = 21,
+
+ VI_FIJI_P_A0 = 60,
+
+ VI_UNKNOWN = 0xFF
+};
+
+
+#define ASICREV_IS_ICELAND_M(eChipRev) \
+ (eChipRev < VI_TONGA_P_A0)
+#define ASICREV_IS_TONGA_P(eChipRev) \
+ ((eChipRev >= VI_TONGA_P_A0) && (eChipRev < VI_FIJI_P_A0))
+#define ASICREV_IS_FIJI_P(eChipRev) \
+ (eChipRev >= VI_FIJI_P_A0)
+
+/* CZ specific rev IDs */
+enum {
+    CZ_CARRIZO_A0 = 0x01,
+    CZ_UNKNOWN = 0xFF
+};
+/* True for every chip revision at or above CZ_CARRIZO_A0 (the macro used to name an undefined CARRIZO_A0). */
+#define ASICREV_IS_CARRIZO(eChipRev) \
+    ((eChipRev) >= CZ_CARRIZO_A0)
+
+#endif /* AMDGPU_ID_H */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_public.h b/src/gallium/winsys/amdgpu/drm/amdgpu_public.h
new file mode 100644
index 00000000000..ad133b20bf6
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_public.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#ifndef AMDGPU_PUBLIC_H
+#define AMDGPU_PUBLIC_H
+
+#include "pipe/p_defines.h"
+
+struct radeon_winsys;
+struct pipe_screen;
+
+typedef struct pipe_screen *(*radeon_screen_create_t)(struct radeon_winsys *);
+
+struct radeon_winsys *
+amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create);
+
+#endif
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
new file mode 100644
index 00000000000..358df381011
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright © 2011 Red Hat All Rights Reserved.
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+/* Contact:
+ * Marek Olšák
+ */
+
+#include "amdgpu_winsys.h"
+
+#ifndef NO_ENTRIES
+#define NO_ENTRIES 32
+#endif
+
+#ifndef NO_MACRO_ENTRIES
+#define NO_MACRO_ENTRIES 16
+#endif
+
+#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
+#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
+#endif
+
+/* Validate the flags, dimensions, sample count and type of surf; returns 0 if acceptable and -EINVAL otherwise. */
+static int amdgpu_surface_sanity(const struct radeon_surf *surf)
+{
+ unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
+ /* Only surfaces flagged as having a tile mode index are accepted. */
+ if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX))
+ return -EINVAL;
+
+ /* All dimensions must be at least 1. */
+ if (!surf->npix_x || !surf->npix_y || !surf->npix_z ||
+ !surf->array_size)
+ return -EINVAL;
+
+ if (!surf->blk_w || !surf->blk_h || !surf->blk_d)
+ return -EINVAL;
+ /* Only 1, 2, 4 or 8 samples are accepted. */
+ switch (surf->nsamples) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ return -EINVAL;
+ }
+ /* Per-type limits: 1D variants need npix_y == 1, only 3D may have npix_z > 1, only the *_ARRAY types may have array_size > 1. */
+ switch (type) {
+ case RADEON_SURF_TYPE_1D:
+ if (surf->npix_y > 1)
+ return -EINVAL;
+ /* fall through */
+ case RADEON_SURF_TYPE_2D:
+ case RADEON_SURF_TYPE_CUBEMAP:
+ if (surf->npix_z > 1 || surf->array_size > 1)
+ return -EINVAL;
+ break;
+ case RADEON_SURF_TYPE_3D:
+ if (surf->array_size > 1)
+ return -EINVAL;
+ break;
+ case RADEON_SURF_TYPE_1D_ARRAY:
+ if (surf->npix_y > 1)
+ return -EINVAL;
+ /* fall through */
+ case RADEON_SURF_TYPE_2D_ARRAY:
+ if (surf->npix_z > 1)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+/* addrlib allocation callback (installed in amdgpu_addr_create): malloc() the requested number of bytes. */
+static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
+{
+ return malloc(pInput->sizeInBytes);
+}
+/* addrlib free callback (installed in amdgpu_addr_create): free() the given block and always report ADDR_OK. */
+static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)
+{
+ free(pInput->pVirtAddr);
+ return ADDR_OK;
+}
+
+/**
+ * Look up the number of banks for the surface in the macro tile mode table.
+ * Possible values: 2, 4, 8, 16.
+ */
+static uint32_t cik_num_banks(struct amdgpu_winsys *ws,
+                              struct radeon_surf *surf)
+{
+   unsigned tile_bytes = MIN2(surf->tile_split, 8 * 8 * surf->bpe);
+   unsigned entry = 0;
+
+   /* The table entry is the number of halvings needed to reach <= 64. */
+   while (tile_bytes > 64) {
+      tile_bytes >>= 1;
+      entry++;
+   }
+   assert(entry < 16);
+
+   return 2 << ((ws->amdinfo.gb_macro_tile_mode[entry] >> 6) & 0x3);
+}
+/* Create an addrlib handle configured from the register values in ws->amdinfo; returns NULL if AddrCreate fails. */
+ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
+{
+ ADDR_CREATE_INPUT addrCreateInput = {0};
+ ADDR_CREATE_OUTPUT addrCreateOutput = {0};
+ ADDR_REGISTER_VALUE regValue = {0};
+ ADDR_CREATE_FLAGS createFlags = {{0}};
+ ADDR_E_RETURNCODE addrRet;
+
+ addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
+ addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
+
+ regValue.noOfBanks = ws->amdinfo.mc_arb_ramcfg & 0x3;
+ regValue.gbAddrConfig = ws->amdinfo.gb_addr_cfg;
+ regValue.noOfRanks = (ws->amdinfo.mc_arb_ramcfg & 0x4) >> 2;
+ /* Pass the tile mode and macro tile mode tables together with their entry counts. */
+ regValue.backendDisables = ws->amdinfo.backend_disable[0];
+ regValue.pTileConfig = ws->amdinfo.gb_tile_mode;
+ regValue.noOfEntries = sizeof(ws->amdinfo.gb_tile_mode) /
+ sizeof(ws->amdinfo.gb_tile_mode[0]);
+ regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
+ regValue.noOfMacroEntries = sizeof(ws->amdinfo.gb_macro_tile_mode) /
+ sizeof(ws->amdinfo.gb_macro_tile_mode[0]);
+
+ createFlags.value = 0;
+ createFlags.useTileIndex = 1;
+ createFlags.degradeBaseLevel = 1;
+ /* The chip family and revision come from the winsys; memory callbacks are the malloc/free wrappers above. */
+ addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
+ addrCreateInput.chipFamily = ws->family;
+ addrCreateInput.chipRevision = ws->rev_id;
+ addrCreateInput.createFlags = createFlags;
+ addrCreateInput.callbacks.allocSysMem = allocSysMem;
+ addrCreateInput.callbacks.freeSysMem = freeSysMem;
+ addrCreateInput.callbacks.debugPrint = 0;
+ addrCreateInput.regValue = regValue;
+
+ addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
+ if (addrRet != ADDR_OK)
+ return NULL;
+
+ return addrCreateOutput.hLib;
+}
+/* Run addrlib for one mip level of surf (the stencil layout if is_stencil), record the result in that level's entry and grow surf->bo_size. Returns 0 or the addrlib error code. */
+static int compute_level(struct amdgpu_winsys *ws,
+ struct radeon_surf *surf, bool is_stencil,
+ unsigned level, unsigned type, bool compressed,
+ ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut)
+{
+ struct radeon_surf_level *surf_level;
+ ADDR_E_RETURNCODE ret;
+
+ AddrSurfInfoIn->mipLevel = level;
+ AddrSurfInfoIn->width = u_minify(surf->npix_x, level);
+ AddrSurfInfoIn->height = u_minify(surf->npix_y, level);
+ /* Slice count: minified depth for 3D, always 6 for cube maps, array_size for everything else. */
+ if (type == RADEON_SURF_TYPE_3D)
+ AddrSurfInfoIn->numSlices = u_minify(surf->npix_z, level);
+ else if (type == RADEON_SURF_TYPE_CUBEMAP)
+ AddrSurfInfoIn->numSlices = 6;
+ else
+ AddrSurfInfoIn->numSlices = surf->array_size;
+
+ if (level > 0) {
+ /* Set the base level pitch. This is needed for calculation
+ * of non-zero levels. */
+ if (is_stencil)
+ AddrSurfInfoIn->basePitch = surf->stencil_level[0].nblk_x;
+ else
+ AddrSurfInfoIn->basePitch = surf->level[0].nblk_x;
+
+ /* Convert blocks to pixels for compressed formats. */
+ if (compressed)
+ AddrSurfInfoIn->basePitch *= surf->blk_w;
+ }
+
+ ret = AddrComputeSurfaceInfo(ws->addrlib,
+ AddrSurfInfoIn,
+ AddrSurfInfoOut);
+ if (ret != ADDR_OK) {
+ return ret;
+ }
+ /* Place the level at the current end of the buffer, aligned to the baseAlign addrlib returned. */
+ surf_level = is_stencil ? &surf->stencil_level[level] : &surf->level[level];
+ surf_level->offset = align(surf->bo_size, AddrSurfInfoOut->baseAlign);
+ surf_level->slice_size = AddrSurfInfoOut->sliceSize;
+ surf_level->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe);
+ surf_level->npix_x = u_minify(surf->npix_x, level);
+ surf_level->npix_y = u_minify(surf->npix_y, level);
+ surf_level->npix_z = u_minify(surf->npix_z, level);
+ surf_level->nblk_x = AddrSurfInfoOut->pitch;
+ surf_level->nblk_y = AddrSurfInfoOut->height;
+ if (type == RADEON_SURF_TYPE_3D)
+ surf_level->nblk_z = AddrSurfInfoOut->depth;
+ else
+ surf_level->nblk_z = 1;
+ /* NOTE(review): if asserts are compiled out, an unexpected tile mode leaves surf_level->mode unassigned here. */
+ switch (AddrSurfInfoOut->tileMode) {
+ case ADDR_TM_LINEAR_GENERAL:
+ surf_level->mode = RADEON_SURF_MODE_LINEAR;
+ break;
+ case ADDR_TM_LINEAR_ALIGNED:
+ surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+ break;
+ case ADDR_TM_1D_TILED_THIN1:
+ surf_level->mode = RADEON_SURF_MODE_1D;
+ break;
+ case ADDR_TM_2D_TILED_THIN1:
+ surf_level->mode = RADEON_SURF_MODE_2D;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (is_stencil)
+ surf->stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
+ else
+ surf->tiling_index[level] = AddrSurfInfoOut->tileIndex;
+
+ surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;
+ return 0;
+}
+
+/* Compute the complete memory layout of a surface using addrlib.
+ *
+ * The surface is validated with amdgpu_surface_sanity(), its flags are
+ * translated into an addrlib request, and compute_level() is then called for
+ * every mip level from 0 to surf->last_level (and a second time per level
+ * for stencil when RADEON_SURF_SBUFFER is set). surf->bo_size is reset to 0
+ * before the first level is computed.
+ *
+ * Returns 0 on success, otherwise the non-zero value returned by
+ * amdgpu_surface_sanity() or compute_level().
+ */
+static int amdgpu_surface_init(struct radeon_winsys *rws,
+ struct radeon_surf *surf)
+{
+ struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
+ unsigned level, mode, type;
+ bool compressed;
+ ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
+ ADDR_TILEINFO AddrTileInfoIn = {0};
+ ADDR_TILEINFO AddrTileInfoOut = {0};
+ int r;
+
+ r = amdgpu_surface_sanity(surf);
+ if (r)
+ return r;
+
+ /* Both addrlib structures carry their own size; the output structure also
+ * gets a place to store the resulting tile info. */
+ AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
+ AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
+ AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
+
+ type = RADEON_SURF_GET(surf->flags, TYPE);
+ mode = RADEON_SURF_GET(surf->flags, MODE);
+ /* Surfaces with 4x4 blocks are handled as block-compressed formats. */
+ compressed = surf->blk_w == 4 && surf->blk_h == 4;
+
+ /* MSAA and FMASK require 2D tiling. */
+ if (surf->nsamples > 1 ||
+ (surf->flags & RADEON_SURF_FMASK))
+ mode = RADEON_SURF_MODE_2D;
+
+ /* DB doesn't support linear layouts. */
+ if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
+ mode < RADEON_SURF_MODE_1D)
+ mode = RADEON_SURF_MODE_1D;
+
+ /* Set the requested tiling mode. */
+ switch (mode) {
+ case RADEON_SURF_MODE_LINEAR:
+ AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_GENERAL;
+ break;
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
+ break;
+ case RADEON_SURF_MODE_1D:
+ AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
+ break;
+ case RADEON_SURF_MODE_2D:
+ AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
+ break;
+ default:
+ assert(0);
+ }
+
+ /* The format must be set correctly for the allocation of compressed
+ * textures to work. In other cases, setting the bpp is sufficient. */
+ if (compressed) {
+ /* Only 8 and 16 bytes per block are handled (mapped to BC1 and BC3). */
+ switch (surf->bpe) {
+ case 8:
+ AddrSurfInfoIn.format = ADDR_FMT_BC1;
+ break;
+ case 16:
+ AddrSurfInfoIn.format = ADDR_FMT_BC3;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ else {
+ AddrSurfInfoIn.bpp = surf->bpe * 8;
+ }
+
+ AddrSurfInfoIn.numSamples = surf->nsamples;
+ /* NOTE(review): -1 presumably asks addrlib to pick the tile index itself;
+ * confirm against the addrlib interface. */
+ AddrSurfInfoIn.tileIndex = -1;
+
+ /* Set the micro tile type. */
+ if (surf->flags & RADEON_SURF_SCANOUT)
+ AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
+ else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
+ AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
+ else
+ AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
+
+ AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
+ AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
+ AddrSurfInfoIn.flags.stencil = (surf->flags & RADEON_SURF_SBUFFER) != 0;
+ AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
+ AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
+ AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
+ AddrSurfInfoIn.flags.degrade4Space = 1;
+
+ /* This disables incorrect calculations (hacks) in addrlib. */
+ AddrSurfInfoIn.flags.noStencil = 1;
+
+ /* Set preferred macrotile parameters. This is usually required
+ * for shared resources. This is for 2D tiling only. */
+ if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
+ surf->bankw && surf->bankh && surf->mtilea && surf->tile_split) {
+ /* If any of these parameters are incorrect, the calculation
+ * will fail. */
+ AddrTileInfoIn.banks = cik_num_banks(ws, surf);
+ AddrTileInfoIn.bankWidth = surf->bankw;
+ AddrTileInfoIn.bankHeight = surf->bankh;
+ AddrTileInfoIn.macroAspectRatio = surf->mtilea;
+ AddrTileInfoIn.tileSplitBytes = surf->tile_split;
+ AddrSurfInfoIn.flags.degrade4Space = 0;
+ AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
+
+ /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
+ * the tile index, because we are expected to know it if
+ * we know the other parameters.
+ *
+ * This is something that can easily be fixed in Addrlib.
+ * For now, just figure it out here.
+ * Note that only 2D_TILE_THIN1 is handled here.
+ */
+ assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
+ assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
+
+ if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
+ AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
+ else
+ AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
+ }
+
+ /* compute_level() places each level after the current bo_size and then
+ * grows bo_size, so start from an empty buffer. */
+ surf->bo_size = 0;
+
+ /* Calculate texture layout information. */
+ for (level = 0; level <= surf->last_level; level++) {
+ r = compute_level(ws, surf, false, level, type, compressed,
+ &AddrSurfInfoIn, &AddrSurfInfoOut);
+ if (r)
+ return r;
+
+ /* Surface-wide parameters are taken from the result for level 0. */
+ if (level == 0) {
+ surf->bo_alignment = AddrSurfInfoOut.baseAlign;
+ /* NOTE(review): the "- 1" presumably converts addrlib's pipe config
+ * enumeration to the value expected by the driver; confirm. */
+ surf->pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 1;
+
+ /* For 2D modes only. */
+ if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
+ surf->bankw = AddrSurfInfoOut.pTileInfo->bankWidth;
+ surf->bankh = AddrSurfInfoOut.pTileInfo->bankHeight;
+ surf->mtilea = AddrSurfInfoOut.pTileInfo->macroAspectRatio;
+ surf->tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
+ surf->num_banks = AddrSurfInfoOut.pTileInfo->banks;
+ }
+ }
+ }
+
+ /* Calculate texture layout information for stencil. */
+ if (surf->flags & RADEON_SURF_SBUFFER) {
+ /* The request built above is reused with 8 bits per pixel. */
+ AddrSurfInfoIn.bpp = 8;
+ /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
+ AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
+
+ for (level = 0; level <= surf->last_level; level++) {
+ r = compute_level(ws, surf, true, level, type, compressed,
+ &AddrSurfInfoIn, &AddrSurfInfoOut);
+ if (r)
+ return r;
+
+ if (level == 0) {
+ surf->stencil_offset = surf->stencil_level[0].offset;
+
+ /* For 2D modes only. */
+ if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
+ surf->stencil_tile_split =
+ AddrSurfInfoOut.pTileInfo->tileSplitBytes;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/* surface_best callback of the winsys. Neither argument is examined and the
+ * surface is left untouched; the function always returns 0. */
+static int
+amdgpu_surface_best(struct radeon_winsys *rws, struct radeon_surf *surf)
+{
+   return 0;
+}
+
+/* Install the surface layout callbacks defined in this file into the
+ * radeon_winsys function table embedded in ws. */
+void amdgpu_surface_init_functions(struct amdgpu_winsys *ws)
+{
+   struct radeon_winsys *base = &ws->base;
+
+   base->surface_init = amdgpu_surface_init;
+   base->surface_best = amdgpu_surface_best;
+}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
new file mode 100644
index 00000000000..012c9003b69
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright © 2009 Corbin Simpson
+ * Copyright © 2009 Joakim Sindholt
+ * Copyright © 2011 Marek Olšák
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Marek Olšák
+ */
+
+#include "amdgpu_cs.h"
+#include "amdgpu_public.h"
+
+#include "util/u_hash_table.h"
+#include <amdgpu_drm.h>
+#include <xf86drm.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include "amdgpu_id.h"
+
+/* Index into amdgpu_gpu_info::gb_tile_mode of the entry that
+ * cik_get_num_tile_pipes() decodes. */
+#define CIK_TILE_MODE_COLOR_2D 14
+
+/* Extract the 5-bit PIPE_CONFIG field (bits 6..10) of a GB_TILE_MODE value. */
+#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f)
+/* Values of the PIPE_CONFIG field; the Pn prefix is the pipe count that
+ * cik_get_num_tile_pipes() returns for the value. */
+#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0
+#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4
+#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5
+#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6
+#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14
+#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16
+#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17
+
+/* Table of the winsys instances created so far, keyed by their
+ * amdgpu_device_handle. It is created on first use by amdgpu_winsys_create()
+ * and is accessed with dev_tab_mutex held. */
+static struct util_hash_table *dev_tab = NULL;
+pipe_static_mutex(dev_tab_mutex);
+
+/* Return the number of tile pipes encoded in the PIPE_CONFIG field of
+ * info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D].
+ * Configurations that are not listed are treated like P2 (two pipes). */
+static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
+{
+   const unsigned pipe_config =
+      CIK__GB_TILE_MODE__PIPE_CONFIG(info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D]);
+   unsigned num_pipes;
+
+   switch (pipe_config) {
+   case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
+      num_pipes = 16;
+      break;
+
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32:
+      num_pipes = 8;
+      break;
+
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32:
+      num_pipes = 4;
+      break;
+
+   case CIK__PIPE_CONFIG__ADDR_SURF_P2:
+   default:
+      num_pipes = 2;
+      break;
+   }
+
+   return num_pipes;
+}
+
+/* Pack fields of the Sea Islands register values GB_ADDR_CFG and
+ * MC_ARB_RAMCFG into the layout of GB_TILING_CONFIG, a register which is
+ * only present on R600-R700.
+ *
+ * Result layout: num_pipes at bit 0, num_banks at bit 4,
+ * pipe_interleave_bytes at bit 8 and row_size at bit 12. */
+static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info)
+{
+   unsigned tiling_config;
+
+   /* num_pipes: GB_ADDR_CFG bits 0..2 */
+   tiling_config = info->gb_addr_cfg & 0x7;
+   /* num_banks: MC_ARB_RAMCFG bits 0..1 */
+   tiling_config |= (info->mc_arb_ramcfg & 0x3) << 4;
+   /* pipe_interleave_bytes: GB_ADDR_CFG bits 4..6 */
+   tiling_config |= ((info->gb_addr_cfg >> 4) & 0x7) << 8;
+   /* row_size: GB_ADDR_CFG bits 28..29 */
+   tiling_config |= ((info->gb_addr_cfg >> 28) & 0x3) << 12;
+
+   return tiling_config;
+}
+
+/* Helper function to do the ioctls needed for setup and init.
+ *
+ * Queries GPU, buffer alignment, heap, hardware IP and VCE firmware
+ * information from ws->dev, identifies the chip from its PCI ID, creates the
+ * addrlib instance and fills in ws->info.
+ *
+ * Returns TRUE on success. On failure a message is printed to stderr, the
+ * device handle is released with amdgpu_device_deinitialize(), ws->dev is
+ * set to NULL and FALSE is returned; ws itself is not freed.
+ */
+static boolean do_winsys_init(struct amdgpu_winsys *ws)
+{
+   struct amdgpu_buffer_size_alignments alignment_info = {};
+   struct amdgpu_heap_info vram, gtt;
+   struct drm_amdgpu_info_hw_ip dma = {}, uvd = {}, vce = {};
+   uint32_t vce_version = 0, vce_feature = 0;
+   int r;
+
+   /* Query hardware and driver information. */
+   r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_buffer_size_alignment(ws->dev, &alignment_info);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
+      goto fail;
+   }
+
+   /* The result is stored in gtt, which provides info.gart_size below. */
+   r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_DMA, 0, &dma);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_UVD, 0, &uvd);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_VCE, 0, &vce);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n");
+      goto fail;
+   }
+
+   r = amdgpu_query_firmware_version(ws->dev, AMDGPU_INFO_FW_VCE, 0, 0,
+                                     &vce_version, &vce_feature);
+   if (r) {
+      fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n");
+      goto fail;
+   }
+
+   /* Set chip identification. */
+   ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */
+   ws->info.vce_harvest_config = ws->amdinfo.vce_harvest_config;
+
+   /* The included list expands to one "case" per known PCI ID. */
+   switch (ws->info.pci_id) {
+#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; break;
+#include "pci_ids/radeonsi_pci_ids.h"
+#undef CHIPSET
+
+   default:
+      fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
+      goto fail;
+   }
+
+   if (ws->info.family >= CHIP_TONGA)
+      ws->info.chip_class = VI;
+   else if (ws->info.family >= CHIP_BONAIRE)
+      ws->info.chip_class = CIK;
+   else {
+      fprintf(stderr, "amdgpu: Unknown family.\n");
+      goto fail;
+   }
+
+   /* LLVM 3.6 is required for VI. */
+   if (ws->info.chip_class >= VI &&
+       (HAVE_LLVM < 0x0306 ||
+        (HAVE_LLVM == 0x0306 && MESA_LLVM_VERSION_PATCH < 1))) {
+      fprintf(stderr, "amdgpu: LLVM 3.6.1 is required, got LLVM %i.%i.%i\n",
+              HAVE_LLVM >> 8, HAVE_LLVM & 255, MESA_LLVM_VERSION_PATCH);
+      goto fail;
+   }
+
+   /* family and rev_id are for addrlib */
+   switch (ws->info.family) {
+   case CHIP_BONAIRE:
+      ws->family = FAMILY_CI;
+      ws->rev_id = CI_BONAIRE_M_A0;
+      break;
+   case CHIP_KAVERI:
+      ws->family = FAMILY_KV;
+      ws->rev_id = KV_SPECTRE_A0;
+      break;
+   case CHIP_KABINI:
+      ws->family = FAMILY_KV;
+      ws->rev_id = KB_KALINDI_A0;
+      break;
+   case CHIP_HAWAII:
+      ws->family = FAMILY_CI;
+      ws->rev_id = CI_HAWAII_P_A0;
+      break;
+   case CHIP_MULLINS:
+      ws->family = FAMILY_KV;
+      ws->rev_id = ML_GODAVARI_A0;
+      break;
+   case CHIP_TONGA:
+      ws->family = FAMILY_VI;
+      ws->rev_id = VI_TONGA_P_A0;
+      break;
+   case CHIP_ICELAND:
+      ws->family = FAMILY_VI;
+      ws->rev_id = VI_ICELAND_M_A0;
+      break;
+   case CHIP_CARRIZO:
+      ws->family = FAMILY_CZ;
+      ws->rev_id = CZ_CARRIZO_A0;
+      break;
+   case CHIP_FIJI:
+      ws->family = FAMILY_VI;
+      ws->rev_id = VI_FIJI_P_A0;
+      break;
+   default:
+      fprintf(stderr, "amdgpu: Unknown family.\n");
+      goto fail;
+   }
+
+   ws->addrlib = amdgpu_addr_create(ws);
+   if (!ws->addrlib) {
+      fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
+      goto fail;
+   }
+
+   /* Set hardware information. */
+   ws->info.gart_size = gtt.heap_size;
+   ws->info.vram_size = vram.heap_size;
+   /* convert the shader clock from KHz to MHz */
+   ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
+   ws->info.max_compute_units = 1; /* TODO */
+   ws->info.max_se = ws->amdinfo.num_shader_engines;
+   ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
+   ws->info.has_uvd = uvd.available_rings != 0;
+   /* The VCE firmware version is only reported when a VCE ring exists. */
+   ws->info.vce_fw_version =
+      vce.available_rings ? vce_version : 0;
+   ws->info.has_userptr = TRUE;
+   ws->info.r600_num_backends = ws->amdinfo.rb_pipes;
+   ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
+   ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo);
+   ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
+   ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? */
+   ws->info.r600_virtual_address = TRUE;
+   ws->info.r600_has_dma = dma.available_rings != 0;
+
+   memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
+          sizeof(ws->amdinfo.gb_tile_mode));
+   ws->info.si_tile_mode_array_valid = TRUE;
+   ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask;
+
+   memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode,
+          sizeof(ws->amdinfo.gb_macro_tile_mode));
+   ws->info.cik_macrotile_mode_array_valid = TRUE;
+
+   ws->gart_page_size = alignment_info.size_remote;
+
+   return TRUE;
+
+fail:
+   if (ws->addrlib)
+      AddrDestroy(ws->addrlib);
+   amdgpu_device_deinitialize(ws->dev);
+   ws->dev = NULL;
+   return FALSE;
+}
+
+/* Destroy a winsys: the fence lock, both buffer managers, the addrlib
+ * instance and the device handle are released, then the winsys memory is
+ * freed. */
+static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
+{
+   struct amdgpu_winsys *winsys = (struct amdgpu_winsys*)rws;
+   struct pb_manager *cache_mgr = winsys->cman;
+   struct pb_manager *kernel_mgr = winsys->kman;
+
+   pipe_mutex_destroy(winsys->bo_fence_lock);
+
+   /* The cache manager goes first, then the manager it was created on. */
+   cache_mgr->destroy(cache_mgr);
+   kernel_mgr->destroy(kernel_mgr);
+   AddrDestroy(winsys->addrlib);
+
+   amdgpu_device_deinitialize(winsys->dev);
+   FREE(rws);
+}
+
+/* Copy the radeon_info gathered for this winsys into *info. */
+static void amdgpu_winsys_query_info(struct radeon_winsys *rws,
+                                     struct radeon_info *info)
+{
+   struct amdgpu_winsys *ws = (struct amdgpu_winsys *)rws;
+
+   *info = ws->info;
+}
+
+/* cs_request_feature callback of the winsys. None of the arguments are
+ * examined; the function always returns FALSE. */
+static boolean
+amdgpu_cs_request_feature(struct radeon_winsys_cs *rcs,
+                          enum radeon_feature_id fid,
+                          boolean enable)
+{
+   return FALSE;
+}
+
+/* Return the current value of the counter or GPU property selected by
+ * "value".
+ *
+ * Counters kept by the winsys are read from ws directly; the timestamp,
+ * bytes-moved and heap usage values are queried from the device. A failed
+ * device query yields 0, because the result variables are zero-initialized
+ * and the query return codes are not inspected. RADEON_GPU_TEMPERATURE,
+ * RADEON_CURRENT_SCLK and RADEON_CURRENT_MCLK always yield 0, and
+ * RADEON_GPU_RESET_COUNTER asserts.
+ */
+static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
+                                   enum radeon_value_id value)
+{
+   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
+   /* Zero-initialized so that a failed heap query reports a usage of 0
+    * instead of reading an indeterminate value. */
+   struct amdgpu_heap_info heap = {0};
+   uint64_t retval = 0;
+
+   switch (value) {
+   case RADEON_REQUESTED_VRAM_MEMORY:
+      return ws->allocated_vram;
+   case RADEON_REQUESTED_GTT_MEMORY:
+      return ws->allocated_gtt;
+   case RADEON_BUFFER_WAIT_TIME_NS:
+      return ws->buffer_wait_time;
+   case RADEON_TIMESTAMP:
+      /* 8 is the size in bytes of retval. */
+      amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
+      return retval;
+   case RADEON_NUM_CS_FLUSHES:
+      return ws->num_cs_flushes;
+   case RADEON_NUM_BYTES_MOVED:
+      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
+      return retval;
+   case RADEON_VRAM_USAGE:
+      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
+      return heap.heap_usage;
+   case RADEON_GTT_USAGE:
+      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
+      return heap.heap_usage;
+   case RADEON_GPU_TEMPERATURE:
+   case RADEON_CURRENT_SCLK:
+   case RADEON_CURRENT_MCLK:
+      return 0;
+   case RADEON_GPU_RESET_COUNTER:
+      assert(0);
+      return 0;
+   }
+   return 0;
+}
+
+/* Read num_registers registers starting at reg_offset into out.
+ *
+ * reg_offset is divided by 4 before it is handed to
+ * amdgpu_read_mm_registers() (presumably a byte offset converted to a dword
+ * offset; confirm against the callers). The return value of the read is not
+ * checked. */
+static void amdgpu_read_registers(struct radeon_winsys *rws,
+                                  unsigned reg_offset,
+                                  unsigned num_registers, uint32_t *out)
+{
+   amdgpu_device_handle dev = ((struct amdgpu_winsys*)rws)->dev;
+   const unsigned dword_offset = reg_offset / 4;
+
+   amdgpu_read_mm_registers(dev, dword_offset, num_registers,
+                            0xffffffff, 0, out);
+}
+
+/* Hash callback of dev_tab: turns the pointer value of the key into an
+ * unsigned int. On x86-64 the upper 32 bits are XORed into the lower ones
+ * first. */
+static unsigned hash_dev(void *key)
+{
+   const intptr_t bits = pointer_to_intptr(key);
+
+#if defined(PIPE_ARCH_X86_64)
+   return bits ^ (bits >> 32);
+#else
+   return bits;
+#endif
+}
+
+/* Key comparison callback of dev_tab: returns 0 when both keys are the same
+ * pointer and 1 otherwise. */
+static int compare_dev(void *key1, void *key2)
+{
+   if (key1 == key2)
+      return 0;
+
+   return 1;
+}
+
+/* Drop one reference to the winsys.
+ *
+ * Returns true when the reference counter dropped to zero. In that case the
+ * device pointer has also been removed from dev_tab (if the table exists). */
+static bool amdgpu_winsys_unref(struct radeon_winsys *ws)
+{
+   struct amdgpu_winsys *aws = (struct amdgpu_winsys*)ws;
+   bool last_ref;
+
+   /* The counter update and the table removal form one critical section:
+    * otherwise amdgpu_winsys_create in another thread could still fetch
+    * this winsys from the table after its counter has reached zero. */
+   pipe_mutex_lock(dev_tab_mutex);
+
+   last_ref = pipe_reference(&aws->reference, NULL);
+   if (last_ref) {
+      if (dev_tab)
+         util_hash_table_remove(dev_tab, aws->dev);
+   }
+
+   pipe_mutex_unlock(dev_tab_mutex);
+
+   return last_ref;
+}
+
+/* Create the winsys for an amdgpu DRM file descriptor, or return the one
+ * that already exists for the same device with an additional reference.
+ *
+ * Winsys instances are kept in dev_tab, keyed by the device handle, and the
+ * whole lookup/creation sequence runs with dev_tab_mutex held.
+ *
+ * fd is the file descriptor of the DRM device; screen_create is called at
+ * the end, once the winsys is completely initialized, to create the screen.
+ *
+ * Returns the winsys, or NULL if the DRM version cannot be queried or its
+ * major version is not 3, if the device or the winsys cannot be initialized,
+ * or if screen_create fails. Every resource acquired before a failure,
+ * including the device reference, is released again.
+ */
+PUBLIC struct radeon_winsys *
+amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
+{
+   struct amdgpu_winsys *ws;
+   drmVersionPtr version = drmGetVersion(fd);
+   amdgpu_device_handle dev;
+   uint32_t drm_major, drm_minor;
+   int r;
+
+   /* drmGetVersion returns NULL if the version cannot be queried. */
+   if (!version)
+      return NULL;
+
+   /* The DRM driver version of amdgpu is 3.x.x. */
+   if (version->version_major != 3) {
+      drmFreeVersion(version);
+      return NULL;
+   }
+   drmFreeVersion(version);
+
+   /* Look up the winsys from the dev table. */
+   pipe_mutex_lock(dev_tab_mutex);
+   if (!dev_tab)
+      dev_tab = util_hash_table_create(hash_dev, compare_dev);
+
+   /* Initialize the amdgpu device. This should always return the same pointer
+    * for the same fd. */
+   r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
+   if (r) {
+      pipe_mutex_unlock(dev_tab_mutex);
+      fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n");
+      return NULL;
+   }
+
+   /* Lookup a winsys if we have already created one for this device. */
+   ws = util_hash_table_get(dev_tab, dev);
+   if (ws) {
+      pipe_reference(NULL, &ws->reference);
+
+      /* The existing winsys keeps its own device reference; give back the
+       * one amdgpu_device_initialize just handed out so that it is not
+       * leaked. */
+      amdgpu_device_deinitialize(dev);
+      pipe_mutex_unlock(dev_tab_mutex);
+      return &ws->base;
+   }
+
+   /* Create a new winsys. */
+   ws = CALLOC_STRUCT(amdgpu_winsys);
+   if (!ws) {
+      amdgpu_device_deinitialize(dev);
+      pipe_mutex_unlock(dev_tab_mutex);
+      return NULL;
+   }
+
+   ws->dev = dev;
+   ws->info.drm_major = drm_major;
+   ws->info.drm_minor = drm_minor;
+
+   /* On failure do_winsys_init has already released the device. */
+   if (!do_winsys_init(ws))
+      goto fail_alloc;
+
+   /* Create managers. */
+   ws->kman = amdgpu_bomgr_create(ws);
+   if (!ws->kman)
+      goto fail_init;
+   ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0,
+                       (ws->info.vram_size + ws->info.gart_size) / 8);
+   if (!ws->cman)
+      goto fail_kman;
+
+   /* init reference */
+   pipe_reference_init(&ws->reference, 1);
+
+   /* Set functions. */
+   ws->base.unref = amdgpu_winsys_unref;
+   ws->base.destroy = amdgpu_winsys_destroy;
+   ws->base.query_info = amdgpu_winsys_query_info;
+   ws->base.cs_request_feature = amdgpu_cs_request_feature;
+   ws->base.query_value = amdgpu_query_value;
+   ws->base.read_registers = amdgpu_read_registers;
+
+   amdgpu_bomgr_init_functions(ws);
+   amdgpu_cs_init_functions(ws);
+   amdgpu_surface_init_functions(ws);
+
+   pipe_mutex_init(ws->bo_fence_lock);
+
+   /* Create the screen at the end. The winsys must be initialized
+    * completely.
+    *
+    * Alternatively, we could create the screen based on "ws->gen"
+    * and link all drivers into one binary blob. */
+   ws->base.screen = screen_create(&ws->base);
+   if (!ws->base.screen) {
+      amdgpu_winsys_destroy(&ws->base);
+      pipe_mutex_unlock(dev_tab_mutex);
+      return NULL;
+   }
+
+   util_hash_table_set(dev_tab, dev, ws);
+
+   /* We must unlock the mutex once the winsys is fully initialized, so that
+    * other threads attempting to create the winsys from the same fd will
+    * get a fully initialized winsys and not just half-way initialized. */
+   pipe_mutex_unlock(dev_tab_mutex);
+
+   return &ws->base;
+
+   /* Error unwinding: each label releases what was acquired before the
+    * failing step and falls through to the next one. */
+fail_kman:
+   ws->kman->destroy(ws->kman);
+fail_init:
+   AddrDestroy(ws->addrlib);
+   amdgpu_device_deinitialize(dev);
+fail_alloc:
+   FREE(ws);
+   pipe_mutex_unlock(dev_tab_mutex);
+   return NULL;
+}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
new file mode 100644
index 00000000000..4d07644c9ef
--- /dev/null
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2009 Corbin Simpson
+ * Copyright © 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ * Marek Olšák
+ */
+
+#ifndef AMDGPU_WINSYS_H
+#define AMDGPU_WINSYS_H
+
+#include "gallium/drivers/radeon/radeon_winsys.h"
+#include "addrlib/addrinterface.h"
+#include "os/os_thread.h"
+#include <amdgpu.h>
+
+struct amdgpu_cs;
+
+/* State of one amdgpu winsys instance. */
+struct amdgpu_winsys {
+ /* Generic winsys interface. It is the first member, and radeon_winsys
+ * pointers are cast to amdgpu_winsys pointers. */
+ struct radeon_winsys base;
+ /* Reference counter, dropped by amdgpu_winsys_unref(). */
+ struct pipe_reference reference;
+
+ /* Device handle obtained from amdgpu_device_initialize(). */
+ amdgpu_device_handle dev;
+
+ pipe_mutex bo_fence_lock;
+
+ int num_cs; /* The number of command streams created. */
+ uint32_t next_bo_unique_id;
+ uint64_t allocated_vram;
+ uint64_t allocated_gtt;
+ uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
+ uint64_t num_cs_flushes;
+ /* Set from the size_remote buffer alignment reported by the device. */
+ unsigned gart_page_size;
+
+ /* Information handed out by the query_info callback. */
+ struct radeon_info info;
+
+ /* kman is created by amdgpu_bomgr_create(); cman is the cache manager
+ * created on top of kman. */
+ struct pb_manager *kman;
+ struct pb_manager *cman;
+
+ /* GPU information as filled in by amdgpu_query_gpu_info(). */
+ struct amdgpu_gpu_info amdinfo;
+ /* Created by amdgpu_addr_create(); family and rev_id are for addrlib. */
+ ADDR_HANDLE addrlib;
+ uint32_t rev_id;
+ unsigned family;
+};
+
+/* Cast a generic radeon_winsys pointer to the amdgpu winsys that embeds it
+ * as its first member. */
+static inline struct amdgpu_winsys *
+amdgpu_winsys(struct radeon_winsys *base)
+{
+   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)base;
+
+   return ws;
+}
+
+void amdgpu_surface_init_functions(struct amdgpu_winsys *ws);
+ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws);
+
+#endif
diff --git a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
index 9fedb121565..93ce6f224fe 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
+++ b/src/gallium/winsys/i915/drm/i915_drm_batchbuffer.c
@@ -26,7 +26,7 @@ struct i915_drm_batchbuffer
drm_intel_bo *bo;
};
-static INLINE struct i915_drm_batchbuffer *
+static inline struct i915_drm_batchbuffer *
i915_drm_batchbuffer(struct i915_winsys_batchbuffer *batch)
{
return (struct i915_drm_batchbuffer *)batch;
diff --git a/src/gallium/winsys/i915/drm/i915_drm_winsys.h b/src/gallium/winsys/i915/drm/i915_drm_winsys.h
index 7f0d718bdb7..56b9e150497 100644
--- a/src/gallium/winsys/i915/drm/i915_drm_winsys.h
+++ b/src/gallium/winsys/i915/drm/i915_drm_winsys.h
@@ -28,7 +28,7 @@ struct i915_drm_winsys
drm_intel_bufmgr *gem_manager;
};
-static INLINE struct i915_drm_winsys *
+static inline struct i915_drm_winsys *
i915_drm_winsys(struct i915_winsys *iws)
{
return (struct i915_drm_winsys *)iws;
@@ -58,13 +58,13 @@ struct i915_drm_buffer {
unsigned flink;
};
-static INLINE struct i915_drm_buffer *
+static inline struct i915_drm_buffer *
i915_drm_buffer(struct i915_winsys_buffer *buffer)
{
return (struct i915_drm_buffer *)buffer;
}
-static INLINE drm_intel_bo *
+static inline drm_intel_bo *
intel_bo(struct i915_winsys_buffer *buffer)
{
return i915_drm_buffer(buffer)->bo;
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 063524655b6..c6603e38a00 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -17,7 +17,7 @@ static struct util_hash_table *fd_tab = NULL;
pipe_static_mutex(nouveau_screen_mutex);
-boolean nouveau_drm_screen_unref(struct nouveau_screen *screen)
+bool nouveau_drm_screen_unref(struct nouveau_screen *screen)
{
int ret;
if (screen->refcount == -1)
@@ -120,7 +120,11 @@ nouveau_drm_screen_create(int fd)
if (!screen)
goto err;
- util_hash_table_set(fd_tab, intptr_to_pointer(fd), screen);
+ /* Use dupfd in hash table, to avoid errors if the original fd gets
+ * closed by its owner. The hash key needs to live at least as long as
+ * the screen.
+ */
+ util_hash_table_set(fd_tab, intptr_to_pointer(dupfd), screen);
screen->refcount = 1;
pipe_mutex_unlock(nouveau_screen_mutex);
return &screen->base;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index fe98870967a..3a9ac445b24 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -44,7 +44,7 @@
static const struct pb_vtbl radeon_bo_vtbl;
-static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
+static inline struct radeon_bo *radeon_bo(struct pb_buffer *bo)
{
assert(bo->vtbl == &radeon_bo_vtbl);
return (struct radeon_bo *)bo;
@@ -78,7 +78,7 @@ struct radeon_bomgr {
struct list_head va_holes;
};
-static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
+static inline struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
{
return (struct radeon_bomgr *)mgr;
}
@@ -101,33 +101,30 @@ static struct radeon_bo *get_radeon_bo(struct pb_buffer *_buf)
return bo;
}
-static void radeon_bo_wait(struct pb_buffer *_buf, enum radeon_bo_usage usage)
+static bool radeon_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
+ enum radeon_bo_usage usage)
{
- struct radeon_bo *bo = get_radeon_bo(_buf);
- struct drm_radeon_gem_wait_idle args = {0};
+ struct radeon_bo *bo = get_radeon_bo(_buf);
- while (p_atomic_read(&bo->num_active_ioctls)) {
- sched_yield();
+ /* Wait if any ioctl is being submitted with this buffer. */
+ if (!os_wait_until_zero(&bo->num_active_ioctls, timeout))
+ return false;
+
+ /* TODO: handle arbitrary timeout */
+ if (!timeout) {
+ struct drm_radeon_gem_busy args = {0};
+
+ args.handle = bo->handle;
+ return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
+ &args, sizeof(args)) == 0;
+ } else {
+ struct drm_radeon_gem_wait_idle args = {0};
+
+ args.handle = bo->handle;
+ while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
+ &args, sizeof(args)) == -EBUSY);
+ return true;
}
-
- args.handle = bo->handle;
- while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
- &args, sizeof(args)) == -EBUSY);
-}
-
-static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
- enum radeon_bo_usage usage)
-{
- struct radeon_bo *bo = get_radeon_bo(_buf);
- struct drm_radeon_gem_busy args = {0};
-
- if (p_atomic_read(&bo->num_active_ioctls)) {
- return TRUE;
- }
-
- args.handle = bo->handle;
- return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
- &args, sizeof(args)) != 0;
}
static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
@@ -305,14 +302,34 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
if (bo->ptr)
os_munmap(bo->ptr, bo->base.size);
+ if (mgr->va) {
+ if (bo->rws->va_unmap_working) {
+ struct drm_radeon_gem_va va;
+
+ va.handle = bo->handle;
+ va.vm_id = 0;
+ va.operation = RADEON_VA_UNMAP;
+ va.flags = RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_WRITEABLE |
+ RADEON_VM_PAGE_SNOOPED;
+ va.offset = bo->va;
+
+ if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_VA, &va,
+ sizeof(va)) != 0 &&
+ va.operation == RADEON_VA_RESULT_ERROR) {
+ fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
+ fprintf(stderr, "radeon: size : %d bytes\n", bo->base.size);
+ fprintf(stderr, "radeon: va : 0x%016llx\n", (unsigned long long)bo->va);
+ }
+ }
+
+ radeon_bomgr_free_va(mgr, bo->va, bo->base.size);
+ }
+
/* Close object. */
args.handle = bo->handle;
drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
- if (mgr->va) {
- radeon_bomgr_free_va(mgr, bo->va, bo->base.size);
- }
-
pipe_mutex_destroy(bo->map_mutex);
if (bo->initial_domain & RADEON_DOMAIN_VRAM)
@@ -331,14 +348,11 @@ void *radeon_bo_do_map(struct radeon_bo *bo)
if (bo->user_ptr)
return bo->user_ptr;
- /* Return the pointer if it's already mapped. */
- if (bo->ptr)
- return bo->ptr;
-
/* Map the buffer. */
pipe_mutex_lock(bo->map_mutex);
- /* Return the pointer if it's already mapped (in case of a race). */
+ /* Return the pointer if it's already mapped. */
if (bo->ptr) {
+ bo->map_count++;
pipe_mutex_unlock(bo->map_mutex);
return bo->ptr;
}
@@ -363,6 +377,7 @@ void *radeon_bo_do_map(struct radeon_bo *bo)
return NULL;
}
bo->ptr = ptr;
+ bo->map_count = 1;
pipe_mutex_unlock(bo->map_mutex);
return bo->ptr;
@@ -392,8 +407,8 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
return NULL;
}
- if (radeon_bo_is_busy((struct pb_buffer*)bo,
- RADEON_USAGE_WRITE)) {
+ if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
+ RADEON_USAGE_WRITE)) {
return NULL;
}
} else {
@@ -402,8 +417,8 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
return NULL;
}
- if (radeon_bo_is_busy((struct pb_buffer*)bo,
- RADEON_USAGE_READWRITE)) {
+ if (!radeon_bo_wait((struct pb_buffer*)bo, 0,
+ RADEON_USAGE_READWRITE)) {
return NULL;
}
}
@@ -421,7 +436,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
cs->flush_cs(cs->flush_data, 0, NULL);
}
- radeon_bo_wait((struct pb_buffer*)bo,
+ radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
RADEON_USAGE_WRITE);
} else {
/* Mapping for write. */
@@ -435,7 +450,8 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
}
}
- radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
+ radeon_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
+ RADEON_USAGE_READWRITE);
}
bo->mgr->rws->buffer_wait_time += os_time_get_nano() - time;
@@ -447,7 +463,26 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
static void radeon_bo_unmap(struct radeon_winsys_cs_handle *_buf)
{
- /* NOP */
+ struct radeon_bo *bo = (struct radeon_bo*)_buf;
+
+ if (bo->user_ptr)
+ return;
+
+ pipe_mutex_lock(bo->map_mutex);
+ if (!bo->ptr) {
+ pipe_mutex_unlock(bo->map_mutex);
+ return; /* it's not been mapped */
+ }
+
+ assert(bo->map_count);
+ if (--bo->map_count) {
+ pipe_mutex_unlock(bo->map_mutex);
+ return; /* it's been mapped multiple times */
+ }
+
+ os_munmap(bo->ptr, bo->base.size);
+ bo->ptr = NULL;
+ pipe_mutex_unlock(bo->map_mutex);
}
static void radeon_bo_get_base_buffer(struct pb_buffer *buf,
@@ -607,7 +642,7 @@ static boolean radeon_bomgr_is_buffer_busy(struct pb_manager *_mgr,
return TRUE;
}
- if (radeon_bo_is_busy((struct pb_buffer*)bo, RADEON_USAGE_READWRITE)) {
+ if (!radeon_bo_wait((struct pb_buffer*)bo, 0, RADEON_USAGE_READWRITE)) {
return TRUE;
}
@@ -739,10 +774,11 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf,
struct radeon_winsys_cs *rcs,
enum radeon_bo_layout microtiled,
enum radeon_bo_layout macrotiled,
+ unsigned pipe_config,
unsigned bankw, unsigned bankh,
unsigned tile_split,
unsigned stencil_tile_split,
- unsigned mtilea,
+ unsigned mtilea, unsigned num_banks,
uint32_t pitch,
bool scanout)
{
@@ -758,9 +794,7 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf,
cs->flush_cs(cs->flush_data, 0, NULL);
}
- while (p_atomic_read(&bo->num_active_ioctls)) {
- sched_yield();
- }
+ os_wait_until_zero(&bo->num_active_ioctls, PIPE_TIMEOUT_INFINITE);
if (microtiled == RADEON_LAYOUT_TILED)
args.tiling_flags |= RADEON_TILING_MICRO;
@@ -820,6 +854,12 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
memset(&desc, 0, sizeof(desc));
desc.base.alignment = alignment;
+ /* Align size to page size. This is the minimum alignment for normal
+ * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
+ * like constant/uniform buffers, can benefit from better and more reuse.
+ */
+ size = align(size, 4096);
+
/* Only set one usage bit each for domains and flags, or the cache manager
* might consider different sets of domains / flags compatible
*/
@@ -1125,7 +1165,6 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
ws->base.buffer_map = radeon_bo_map;
ws->base.buffer_unmap = radeon_bo_unmap;
ws->base.buffer_wait = radeon_bo_wait;
- ws->base.buffer_is_busy = radeon_bo_is_busy;
ws->base.buffer_create = radeon_winsys_bo_create;
ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index b83ce168b4e..f8f50cc5d5b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -54,6 +54,7 @@ struct radeon_bo {
void *ptr;
pipe_mutex map_mutex;
+ unsigned map_count;
uint32_t handle;
uint32_t flink_name;
@@ -71,7 +72,7 @@ struct radeon_bo {
struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws);
void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws);
-static INLINE
+static inline
void radeon_bo_reference(struct radeon_bo **dst, struct radeon_bo *src)
{
pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index ecf89578c68..7a267f9acbf 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -80,22 +80,39 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
struct pipe_fence_handle *src);
+static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
+{
+ /* No context support here. Just return the winsys pointer
+ * as the "context". */
+ return (struct radeon_winsys_ctx*)ws;
+}
+
+static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
+{
+ /* No context support here. */
+}
+
static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
struct radeon_drm_winsys *ws)
{
int i;
+ csc->buf = MALLOC(ws->ib_max_size);
+ if (!csc->buf)
+ return FALSE;
csc->fd = ws->fd;
csc->nrelocs = 512;
csc->relocs_bo = (struct radeon_bo**)
CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
if (!csc->relocs_bo) {
+ FREE(csc->buf);
return FALSE;
}
csc->relocs = (struct drm_radeon_cs_reloc*)
CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
if (!csc->relocs) {
+ FREE(csc->buf);
FREE(csc->relocs_bo);
return FALSE;
}
@@ -148,18 +165,19 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
radeon_cs_context_cleanup(csc);
FREE(csc->relocs_bo);
FREE(csc->relocs);
+ FREE(csc->buf);
}
static struct radeon_winsys_cs *
-radeon_drm_cs_create(struct radeon_winsys *rws,
+radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
void *flush_ctx,
struct radeon_winsys_cs_handle *trace_buf)
{
- struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
+ struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
struct radeon_drm_cs *cs;
cs = CALLOC_STRUCT(radeon_drm_cs);
@@ -188,6 +206,7 @@ radeon_drm_cs_create(struct radeon_winsys *rws,
cs->cst = &cs->csc2;
cs->base.buf = cs->csc->buf;
cs->base.ring_type = ring_type;
+ cs->base.max_dw = ws->ib_max_size / 4;
p_atomic_inc(&ws->num_cs);
return &cs->base;
@@ -195,7 +214,7 @@ radeon_drm_cs_create(struct radeon_winsys *rws,
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
-static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
+static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
enum radeon_bo_domain rd,
enum radeon_bo_domain wd,
unsigned priority,
@@ -372,20 +391,29 @@ static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
- boolean status =
- (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
- (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
- return status;
+ vram += cs->csc->used_vram;
+ gtt += cs->csc->used_gart;
+
+ /* Anything that goes above the VRAM size should go to GTT. */
+ if (vram > cs->ws->info.vram_size)
+ gtt += vram - cs->ws->info.vram_size;
+
+ /* Now we just need to check if we have enough GTT. */
+ return gtt < cs->ws->info.gart_size * 0.7;
}
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
unsigned i;
+ int r;
- if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
- &csc->cs, sizeof(struct drm_radeon_cs))) {
- if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
+ r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
+ &csc->cs, sizeof(struct drm_radeon_cs));
+ if (r) {
+ if (r == -ENOMEM)
+ fprintf(stderr, "radeon: Not enough memory for command submission.\n");
+ else if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
unsigned i;
fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
@@ -467,7 +495,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
break;
}
- if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
+ if (rcs->cdw > rcs->max_dw) {
fprintf(stderr, "radeon: command stream overflowed\n");
}
@@ -486,7 +514,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
cs->cst->cs_trace_id = cs_trace_id;
/* If the CS is not empty or overflowed, emit it in a separate thread. */
- if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
+ if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
unsigned i, crelocs;
crelocs = cs->cst->crelocs;
@@ -522,6 +550,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
default:
case RING_GFX:
+ case RING_COMPUTE:
cs->cst->flags[0] = 0;
cs->cst->flags[1] = RADEON_CS_RING_GFX;
cs->cst->cs.num_chunks = 2;
@@ -537,7 +566,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
cs->cst->cs.num_chunks = 3;
}
- if (flags & RADEON_FLUSH_COMPUTE) {
+ if (cs->base.ring_type == RING_COMPUTE) {
cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
cs->cst->cs.num_chunks = 3;
}
@@ -625,7 +654,7 @@ static bool radeon_fence_wait(struct radeon_winsys *ws,
struct pb_buffer *rfence = (struct pb_buffer*)fence;
if (timeout == 0)
- return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);
+ return ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE);
if (timeout != PIPE_TIMEOUT_INFINITE) {
int64_t start_time = os_time_get();
@@ -634,7 +663,7 @@ static bool radeon_fence_wait(struct radeon_winsys *ws,
timeout /= 1000;
/* Wait in a loop. */
- while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
+ while (!ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE)) {
if (os_time_get() - start_time >= timeout) {
return FALSE;
}
@@ -643,7 +672,7 @@ static bool radeon_fence_wait(struct radeon_winsys *ws,
return TRUE;
}
- ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
+ ws->buffer_wait(rfence, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE);
return TRUE;
}
@@ -655,6 +684,8 @@ static void radeon_fence_reference(struct pipe_fence_handle **dst,
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
+ ws->base.ctx_create = radeon_drm_ctx_create;
+ ws->base.ctx_destroy = radeon_drm_ctx_destroy;
ws->base.cs_create = radeon_drm_cs_create;
ws->base.cs_destroy = radeon_drm_cs_destroy;
ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index fcc29fe9480..ab154945880 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -30,7 +30,7 @@
#include "radeon_drm_bo.h"
struct radeon_cs_context {
- uint32_t buf[RADEON_MAX_CMDBUF_DWORDS];
+ uint32_t *buf;
int fd;
struct drm_radeon_cs cs;
@@ -79,13 +79,13 @@ struct radeon_drm_cs {
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo);
-static INLINE struct radeon_drm_cs *
+static inline struct radeon_drm_cs *
radeon_drm_cs(struct radeon_winsys_cs *base)
{
return (struct radeon_drm_cs*)base;
}
-static INLINE boolean
+static inline boolean
radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
struct radeon_bo *bo)
{
@@ -94,7 +94,7 @@ radeon_bo_is_referenced_by_cs(struct radeon_drm_cs *cs,
(num_refs && radeon_get_reloc(cs->csc, bo) != -1);
}
-static INLINE boolean
+static inline boolean
radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
struct radeon_bo *bo)
{
@@ -110,7 +110,7 @@ radeon_bo_is_referenced_by_cs_for_write(struct radeon_drm_cs *cs,
return cs->csc->relocs[index].write_domain != 0;
}
-static INLINE boolean
+static inline boolean
radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
{
return bo->num_cs_references != 0;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index ba8d1437b6f..b70bbaa54a3 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -57,6 +57,12 @@
#define RADEON_INFO_READ_REG 0x24
#endif
+#define RADEON_INFO_VA_UNMAP_WORKING 0x25
+
+#ifndef RADEON_INFO_GPU_RESET_COUNTER
+#define RADEON_INFO_GPU_RESET_COUNTER 0x26
+#endif
+
static struct util_hash_table *fd_tab = NULL;
pipe_static_mutex(fd_tab_mutex);
@@ -389,16 +395,22 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
}
ws->info.r600_virtual_address = FALSE;
- if (ws->info.drm_minor >= 13) {
- uint32_t ib_vm_max_size;
+ ws->ib_max_size = 64 * 1024;
+ if (ws->info.drm_minor >= 13) {
ws->info.r600_virtual_address = TRUE;
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
&ws->va_start))
ws->info.r600_virtual_address = FALSE;
- if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
- &ib_vm_max_size))
+
+ if (radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+ &ws->ib_max_size))
+ ws->ib_max_size *= 4; /* the kernel returns the size in dwords */
+ else
ws->info.r600_virtual_address = FALSE;
+
+ radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
+ &ws->va_unmap_working);
}
if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", FALSE))
ws->info.r600_virtual_address = FALSE;
@@ -484,6 +496,10 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
if (ws->gen >= DRV_R600) {
radeon_surface_manager_free(ws->surf_man);
}
+
+ if (ws->fd >= 0)
+ close(ws->fd);
+
FREE(rws);
}
@@ -563,6 +579,10 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws,
radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK,
"current-gpu-mclk", (uint32_t*)&retval);
return retval;
+ case RADEON_GPU_RESET_COUNTER:
+ radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
+ "gpu-reset-counter", (uint32_t*)&retval);
+ return retval;
}
return 0;
}
@@ -696,7 +716,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
return NULL;
}
- ws->fd = fd;
+ ws->fd = dup(fd);
if (!do_winsys_init(ws))
goto fail;
@@ -706,13 +726,13 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
if (!ws->kman)
goto fail;
- ws->cman = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0,
+ ws->cman = pb_cache_manager_create(ws->kman, 500000, 2.0f, 0,
MIN2(ws->info.vram_size, ws->info.gart_size));
if (!ws->cman)
goto fail;
if (ws->gen >= DRV_R600) {
- ws->surf_man = radeon_surface_manager_new(fd);
+ ws->surf_man = radeon_surface_manager_new(ws->fd);
if (!ws->surf_man)
goto fail;
}
@@ -753,7 +773,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
return NULL;
}
- util_hash_table_set(fd_tab, intptr_to_pointer(fd), ws);
+ util_hash_table_set(fd_tab, intptr_to_pointer(ws->fd), ws);
/* We must unlock the mutex once the winsys is fully initialized, so that
* other threads attempting to create the winsys from the same fd will
@@ -770,6 +790,9 @@ fail:
ws->kman->destroy(ws->kman);
if (ws->surf_man)
radeon_surface_manager_free(ws->surf_man);
+ if (ws->fd >= 0)
+ close(ws->fd);
+
FREE(ws);
return NULL;
}
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index 166b6b93d28..c1a8d6ae564 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -73,7 +73,9 @@ struct radeon_drm_winsys {
enum radeon_generation gen;
struct radeon_info info;
+ uint32_t ib_max_size;
uint32_t va_start;
+ uint32_t va_unmap_working;
uint32_t accel_working2;
struct pb_manager *kman;
@@ -96,7 +98,7 @@ struct radeon_drm_winsys {
struct radeon_drm_cs *cs_stack[RING_LAST];
};
-static INLINE struct radeon_drm_winsys *
+static inline struct radeon_drm_winsys *
radeon_drm_winsys(struct radeon_winsys *base)
{
return (struct radeon_drm_winsys*)base;
diff --git a/src/gallium/winsys/svga/drm/SConscript b/src/gallium/winsys/svga/drm/SConscript
index 099acdac8c0..25850531d31 100644
--- a/src/gallium/winsys/svga/drm/SConscript
+++ b/src/gallium/winsys/svga/drm/SConscript
@@ -8,7 +8,6 @@ if env['gcc'] or env['clang'] or env['icc']:
env.Append(CCFLAGS = ['-fvisibility=hidden'])
env.Append(CPPDEFINES = [
'HAVE_STDINT_H',
- 'HAVE_SYS_TYPES_H',
'-D_FILE_OFFSET_BITS=64',
])
diff --git a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
index fceb0897058..5ef95f3d6a9 100644
--- a/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
+++ b/src/gallium/winsys/svga/drm/pb_buffer_simple_fenced.c
@@ -127,7 +127,7 @@ struct fenced_buffer
};
-static INLINE struct fenced_manager *
+static inline struct fenced_manager *
fenced_manager(struct pb_manager *mgr)
{
assert(mgr);
@@ -135,7 +135,7 @@ fenced_manager(struct pb_manager *mgr)
}
-static INLINE struct fenced_buffer *
+static inline struct fenced_buffer *
fenced_buffer(struct pb_buffer *buf)
{
assert(buf);
@@ -204,7 +204,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
}
-static INLINE void
+static inline void
fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
@@ -228,7 +228,7 @@ fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
*
* Reference count should be incremented before calling this function.
*/
-static INLINE void
+static inline void
fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
@@ -252,7 +252,7 @@ fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
*
* Returns TRUE if the buffer was detroyed.
*/
-static INLINE boolean
+static inline boolean
fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
@@ -289,7 +289,7 @@ fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
* This function will release and re-acquire the mutex, so any copy of mutable
* state must be discarded after calling it.
*/
-static INLINE enum pipe_error
+static inline enum pipe_error
fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf)
{
@@ -430,7 +430,7 @@ fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf)
* This function is a shorthand around pb_manager::create_buffer for
* fenced_buffer_create_gpu_storage_locked()'s benefit.
*/
-static INLINE boolean
+static inline boolean
fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
struct fenced_buffer *fenced_buf,
const struct pb_desc *desc)
diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.c b/src/gallium/winsys/svga/drm/vmw_buffer.c
index c516054b7fc..7eab3d050e4 100644
--- a/src/gallium/winsys/svga/drm/vmw_buffer.c
+++ b/src/gallium/winsys/svga/drm/vmw_buffer.c
@@ -69,7 +69,7 @@ struct vmw_gmr_buffer
extern const struct pb_vtbl vmw_gmr_buffer_vtbl;
-static INLINE struct vmw_gmr_buffer *
+static inline struct vmw_gmr_buffer *
vmw_gmr_buffer(struct pb_buffer *buf)
{
assert(buf);
@@ -86,7 +86,7 @@ struct vmw_gmr_bufmgr
};
-static INLINE struct vmw_gmr_bufmgr *
+static inline struct vmw_gmr_bufmgr *
vmw_gmr_bufmgr(struct pb_manager *mgr)
{
assert(mgr);
diff --git a/src/gallium/winsys/svga/drm/vmw_buffer.h b/src/gallium/winsys/svga/drm/vmw_buffer.h
index e0bb8085a48..b9cbb25541f 100644
--- a/src/gallium/winsys/svga/drm/vmw_buffer.h
+++ b/src/gallium/winsys/svga/drm/vmw_buffer.h
@@ -59,7 +59,7 @@ struct debug_flush_buf *
vmw_debug_flush_buf(struct svga_winsys_buffer *buffer);
#else
-static INLINE struct pb_buffer *
+static inline struct pb_buffer *
vmw_pb_buffer(struct svga_winsys_buffer *buffer)
{
assert(buffer);
@@ -67,7 +67,7 @@ vmw_pb_buffer(struct svga_winsys_buffer *buffer)
}
-static INLINE struct svga_winsys_buffer *
+static inline struct svga_winsys_buffer *
vmw_svga_winsys_buffer_wrap(struct pb_buffer *buffer)
{
return (struct svga_winsys_buffer *)buffer;
diff --git a/src/gallium/winsys/svga/drm/vmw_context.c b/src/gallium/winsys/svga/drm/vmw_context.c
index 4e1c41db886..31bedde7c41 100644
--- a/src/gallium/winsys/svga/drm/vmw_context.c
+++ b/src/gallium/winsys/svga/drm/vmw_context.c
@@ -152,7 +152,7 @@ struct vmw_svga_winsys_context
};
-static INLINE struct vmw_svga_winsys_context *
+static inline struct vmw_svga_winsys_context *
vmw_svga_winsys_context(struct svga_winsys_context *swc)
{
assert(swc);
@@ -160,7 +160,7 @@ vmw_svga_winsys_context(struct svga_winsys_context *swc)
}
-static INLINE unsigned
+static inline unsigned
vmw_translate_to_pb_flags(unsigned flags)
{
unsigned f = 0;
diff --git a/src/gallium/winsys/svga/drm/vmw_fence.c b/src/gallium/winsys/svga/drm/vmw_fence.c
index 1b24239a7ce..17822ce27fd 100644
--- a/src/gallium/winsys/svga/drm/vmw_fence.c
+++ b/src/gallium/winsys/svga/drm/vmw_fence.c
@@ -67,7 +67,7 @@ struct vmw_fence
* @ops: Pointer to a struct pb_fence_ops.
*
*/
-static INLINE boolean
+static inline boolean
vmw_fence_seq_is_signaled(uint32_t seq, uint32_t last, uint32_t cur)
{
return (cur - last <= cur - seq);
@@ -81,7 +81,7 @@ vmw_fence_seq_is_signaled(uint32_t seq, uint32_t last, uint32_t cur)
* @ops: Pointer to a struct pb_fence_ops.
*
*/
-static INLINE struct vmw_fence_ops *
+static inline struct vmw_fence_ops *
vmw_fence_ops(struct pb_fence_ops *ops)
{
assert(ops);
@@ -162,7 +162,7 @@ out_unlock:
*
* @fence: The opaque pipe fence handle.
*/
-static INLINE struct vmw_fence *
+static inline struct vmw_fence *
vmw_fence(struct pipe_fence_handle *fence)
{
return (struct vmw_fence *) fence;
diff --git a/src/gallium/winsys/svga/drm/vmw_screen.h b/src/gallium/winsys/svga/drm/vmw_screen.h
index fd76e614a5e..ce98db9b397 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen.h
+++ b/src/gallium/winsys/svga/drm/vmw_screen.h
@@ -102,7 +102,7 @@ struct vmw_winsys_screen
};
-static INLINE struct vmw_winsys_screen *
+static inline struct vmw_winsys_screen *
vmw_winsys_screen(struct svga_winsys_screen *base)
{
return (struct vmw_winsys_screen *)base;
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_dri.c b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
index 9f335900e68..e70e0fec4a3 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
@@ -126,7 +126,7 @@ out_no_vws:
return NULL;
}
-static INLINE boolean
+static inline boolean
vmw_dri1_intersect_src_bbox(struct drm_clip_rect *dst,
int dst_x,
int dst_y,
diff --git a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
index 14c3b2068c6..e2f0da58bf9 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_ioctl.c
@@ -650,7 +650,7 @@ vmw_ioctl_fence_unref(struct vmw_winsys_screen *vws,
vmw_error("%s Failed\n", __FUNCTION__);
}
-static INLINE uint32_t
+static inline uint32_t
vmw_drm_fence_flags(uint32_t flags)
{
uint32_t dflags = 0;
diff --git a/src/gallium/winsys/svga/drm/vmw_shader.h b/src/gallium/winsys/svga/drm/vmw_shader.h
index 1fd8c3311f9..28f99717391 100644
--- a/src/gallium/winsys/svga/drm/vmw_shader.h
+++ b/src/gallium/winsys/svga/drm/vmw_shader.h
@@ -47,14 +47,14 @@ struct vmw_svga_winsys_shader
uint32_t shid;
};
-static INLINE struct svga_winsys_gb_shader *
+static inline struct svga_winsys_gb_shader *
svga_winsys_shader(struct vmw_svga_winsys_shader *shader)
{
assert(!shader || shader->shid != SVGA3D_INVALID_ID);
return (struct svga_winsys_gb_shader *)shader;
}
-static INLINE struct vmw_svga_winsys_shader *
+static inline struct vmw_svga_winsys_shader *
vmw_svga_winsys_shader(struct svga_winsys_gb_shader *shader)
{
return (struct vmw_svga_winsys_shader *)shader;
diff --git a/src/gallium/winsys/svga/drm/vmw_surface.h b/src/gallium/winsys/svga/drm/vmw_surface.h
index e44d0554fbc..1291f380aa2 100644
--- a/src/gallium/winsys/svga/drm/vmw_surface.h
+++ b/src/gallium/winsys/svga/drm/vmw_surface.h
@@ -68,7 +68,7 @@ struct vmw_svga_winsys_surface
};
-static INLINE struct svga_winsys_surface *
+static inline struct svga_winsys_surface *
svga_winsys_surface(struct vmw_svga_winsys_surface *surf)
{
assert(!surf || surf->sid != SVGA3D_INVALID_ID);
@@ -76,7 +76,7 @@ svga_winsys_surface(struct vmw_svga_winsys_surface *surf)
}
-static INLINE struct vmw_svga_winsys_surface *
+static inline struct vmw_svga_winsys_surface *
vmw_svga_winsys_surface(struct svga_winsys_surface *surf)
{
return (struct vmw_svga_winsys_surface *)surf;
diff --git a/src/gallium/winsys/sw/dri/dri_sw_winsys.c b/src/gallium/winsys/sw/dri/dri_sw_winsys.c
index 6fed22bbd7c..8451d832806 100644
--- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c
@@ -55,13 +55,13 @@ struct dri_sw_winsys
struct drisw_loader_funcs *lf;
};
-static INLINE struct dri_sw_displaytarget *
+static inline struct dri_sw_displaytarget *
dri_sw_displaytarget( struct sw_displaytarget *dt )
{
return (struct dri_sw_displaytarget *)dt;
}
-static INLINE struct dri_sw_winsys *
+static inline struct dri_sw_winsys *
dri_sw_winsys( struct sw_winsys *ws )
{
return (struct dri_sw_winsys *)ws;
diff --git a/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c b/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c
index aae3ec55a25..dc725f4b90c 100644
--- a/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c
+++ b/src/gallium/winsys/sw/gdi/gdi_sw_winsys.c
@@ -62,7 +62,7 @@ struct gdi_sw_displaytarget
/** Cast wrapper */
-static INLINE struct gdi_sw_displaytarget *
+static inline struct gdi_sw_displaytarget *
gdi_sw_displaytarget( struct sw_displaytarget *buf )
{
return (struct gdi_sw_displaytarget *)buf;
diff --git a/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c b/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c
index a71d2a76791..89dd5471b09 100644
--- a/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c
+++ b/src/gallium/winsys/sw/hgl/hgl_sw_winsys.c
@@ -67,7 +67,7 @@ struct haiku_displaytarget
// Cast
-static INLINE struct haiku_displaytarget*
+static inline struct haiku_displaytarget*
hgl_sw_displaytarget(struct sw_displaytarget* target)
{
return (struct haiku_displaytarget *)target;
diff --git a/src/gallium/winsys/sw/kms-dri/SConscript b/src/gallium/winsys/sw/kms-dri/SConscript
deleted file mode 100644
index e7dd721dd13..00000000000
--- a/src/gallium/winsys/sw/kms-dri/SConscript
+++ /dev/null
@@ -1,23 +0,0 @@
-#######################################################################
-# SConscript for kms-dri winsys
-
-
-Import('*')
-
-if env['platform'] not in ('linux'):
- Return()
-
-env = env.Clone()
-
-env.PkgUseModules('DRM')
-
-env.Append(CPPPATH = [
- '#/src/gallium/include',
- '#/src/gallium/auxiliary',
-])
-
-ws_kms_dri = env.ConvenienceLibrary(
- target = 'ws_kms_dri',
- source = env.ParseSourceList('Makefile.sources', 'C_SOURCES'),
-)
-Export('ws_kms_dri')
diff --git a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
index 740b9201140..900c49f83e6 100644
--- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
@@ -83,13 +83,13 @@ struct kms_sw_winsys
struct list_head bo_list;
};
-static INLINE struct kms_sw_displaytarget *
+static inline struct kms_sw_displaytarget *
kms_sw_displaytarget( struct sw_displaytarget *dt )
{
return (struct kms_sw_displaytarget *)dt;
}
-static INLINE struct kms_sw_winsys *
+static inline struct kms_sw_winsys *
kms_sw_winsys( struct sw_winsys *ws )
{
return (struct kms_sw_winsys *)ws;
diff --git a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
index a6bf4985e1e..9b90eaa018b 100644
--- a/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
+++ b/src/gallium/winsys/sw/wrapper/wrapper_sw_winsys.c
@@ -66,13 +66,13 @@ struct wrapper_sw_displaytarget
void *ptr;
};
-static INLINE struct wrapper_sw_winsys *
+static inline struct wrapper_sw_winsys *
wrapper_sw_winsys(struct sw_winsys *ws)
{
return (struct wrapper_sw_winsys *)ws;
}
-static INLINE struct wrapper_sw_displaytarget *
+static inline struct wrapper_sw_displaytarget *
wrapper_sw_displaytarget(struct sw_displaytarget *dt)
{
return (struct wrapper_sw_displaytarget *)dt;
diff --git a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c
index 88310718049..515ecd9f7b7 100644
--- a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c
+++ b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c
@@ -92,7 +92,7 @@ struct xlib_sw_winsys
/** Cast wrapper */
-static INLINE struct xlib_displaytarget *
+static inline struct xlib_displaytarget *
xlib_displaytarget(struct sw_displaytarget *dt)
{
return (struct xlib_displaytarget *) dt;
diff --git a/src/gbm/Makefile.am b/src/gbm/Makefile.am
index 918fdf7d6ad..9a584cab352 100644
--- a/src/gbm/Makefile.am
+++ b/src/gbm/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = main/gbm.pc
@@ -41,18 +39,15 @@ libgbm_la_LIBADD += $(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.l
endif
if HAVE_DRI2
-noinst_LTLIBRARIES = libgbm_dri.la
-libgbm_dri_la_SOURCES = \
+libgbm_la_SOURCES += \
backends/dri/gbm_dri.c \
backends/dri/gbm_driint.h
-libgbm_dri_la_CFLAGS = \
- $(AM_CFLAGS) \
+AM_CFLAGS += \
-DDEFAULT_DRIVER_DIR='"$(DRI_DRIVER_SEARCH_DIR)"' \
$(LIBDRM_CFLAGS)
libgbm_la_LIBADD += \
- libgbm_dri.la \
$(LIBDRM_LIBS)
endif
diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am
index 74da9e5b979..2ab40506e97 100644
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -19,8 +19,6 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
-AUTOMAKE_OPTIONS = subdir-objects
-
AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index a234ac6f8e2..b491ad4d36f 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -33,6 +33,7 @@ NIR_FILES = \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
nir/nir_lower_global_vars_to_local.c \
+ nir/nir_lower_load_const_to_scalar.c \
nir/nir_lower_locals_to_regs.c \
nir/nir_lower_idiv.c \
nir/nir_lower_io.c \
@@ -55,6 +56,7 @@ NIR_FILES = \
nir/nir_opt_peephole_ffma.c \
nir/nir_opt_peephole_select.c \
nir/nir_opt_remove_phis.c \
+ nir/nir_opt_undef.c \
nir/nir_print.c \
nir/nir_remove_dead_variables.c \
nir/nir_search.c \
@@ -157,6 +159,8 @@ LIBGLSL_FILES = \
lower_packed_varyings.cpp \
lower_named_interface_blocks.cpp \
lower_packing_builtins.cpp \
+ lower_subroutine.cpp \
+ lower_tess_level.cpp \
lower_texture_projection.cpp \
lower_variable_index_to_cond_assign.cpp \
lower_vec_index_to_cond_assign.cpp \
diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index c52e518334d..eb6d8461671 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -304,6 +304,16 @@ private:
* Is this function call actually a constructor?
*/
bool cons;
+ ir_rvalue *
+ handle_method(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state);
+};
+
+class ast_subroutine_list : public ast_node
+{
+public:
+ virtual void print(void) const;
+ exec_list declarations;
};
class ast_array_specifier : public ast_node {
@@ -434,7 +444,9 @@ struct ast_type_qualifier {
unsigned out:1;
unsigned centroid:1;
unsigned sample:1;
+ unsigned patch:1;
unsigned uniform:1;
+ unsigned buffer:1;
unsigned smooth:1;
unsigned flat:1;
unsigned noperspective:1;
@@ -518,6 +530,22 @@ struct ast_type_qualifier {
/** \name Vulkan qualifiers */
unsigned vk_set:1;
+ /** \name Layout qualifiers for GL_ARB_tessellation_shader */
+ /** \{ */
+ /* tess eval input layout */
+ /* gs prim_type reused for primitive mode */
+ unsigned vertex_spacing:1;
+ unsigned ordering:1;
+ unsigned point_mode:1;
+ /* tess control output layout */
+ unsigned vertices:1;
+ /** \} */
+
+ /** \name Qualifiers for GL_ARB_shader_subroutine */
+ /** \{ */
+ unsigned subroutine:1; /**< Is this marked 'subroutine' */
+ unsigned subroutine_def:1; /**< Is this marked 'subroutine' with a list of types */
+ /** \} */
}
/** \brief Set of flags, accessed by name. */
q;
@@ -553,7 +581,10 @@ struct ast_type_qualifier {
/** Stream in GLSL 1.50 geometry shaders. */
unsigned stream;
- /** Input or output primitive type in GLSL 1.50 geometry shaders */
+ /**
+ * Input or output primitive type in GLSL 1.50 geometry shaders
+ * and tessellation shaders.
+ */
GLenum prim_type;
/**
@@ -580,6 +611,18 @@ struct ast_type_qualifier {
*/
int local_size[3];
+ /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */
+ GLenum vertex_spacing;
+
+ /** Tessellation evaluation shader: vertex ordering (CW or CCW) */
+ GLenum ordering;
+
+ /** Tessellation evaluation shader: point mode */
+ bool point_mode;
+
+ /** Tessellation control shader: number of output vertices */
+ int vertices;
+
/**
* Image format specified with an ARB_shader_image_load_store
* layout qualifier.
@@ -640,11 +683,17 @@ struct ast_type_qualifier {
_mesa_glsl_parse_state *state,
ast_type_qualifier q);
+ bool merge_out_qualifier(YYLTYPE *loc,
+ _mesa_glsl_parse_state *state,
+ ast_type_qualifier q,
+ ast_node* &node);
+
bool merge_in_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
ast_type_qualifier q,
ast_node* &node);
+ ast_subroutine_list *subroutine_list;
};
class ast_declarator_list;
@@ -1039,6 +1088,27 @@ public:
};
+/**
+ * AST node representing a declaration of the output layout for tessellation
+ * control shaders.
+ *
+ * The node only stores the vertex count and the source location handed to
+ * its constructor; hir() is merely declared here.
+ */
+class ast_tcs_output_layout : public ast_node
+{
+public:
+ /**
+ * \param locp source location, recorded on the node via set_location()
+ * \param vertices value copied into the \c vertices member (presumably the
+ * count of a `layout(vertices = N) out;` declaration --
+ * TODO confirm against the parser)
+ */
+ ast_tcs_output_layout(const struct YYLTYPE &locp, int vertices)
+ : vertices(vertices)
+ {
+ set_location(locp);
+ }
+
+ /** Conversion to HIR; the definition is not part of this declaration. */
+ virtual ir_rvalue *hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state);
+
+private:
+ /** Vertex count given at construction; never modified afterwards (const). */
+ const int vertices;
+};
+
+
/**
* AST node representing a declaration of the input layout for geometry
* shaders.
diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index 752d86f72fd..27e84d101ec 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -107,6 +107,33 @@ update_max_array_access(ir_rvalue *ir, int idx, YYLTYPE *loc,
}
+/**
+ * Implicit size of an unsized tessellation input array.
+ *
+ * \return state->Const.MaxPatchVertices for any shader input of a
+ *         tessellation control shader and for any non-patch shader input
+ *         of a tessellation evaluation shader; 0 in every other case.
+ */
+static int
+get_implicit_array_size(struct _mesa_glsl_parse_state *state,
+                        ir_rvalue *array)
+{
+   const ir_variable *const input = array->variable_referenced();
+   bool implicitly_sized;
+
+   switch (state->stage) {
+   case MESA_SHADER_TESS_CTRL:
+      /* Every control shader input is sized to the maximum patch size. */
+      implicitly_sized = input->data.mode == ir_var_shader_in;
+      break;
+
+   case MESA_SHADER_TESS_EVAL:
+      /* Only the non-patch evaluation shader inputs are sized that way. */
+      implicitly_sized = input->data.mode == ir_var_shader_in &&
+                         !input->data.patch;
+      break;
+
+   default:
+      implicitly_sized = false;
+      break;
+   }
+
+   if (implicitly_sized)
+      return state->Const.MaxPatchVertices;
+
+   return 0;
+}
+
+
ir_rvalue *
_mesa_ast_array_index_to_hir(void *mem_ctx,
struct _mesa_glsl_parse_state *state,
@@ -183,7 +210,25 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
update_max_array_access(array, idx, &loc, state);
} else if (const_index == NULL && array->type->is_array()) {
if (array->type->is_unsized_array()) {
- _mesa_glsl_error(&loc, state, "unsized array index must be constant");
+ int implicit_size = get_implicit_array_size(state, array);
+ if (implicit_size) {
+ ir_variable *v = array->whole_variable_referenced();
+ if (v != NULL)
+ v->data.max_array_access = implicit_size - 1;
+ }
+ else if (state->stage == MESA_SHADER_TESS_CTRL &&
+ array->variable_referenced()->data.mode == ir_var_shader_out &&
+ !array->variable_referenced()->data.patch) {
+ /* Tessellation control shader output non-patch arrays are
+ * initially unsized. Despite that, they are allowed to be
+ * indexed with a non-constant expression (typically
+ * "gl_InvocationID"). The array size will be determined
+ * by the linker.
+ */
+ }
+ else {
+ _mesa_glsl_error(&loc, state, "unsized array index must be constant");
+ }
} else if (array->type->fields.array->is_interface()
&& array->variable_referenced()->data.mode == ir_var_uniform
&& !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
@@ -226,24 +271,24 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
* dynamically uniform expression is undefined.
*/
if (array->type->without_array()->is_sampler()) {
- if (!state->is_version(130, 100)) {
- if (state->es_shader) {
- _mesa_glsl_warning(&loc, state,
- "sampler arrays indexed with non-constant "
- "expressions is optional in %s",
- state->get_version_string());
- } else {
- _mesa_glsl_warning(&loc, state,
- "sampler arrays indexed with non-constant "
- "expressions will be forbidden in GLSL 1.30 "
- "and later");
- }
- } else if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
- _mesa_glsl_error(&loc, state,
- "sampler arrays indexed with non-constant "
- "expressions is forbidden in GLSL 1.30 and "
- "later");
- }
+ if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
+ if (state->is_version(130, 300))
+ _mesa_glsl_error(&loc, state,
+ "sampler arrays indexed with non-constant "
+ "expressions are forbidden in GLSL %s "
+ "and later",
+ state->es_shader ? "ES 3.00" : "1.30");
+ else if (state->es_shader)
+ _mesa_glsl_warning(&loc, state,
+ "sampler arrays indexed with non-constant "
+ "expressions will be forbidden in GLSL "
+ "3.00 and later");
+ else
+ _mesa_glsl_warning(&loc, state,
+ "sampler arrays indexed with non-constant "
+ "expressions will be forbidden in GLSL "
+ "1.30 and later");
+ }
}
}
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 92e26bf2416..803edf5a14d 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -26,6 +26,7 @@
#include "glsl_types.h"
#include "ir.h"
#include "main/core.h" /* for MIN2 */
+#include "main/shaderobj.h"
static ir_rvalue *
convert_component(ir_rvalue *src, const glsl_type *desired_type);
@@ -355,6 +356,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
static ir_rvalue *
generate_call(exec_list *instructions, ir_function_signature *sig,
exec_list *actual_parameters,
+ ir_variable *sub_var,
+ ir_rvalue *array_idx,
struct _mesa_glsl_parse_state *state)
{
void *ctx = state;
@@ -421,7 +424,8 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
deref = new(ctx) ir_dereference_variable(var);
}
- ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters);
+
+ ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters, sub_var, array_idx);
instructions->push_tail(call);
/* Also emit any necessary out-parameter conversions. */
@@ -489,6 +493,40 @@ done:
return sig;
}
+/**
+ * Resolve a call made through a subroutine uniform.
+ *
+ * The uniform is looked up in the symbol table under the name
+ * "<stage prefix>_<name>".  Its (array-stripped) type name is then compared
+ * against the names in state->subroutine_types, and the first match is asked
+ * for a signature matching \c actual_parameters.
+ *
+ * \param var_r  receives the subroutine uniform; written only when both the
+ *               uniform and its subroutine type were found.
+ * \return the matching signature, or NULL when the uniform, its subroutine
+ *         type, or a suitable signature cannot be found.
+ */
+static ir_function_signature *
+match_subroutine_by_name(const char *name,
+                         exec_list *actual_parameters,
+                         struct _mesa_glsl_parse_state *state,
+                         ir_variable **var_r)
+{
+   void *mem_ctx = state;
+
+   const char *mangled_name =
+      ralloc_asprintf(mem_ctx, "%s_%s",
+                      _mesa_shader_stage_to_subroutine_prefix(state->stage),
+                      name);
+
+   ir_variable *uniform = state->symbols->get_variable(mangled_name);
+   if (uniform == NULL)
+      return NULL;
+
+   ir_function *subroutine_type = NULL;
+   for (int i = 0;
+        subroutine_type == NULL && i < state->num_subroutine_types;
+        i++) {
+      ir_function *candidate = state->subroutine_types[i];
+
+      if (strcmp(candidate->name,
+                 uniform->type->without_array()->name) == 0)
+         subroutine_type = candidate;
+   }
+
+   if (subroutine_type == NULL)
+      return NULL;
+
+   *var_r = uniform;
+
+   bool is_exact = false;
+   return subroutine_type->matching_signature(state, actual_parameters,
+                                              false, &is_exact);
+}
+
static void
print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc,
ir_function *f)
@@ -1531,6 +1569,65 @@ process_record_constructor(exec_list *instructions,
&actual_parameters, state);
}
+/**
+ * Lower a "method call" expression.  The only method recognised is .length().
+ *
+ * subexpressions[0] is the field selection: its identifier is the method
+ * name and its own subexpressions[0] is the object the method is called on.
+ *
+ * .length() produces an ir_constant holding
+ *  - the array size, for a sized array;
+ *  - the component count of a vector or the column count of a matrix, when
+ *    ARB_shading_language_420pack is enabled.
+ *
+ * Every other case (arguments supplied, unsized array, vector or matrix
+ * without 420pack, scalar operand, unknown method name) emits a compile
+ * error and returns ir_rvalue::error_value().
+ */
+ir_rvalue *
+ast_function_expression::handle_method(exec_list *instructions,
+                                       struct _mesa_glsl_parse_state *state)
+{
+   const ast_expression *field = subexpressions[0];
+   ir_rvalue *op;
+   ir_rvalue *result;
+   void *ctx = state;
+   /* Handle "method calls" in GLSL 1.20 - namely, array.length() */
+   YYLTYPE loc = get_location();
+   /* The return value is ignored: processing continues after the check. */
+   state->check_version(120, 300, &loc, "methods not supported");
+
+   const char *method;
+   method = field->primary_expression.identifier;
+
+   op = field->subexpressions[0]->hir(instructions, state);
+   if (strcmp(method, "length") == 0) {
+      if (!this->expressions.is_empty()) {
+         _mesa_glsl_error(&loc, state, "length method takes no arguments");
+         goto fail;
+      }
+
+      if (op->type->is_array()) {
+         if (op->type->is_unsized_array()) {
+            _mesa_glsl_error(&loc, state, "length called on unsized array");
+            goto fail;
+         }
+
+         result = new(ctx) ir_constant(op->type->array_size());
+      } else if (op->type->is_vector()) {
+         if (state->ARB_shading_language_420pack_enable) {
+            /* .length() returns int. */
+            result = new(ctx) ir_constant((int) op->type->vector_elements);
+         } else {
+            /* Name the vector case correctly and keep a space between the
+             * two concatenated string literals.
+             */
+            _mesa_glsl_error(&loc, state, "length method on vector only "
+                             "available with "
+                             "ARB_shading_language_420pack");
+            goto fail;
+         }
+      } else if (op->type->is_matrix()) {
+         if (state->ARB_shading_language_420pack_enable) {
+            /* .length() returns int. */
+            result = new(ctx) ir_constant((int) op->type->matrix_columns);
+         } else {
+            _mesa_glsl_error(&loc, state, "length method on matrix only "
+                             "available with "
+                             "ARB_shading_language_420pack");
+            goto fail;
+         }
+      } else {
+         _mesa_glsl_error(&loc, state, "length called on scalar.");
+         goto fail;
+      }
+   } else {
+      _mesa_glsl_error(&loc, state, "unknown method: `%s'", method);
+      goto fail;
+   }
+   return result;
+fail:
+   return ir_rvalue::error_value(ctx);
+}
ir_rvalue *
ast_function_expression::hir(exec_list *instructions,
@@ -1543,8 +1640,6 @@ ast_function_expression::hir(exec_list *instructions,
* 2. methods - Only the .length() method of array types.
* 3. functions - Calls to regular old functions.
*
- * Method calls are actually detected when the ast_field_selection
- * expression is handled.
*/
if (is_constructor()) {
const ast_type_specifier *type = (ast_type_specifier *) subexpressions[0];
@@ -1765,11 +1860,22 @@ ast_function_expression::hir(exec_list *instructions,
&actual_parameters,
ctx);
}
+ } else if (subexpressions[0]->oper == ast_field_selection) {
+ return handle_method(instructions, state);
} else {
const ast_expression *id = subexpressions[0];
- const char *func_name = id->primary_expression.identifier;
+ const char *func_name;
YYLTYPE loc = get_location();
exec_list actual_parameters;
+ ir_variable *sub_var = NULL;
+ ir_rvalue *array_idx = NULL;
+
+ if (id->oper == ast_array_index) {
+ func_name = id->subexpressions[0]->primary_expression.identifier;
+ array_idx = id->subexpressions[1]->hir(instructions, state);
+ } else {
+ func_name = id->primary_expression.identifier;
+ }
process_parameters(instructions, &actual_parameters, &this->expressions,
state);
@@ -1778,6 +1884,10 @@ ast_function_expression::hir(exec_list *instructions,
match_function_by_name(func_name, &actual_parameters, state);
ir_rvalue *value = NULL;
+ if (sig == NULL) {
+ sig = match_subroutine_by_name(func_name, &actual_parameters, state, &sub_var);
+ }
+
if (sig == NULL) {
no_matching_function_error(func_name, &loc, &actual_parameters, state);
value = ir_rvalue::error_value(ctx);
@@ -1785,7 +1895,14 @@ ast_function_expression::hir(exec_list *instructions,
/* an error has already been emitted */
value = ir_rvalue::error_value(ctx);
} else {
- value = generate_call(instructions, sig, &actual_parameters, state);
+ value = generate_call(instructions, sig, &actual_parameters, sub_var, array_idx, state);
+ if (!value) {
+ ir_variable *const tmp = new(ctx) ir_variable(glsl_type::void_type,
+ "void_var",
+ ir_var_temporary);
+ instructions->push_tail(tmp);
+ value = new(ctx) ir_dereference_variable(tmp);
+ }
}
return value;
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 6896b700cd6..fa2c09d2697 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -54,6 +54,7 @@
#include "ast.h"
#include "glsl_types.h"
#include "program/hash_table.h"
+#include "main/shaderobj.h"
#include "ir.h"
#include "ir_builder.h"
@@ -79,6 +80,7 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
state->toplevel_ir = instructions;
state->gs_input_prim_type_specified = false;
+ state->tcs_output_vertices_specified = false;
state->cs_input_local_size_specified = false;
/* Section 4.2 of the GLSL 1.20 specification states:
@@ -638,6 +640,34 @@ shift_result_type(const struct glsl_type *type_a,
return type_a;
}
+/**
+ * Walk down an rvalue tree through array dereferences, record dereferences
+ * and swizzles, and return the index expression of the last array
+ * dereference met on the way.
+ *
+ * That is the largest indexing level: for an array of blocks it is the
+ * block index, not the index applied to an array-typed member of a block.
+ *
+ * \return the index expression, or NULL when no array dereference is found.
+ */
+static ir_rvalue *
+find_innermost_array_index(ir_rvalue *rv)
+{
+   ir_dereference_array *deepest = NULL;
+
+   for (ir_rvalue *cur = rv; cur != NULL; ) {
+      if (ir_dereference_array *as_array = cur->as_dereference_array()) {
+         deepest = as_array;
+         cur = as_array->array;
+      } else if (ir_dereference_record *as_record =
+                    cur->as_dereference_record()) {
+         cur = as_record->record;
+      } else if (ir_swizzle *as_swz = cur->as_swizzle()) {
+         cur = as_swz->val;
+      } else {
+         /* Any other node kind ends the walk. */
+         cur = NULL;
+      }
+   }
+
+   return deepest != NULL ? deepest->array_index : NULL;
+}
+
/**
* Validates that a value can be assigned to a location with a specified type
*
@@ -654,9 +684,9 @@ shift_result_type(const struct glsl_type *type_a,
* In addition to being used for assignments, this function is used to
* type-check return values.
*/
-ir_rvalue *
+static ir_rvalue *
validate_assignment(struct _mesa_glsl_parse_state *state,
- YYLTYPE loc, const glsl_type *lhs_type,
+ YYLTYPE loc, ir_rvalue *lhs,
ir_rvalue *rhs, bool is_initializer)
{
/* If there is already some error in the RHS, just return it. Anything
@@ -665,9 +695,28 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
if (rhs->type->is_error())
return rhs;
+   /* In the Tessellation Control Shader:
+    * If a per-vertex output variable is used as an l-value, it is an error
+    * if the expression indicating the vertex number is not the identifier
+    * `gl_InvocationID`.
+    *
+    * variable_referenced() may yield NULL (the index lookup below already
+    * allows for that), so the LHS variable is checked before its data is
+    * read; an LHS that is not rooted in a variable is simply not subject
+    * to this rule.
+    */
+   if (state->stage == MESA_SHADER_TESS_CTRL) {
+      ir_variable *var = lhs->variable_referenced();
+      if (var != NULL &&
+          var->data.mode == ir_var_shader_out && !var->data.patch) {
+         ir_rvalue *index = find_innermost_array_index(lhs);
+         ir_variable *index_var = index ? index->variable_referenced() : NULL;
+         if (!index_var || strcmp(index_var->name, "gl_InvocationID") != 0) {
+            _mesa_glsl_error(&loc, state,
+                             "Tessellation control shader outputs can only "
+                             "be indexed by gl_InvocationID");
+            return NULL;
+         }
+      }
+   }
+
+
/* If the types are identical, the assignment can trivially proceed.
*/
- if (rhs->type == lhs_type)
+ if (rhs->type == lhs->type)
return rhs;
/* If the array element types are the same and the LHS is unsized,
@@ -677,8 +726,8 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
* Note: Whole-array assignments are not permitted in GLSL 1.10, but this
* is handled by ir_dereference::is_lvalue.
*/
- if (lhs_type->is_unsized_array() && rhs->type->is_array()
- && (lhs_type->fields.array == rhs->type->fields.array)) {
+ if (lhs->type->is_unsized_array() && rhs->type->is_array()
+ && (lhs->type->fields.array == rhs->type->fields.array)) {
if (is_initializer) {
return rhs;
} else {
@@ -689,8 +738,8 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
}
/* Check for implicit conversion in GLSL 1.20 */
- if (apply_implicit_conversion(lhs_type, rhs, state)) {
- if (rhs->type == lhs_type)
+ if (apply_implicit_conversion(lhs->type, rhs, state)) {
+ if (rhs->type == lhs->type)
return rhs;
}
@@ -698,7 +747,7 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
"%s of type %s cannot be assigned to "
"variable of type %s",
is_initializer ? "initializer" : "value",
- rhs->type->name, lhs_type->name);
+ rhs->type->name, lhs->type->name);
return NULL;
}
@@ -733,7 +782,7 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) {
ir_rvalue *new_rhs =
- validate_assignment(state, lhs_loc, lhs->type,
+ validate_assignment(state, lhs_loc, lhs,
rhs, is_initializer);
if (new_rhs == NULL) {
@@ -795,7 +844,7 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
}
ir_rvalue *new_rhs =
- validate_assignment(state, lhs_loc, lhs->type, rhs, is_initializer);
+ validate_assignment(state, lhs_loc, lhs, rhs, is_initializer);
if (new_rhs != NULL) {
rhs = new_rhs;
@@ -972,6 +1021,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_SUBROUTINE:
/* I assume a comparison of a struct containing a sampler just
* ignores the sampler present in the type.
*/
@@ -1271,7 +1321,14 @@ ast_expression::do_hir(exec_list *instructions,
* applied to one operand that can make them match, in which
* case this conversion is done."
*/
- if ((!apply_implicit_conversion(op[0]->type, op[1], state)
+
+      if (op[0]->type == glsl_type::void_type || op[1]->type == glsl_type::void_type) {
+         /* The operator is passed once per "%s": a format string that mixes
+          * unnumbered ("%s") and numbered ("%1$s") conversions has undefined
+          * results, and numbered conversions are not available in every C
+          * runtime.
+          */
+         const char *const op_str = (this->oper == ast_equal) ? "==" : "!=";
+         _mesa_glsl_error(& loc, state, "`%s': wrong operand types: "
+                          "no operation `%s' exists that takes a left-hand "
+                          "operand of type 'void' or a right operand of type "
+                          "'void'", op_str, op_str);
+         error_emitted = true;
+ } else if ((!apply_implicit_conversion(op[0]->type, op[1], state)
&& !apply_implicit_conversion(op[1]->type, op[0], state))
|| (op[0]->type != op[1]->type)) {
_mesa_glsl_error(& loc, state, "operands of `%s' must have the same "
@@ -2008,7 +2065,7 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
const glsl_type *type,
ir_variable *var)
{
- if (var && !var->is_in_uniform_block()) {
+ if (var && !var->is_in_buffer_block()) {
/* Layout qualifiers may only apply to interface blocks and fields in
* them.
*/
@@ -2045,9 +2102,10 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
ir_variable *var,
const ast_type_qualifier *qual)
{
- if (var->data.mode != ir_var_uniform) {
+ if (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage) {
_mesa_glsl_error(loc, state,
- "the \"binding\" qualifier only applies to uniforms");
+ "the \"binding\" qualifier only applies to uniforms and "
+ "shader storage buffer objects");
return false;
}
@@ -2071,13 +2129,31 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
*
* The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS.
*/
- if (max_index >= ctx->Const.MaxUniformBufferBindings) {
+ if (var->data.mode == ir_var_uniform &&
+ max_index >= ctx->Const.MaxUniformBufferBindings) {
_mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds "
"the maximum number of UBO binding points (%d)",
qual->binding, elements,
ctx->Const.MaxUniformBufferBindings);
return false;
}
+ /* SSBOs. From page 67 of the GLSL 4.30 specification:
+ * "If the binding point for any uniform or shader storage block instance
+ * is less than zero, or greater than or equal to the
+ * implementation-dependent maximum number of uniform buffer bindings, a
+ * compile-time error will occur. When the binding identifier is used
+ * with a uniform or shader storage block instanced as an array of size
+ * N, all elements of the array from binding through binding + N – 1 must
+ * be within this range."
+ */
+ if (var->data.mode == ir_var_shader_storage &&
+ max_index >= ctx->Const.MaxShaderStorageBufferBindings) {
+ _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds "
+ "the maximum number of SSBO binding points (%d)",
+ qual->binding, elements,
+ ctx->Const.MaxShaderStorageBufferBindings);
+ return false;
+ }
} else if (var->type->is_sampler() ||
(var->type->is_array() && var->type->fields.array->is_sampler())) {
/* Samplers. From page 63 of the GLSL 4.20 specification:
@@ -2206,6 +2282,8 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
* input output
* ----- ------
* vertex explicit_loc sso
+ * tess control sso sso
+ * tess eval sso sso
* geometry sso sso
* fragment sso explicit_loc
*/
@@ -2228,6 +2306,8 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
fail = true;
break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
if (var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) {
if (!state->check_separate_shader_objects_allowed(loc, var))
@@ -2287,8 +2367,13 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
: (qual->location + VARYING_SLOT_VAR0);
break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
- var->data.location = qual->location + VARYING_SLOT_VAR0;
+ if (var->data.patch)
+ var->data.location = qual->location + VARYING_SLOT_PATCH0;
+ else
+ var->data.location = qual->location + VARYING_SLOT_VAR0;
break;
case MESA_SHADER_FRAGMENT:
@@ -2439,6 +2524,12 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
}
}
+ if (qual->flags.q.subroutine && !qual->flags.q.uniform) {
+ _mesa_glsl_error(loc, state,
+ "`subroutine' may only be applied to uniforms, "
+ "subroutine type declarations, or function definitions");
+ }
+
if (qual->flags.q.constant || qual->flags.q.attribute
|| qual->flags.q.uniform
|| (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT)))
@@ -2455,6 +2546,9 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
var->data.stream = qual->stream;
}
+ if (qual->flags.q.patch)
+ var->data.patch = 1;
+
if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) {
var->type = glsl_type::error_type;
_mesa_glsl_error(loc, state,
@@ -2502,6 +2596,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
var->data.mode = ir_var_shader_out;
else if (qual->flags.q.uniform)
var->data.mode = ir_var_uniform;
+ else if (qual->flags.q.buffer)
+ var->data.mode = ir_var_shader_storage;
if (!is_parameter && is_varying_var(var, state->stage)) {
/* User-defined ins/outs are not permitted in compute shaders. */
@@ -2565,7 +2661,9 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
case MESA_SHADER_VERTEX:
if (var->data.mode == ir_var_shader_out)
var->data.invariant = true;
- break;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
if ((var->data.mode == ir_var_shader_in)
|| (var->data.mode == ir_var_shader_out))
@@ -2984,6 +3082,15 @@ process_initializer(ir_variable *var, ast_declaration *decl,
"cannot initialize uniforms");
}
+ /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+ *
+ * "Buffer variables cannot have initializers."
+ */
+ if (var->data.mode == ir_var_shader_storage) {
+ _mesa_glsl_error(& initializer_loc, state,
+ "SSBO variables cannot have initializers");
+ }
+
/* From section 4.1.7 of the GLSL 4.40 spec:
*
* "Opaque variables [...] are initialized only through the
@@ -3019,7 +3126,7 @@ process_initializer(ir_variable *var, ast_declaration *decl,
if (type->qualifier.flags.q.constant
|| type->qualifier.flags.q.uniform) {
ir_rvalue *new_rhs = validate_assignment(state, initializer_loc,
- var->type, rhs, true);
+ lhs, rhs, true);
if (new_rhs != NULL) {
rhs = new_rhs;
@@ -3105,30 +3212,13 @@ process_initializer(ir_variable *var, ast_declaration *decl,
return result;
}
-
-/**
- * Do additional processing necessary for geometry shader input declarations
- * (this covers both interface blocks arrays and bare input variables).
- */
static void
-handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state,
- YYLTYPE loc, ir_variable *var)
+validate_layout_qualifier_vertex_count(struct _mesa_glsl_parse_state *state,
+ YYLTYPE loc, ir_variable *var,
+ unsigned num_vertices,
+ unsigned *size,
+ const char *var_category)
{
- unsigned num_vertices = 0;
- if (state->gs_input_prim_type_specified) {
- num_vertices = vertices_per_prim(state->in_qualifier->prim_type);
- }
-
- /* Geometry shader input variables must be arrays. Caller should have
- * reported an error for this.
- */
- if (!var->type->is_array()) {
- assert(state->error);
-
- /* To avoid cascading failures, short circuit the checks below. */
- return;
- }
-
if (var->type->is_unsized_array()) {
/* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec says:
*
@@ -3138,6 +3228,8 @@ handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state,
*
* Followed by a table mapping each allowed input layout qualifier to
* the corresponding input length.
+ *
+ * Similarly for tessellation control shader outputs.
*/
if (num_vertices != 0)
var->type = glsl_type::get_array_instance(var->type->fields.array,
@@ -3164,22 +3256,101 @@ handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state,
*/
if (num_vertices != 0 && var->type->length != num_vertices) {
_mesa_glsl_error(&loc, state,
- "geometry shader input size contradicts previously"
- " declared layout (size is %u, but layout requires a"
- " size of %u)", var->type->length, num_vertices);
- } else if (state->gs_input_size != 0 &&
- var->type->length != state->gs_input_size) {
+ "%s size contradicts previously declared layout "
+ "(size is %u, but layout requires a size of %u)",
+ var_category, var->type->length, num_vertices);
+ } else if (*size != 0 && var->type->length != *size) {
_mesa_glsl_error(&loc, state,
- "geometry shader input sizes are "
- "inconsistent (size is %u, but a previous "
- "declaration has size %u)",
- var->type->length, state->gs_input_size);
+ "%s sizes are inconsistent (size is %u, but a "
+ "previous declaration has size %u)",
+ var_category, var->type->length, *size);
} else {
- state->gs_input_size = var->type->length;
+ *size = var->type->length;
}
}
}
+/**
+ * Do additional processing necessary for tessellation control shader output
+ * declarations.
+ *
+ * A non-patch output that is not an array is reported as an error.  Patch
+ * outputs are left alone.  Every other output is checked against (or sized
+ * from) the vertex count of the output layout qualifier, when one has been
+ * specified, and against state->tcs_output_size.
+ */
+static void
+handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
+                                    YYLTYPE loc, ir_variable *var)
+{
+   /* Zero means that no output vertex count has been declared so far. */
+   unsigned num_vertices = 0;
+
+   if (state->tcs_output_vertices_specified) {
+      num_vertices = state->out_qualifier->vertices;
+   }
+
+   if (!var->type->is_array() && !var->data.patch) {
+      _mesa_glsl_error(&loc, state,
+                       "tessellation control shader outputs must be arrays");
+
+      /* To avoid cascading failures, short circuit the checks below. */
+      return;
+   }
+
+   if (var->data.patch)
+      return;
+
+   /* The category string is interpolated into the size-mismatch
+    * diagnostics, so it has to name this stage's outputs rather than
+    * geometry shader inputs.
+    */
+   validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
+                                          &state->tcs_output_size,
+                                          "tessellation control shader output");
+}
+
+/**
+ * Extra processing for input declarations of tessellation control and
+ * evaluation shaders, whether they are interface block arrays or bare input
+ * variables.
+ *
+ * Patch inputs are left untouched.  A per-vertex input must be an array; an
+ * unsized one is given the size state->Const.MaxPatchVertices.
+ */
+static void
+handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state,
+                              YYLTYPE loc, ir_variable *var)
+{
+   /* Nothing to validate or resize for per-patch inputs. */
+   if (var->data.patch)
+      return;
+
+   if (!var->type->is_array()) {
+      _mesa_glsl_error(&loc, state,
+                       "per-vertex tessellation shader inputs must be arrays");
+      /* Stop here so that one mistake does not trigger further errors. */
+      return;
+   }
+
+   if (!var->type->is_unsized_array())
+      return;
+
+   /* Unsized arrays are implicitly sized to gl_MaxPatchVertices. */
+   const glsl_type *const element_type = var->type->fields.array;
+   var->type = glsl_type::get_array_instance(element_type,
+                                             state->Const.MaxPatchVertices);
+}
+
+
+/**
+ * Extra processing for geometry shader input declarations, whether they are
+ * interface block arrays or bare input variables.
+ *
+ * Array inputs are checked against (or sized from) the vertex count implied
+ * by the declared input primitive type, when there is one, and against
+ * state->gs_input_size.
+ */
+static void
+handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state,
+                                  YYLTYPE loc, ir_variable *var)
+{
+   /* Stays zero until an input primitive type has been specified. */
+   const unsigned num_vertices = state->gs_input_prim_type_specified
+      ? vertices_per_prim(state->in_qualifier->prim_type)
+      : 0;
+
+   if (var->type->is_array()) {
+      validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
+                                             &state->gs_input_size,
+                                             "geometry shader input");
+      return;
+   }
+
+   /* A non-array geometry shader input is an error that the caller is
+    * expected to have reported already; skipping the checks above avoids
+    * cascading failures.
+    */
+   assert(state->error);
+}
void
validate_identifier(const char *identifier, YYLTYPE loc,
@@ -3358,6 +3529,18 @@ ast_declarator_list::hir(exec_list *instructions,
decl_type = this->type->glsl_type(& type_name, state);
+ /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+ * "Buffer variables may only be declared inside interface blocks
+ * (section 4.3.9 “Interface Blocks”), which are then referred to as
+ * shader storage blocks. It is a compile-time error to declare buffer
+ * variables at global scope (outside a block)."
+ */
+ if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) {
+ _mesa_glsl_error(&loc, state,
+ "buffer variables cannot be declared outside "
+ "interface blocks");
+ }
+
/* An offset-qualified atomic counter declaration sets the default
* offset for the next declaration within the same atomic counter
* buffer.
@@ -3431,7 +3614,7 @@ ast_declarator_list::hir(exec_list *instructions,
foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
const struct glsl_type *var_type;
ir_variable *var;
-
+ const char *identifier = decl->identifier;
/* FINISHME: Emit a warning if a variable declaration shadows a
* FINISHME: declaration at a higher scope.
*/
@@ -3449,10 +3632,24 @@ ast_declarator_list::hir(exec_list *instructions,
continue;
}
+ if (this->type->qualifier.flags.q.subroutine) {
+ const glsl_type *t;
+ const char *name;
+
+ t = state->symbols->get_type(this->type->specifier->type_name);
+ if (!t)
+ _mesa_glsl_error(& loc, state,
+ "invalid type in declaration of `%s'",
+ decl->identifier);
+ name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), decl->identifier);
+
+ identifier = name;
+
+ }
var_type = process_array_type(&loc, decl_type, decl->array_specifier,
state);
- var = new(ctx) ir_variable(var_type, decl->identifier, ir_var_auto);
+ var = new(ctx) ir_variable(var_type, identifier, ir_var_auto);
/* The 'varying in' and 'varying out' qualifiers can only be used with
* ARB_geometry_shader4 and EXT_geometry_shader4, which we don't support
@@ -3524,6 +3721,8 @@ ast_declarator_list::hir(exec_list *instructions,
*/
if (this->type->qualifier.flags.q.attribute) {
mode = "attribute";
+ } else if (this->type->qualifier.flags.q.subroutine) {
+ mode = "subroutine uniform";
} else if (this->type->qualifier.flags.q.uniform) {
mode = "uniform";
} else if (this->type->qualifier.flags.q.varying) {
@@ -3662,6 +3861,9 @@ ast_declarator_list::hir(exec_list *instructions,
}
}
}
+ } else if (state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) {
+ handle_tess_shader_input_decl(state, loc, var);
}
} else if (var->data.mode == ir_var_shader_out) {
const glsl_type *check_type = var->type->without_array();
@@ -3757,6 +3959,13 @@ ast_declarator_list::hir(exec_list *instructions,
}
}
}
+
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ handle_tess_ctrl_shader_output_decl(state, loc, var);
+ }
+ } else if (var->type->contains_subroutine()) {
+ /* declare subroutine uniforms as hidden */
+ var->data.how_declared = ir_var_hidden;
}
/* Integer fragment inputs must be qualified with 'flat'. In GLSL ES,
@@ -3880,6 +4089,33 @@ ast_declarator_list::hir(exec_list *instructions,
}
+ /* From section 4.3.4 of the GLSL 4.00 spec:
+ * "Input variables may not be declared using the patch in qualifier
+ * in tessellation control or geometry shaders."
+ *
+ * From section 4.3.6 of the GLSL 4.00 spec:
+ * "It is an error to use patch out in a vertex, tessellation
+ * evaluation, or geometry shader."
+ *
+ * This doesn't explicitly forbid using them in a fragment shader, but
+ * that's probably just an oversight.
+ */
+ if (state->stage != MESA_SHADER_TESS_EVAL
+ && this->type->qualifier.flags.q.patch
+ && this->type->qualifier.flags.q.in) {
+
+ _mesa_glsl_error(&loc, state, "'patch in' can only be used in a "
+ "tessellation evaluation shader");
+ }
+
+ if (state->stage != MESA_SHADER_TESS_CTRL
+ && this->type->qualifier.flags.q.patch
+ && this->type->qualifier.flags.q.out) {
+
+ _mesa_glsl_error(&loc, state, "'patch out' can only be used in a "
+ "tessellation control shader");
+ }
+
/* Precision qualifiers exists only in GLSL versions 1.00 and >= 1.30.
*/
if (this->type->qualifier.precision != ast_precision_none) {
@@ -3891,9 +4127,7 @@ ast_declarator_list::hir(exec_list *instructions,
* an array of that type.
*/
if (!(this->type->qualifier.precision == ast_precision_none
- || precision_qualifier_allowed(var->type)
- || (var->type->is_array()
- && precision_qualifier_allowed(var->type->fields.array)))) {
+ || precision_qualifier_allowed(var->type->without_array()))) {
_mesa_glsl_error(&loc, state,
"precision qualifiers apply only to floating point"
@@ -4196,6 +4430,7 @@ ast_function::hir(exec_list *instructions,
ir_function *f = NULL;
ir_function_signature *sig = NULL;
exec_list hir_parameters;
+ YYLTYPE loc = this->get_location();
const char *const name = identifier;
@@ -4247,6 +4482,17 @@ ast_function::hir(exec_list *instructions,
return_type = glsl_type::error_type;
}
+ /* ARB_shader_subroutine states:
+ * "Subroutine declarations cannot be prototyped. It is an error to prepend
+ * subroutine(...) to a function declaration."
+ */
+ if (this->return_type->qualifier.flags.q.subroutine_def && !is_definition) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state,
+ "function declaration `%s' cannot have subroutine prepended",
+ name);
+ }
+
/* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec:
* "No qualifier is allowed on the return type of a function."
*/
@@ -4284,15 +4530,15 @@ ast_function::hir(exec_list *instructions,
f = state->symbols->get_function(name);
if (f == NULL) {
f = new(ctx) ir_function(name);
- if (!state->symbols->add_function(f)) {
- /* This function name shadows a non-function use of the same name. */
- YYLTYPE loc = this->get_location();
-
- _mesa_glsl_error(&loc, state, "function name `%s' conflicts with "
- "non-function", name);
- return NULL;
+ if (!this->return_type->qualifier.flags.q.subroutine) {
+ if (!state->symbols->add_function(f)) {
+ /* This function name shadows a non-function use of the same name. */
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state, "function name `%s' conflicts with "
+ "non-function", name);
+ return NULL;
+ }
}
-
emit_function(state, f);
}
@@ -4379,6 +4625,44 @@ ast_function::hir(exec_list *instructions,
sig->replace_parameters(&hir_parameters);
signature = sig;
+ if (this->return_type->qualifier.flags.q.subroutine_def) {
+ int idx;
+
+ f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length();
+ f->subroutine_types = ralloc_array(state, const struct glsl_type *,
+ f->num_subroutine_types);
+ idx = 0;
+ foreach_list_typed(ast_declaration, decl, link, &this->return_type->qualifier.subroutine_list->declarations) {
+ const struct glsl_type *type;
+ /* the subroutine type must be already declared */
+ type = state->symbols->get_type(decl->identifier);
+ if (!type) {
+ _mesa_glsl_error(& loc, state, "unknown type '%s' in subroutine function definition", decl->identifier);
+ }
+ f->subroutine_types[idx++] = type;
+ }
+ state->subroutines = (ir_function **)reralloc(state, state->subroutines,
+ ir_function *,
+ state->num_subroutines + 1);
+ state->subroutines[state->num_subroutines] = f;
+ state->num_subroutines++;
+
+ }
+
+ if (this->return_type->qualifier.flags.q.subroutine) {
+ if (!state->symbols->add_type(this->identifier, glsl_type::get_subroutine_instance(this->identifier))) {
+ _mesa_glsl_error(& loc, state, "type '%s' previously defined", this->identifier);
+ return NULL;
+ }
+ state->subroutine_types = (ir_function **)reralloc(state, state->subroutine_types,
+ ir_function *,
+ state->num_subroutine_types + 1);
+ state->subroutine_types[state->num_subroutine_types] = f;
+ state->num_subroutine_types++;
+
+ f->is_subroutine = true;
+ }
+
/* Function declarations (prototypes) do not have r-values.
*/
return NULL;
@@ -5277,8 +5561,9 @@ ast_type_specifier::hir(exec_list *instructions,
* \c glsl_struct_field to describe the members.
*
* If we're processing an interface block, var_mode should be the type of the
- * interface block (ir_var_shader_in, ir_var_shader_out, or ir_var_uniform).
- * If we're processing a structure, var_mode should be ir_var_auto.
+ * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or
+ * ir_var_shader_storage). If we're processing a structure, var_mode should be
+ * ir_var_auto.
*
* \return
* The number of fields processed. A pointer to the array structure fields is
@@ -5351,19 +5636,19 @@ ast_process_structure_or_interface_block(exec_list *instructions,
if (is_interface && field_type->contains_opaque()) {
YYLTYPE loc = decl_list->get_location();
_mesa_glsl_error(&loc, state,
- "uniform in non-default uniform block contains "
+ "uniform/buffer in non-default interface block contains "
"opaque variable");
}
if (field_type->contains_atomic()) {
- /* FINISHME: Add a spec quotation here once updated spec
- * FINISHME: language is available. See Khronos bug #10903
- * FINISHME: on whether atomic counters are allowed in
- * FINISHME: structures.
+ /* From section 4.1.7.3 of the GLSL 4.40 spec:
+ *
+ * "Members of structures cannot be declared as atomic counter
+ * types."
*/
YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state, "atomic counter in structure or "
- "uniform block");
+ _mesa_glsl_error(&loc, state, "atomic counter in structure, "
+ "shader storage block or uniform block");
}
if (field_type->contains_image()) {
@@ -5373,7 +5658,8 @@ ast_process_structure_or_interface_block(exec_list *instructions,
*/
YYLTYPE loc = decl_list->get_location();
_mesa_glsl_error(&loc, state,
- "image in structure or uniform block");
+ "image in structure, shader storage block or "
+ "uniform block");
}
const struct ast_type_qualifier *const qual =
@@ -5382,9 +5668,9 @@ ast_process_structure_or_interface_block(exec_list *instructions,
qual->flags.q.packed ||
qual->flags.q.shared) {
_mesa_glsl_error(&loc, state,
- "uniform block layout qualifiers std140, packed, and "
- "shared can only be applied to uniform blocks, not "
- "members");
+ "uniform/shader storage block layout qualifiers "
+ "std140, packed, and shared can only be applied "
+ "to uniform/shader storage blocks, not members");
}
if (qual->flags.q.constant) {
@@ -5403,15 +5689,16 @@ ast_process_structure_or_interface_block(exec_list *instructions,
interpret_interpolation_qualifier(qual, var_mode, state, &loc);
fields[i].centroid = qual->flags.q.centroid ? 1 : 0;
fields[i].sample = qual->flags.q.sample ? 1 : 0;
+ fields[i].patch = qual->flags.q.patch ? 1 : 0;
/* Only save explicitly defined streams in block's field */
fields[i].stream = qual->flags.q.explicit_stream ? qual->stream : -1;
if (qual->flags.q.row_major || qual->flags.q.column_major) {
- if (!qual->flags.q.uniform) {
+ if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
_mesa_glsl_error(&loc, state,
"row_major and column_major can only be "
- "applied to uniform interface blocks");
+ "applied to interface blocks");
} else
validate_matrix_layout_for_type(state, &loc, field_type, NULL);
}
@@ -5608,6 +5895,9 @@ ast_interface_block::hir(exec_list *instructions,
} else if (this->layout.flags.q.uniform) {
var_mode = ir_var_uniform;
iface_type_name = "uniform";
+ } else if (this->layout.flags.q.buffer) {
+ var_mode = ir_var_shader_storage;
+ iface_type_name = "buffer";
} else {
var_mode = ir_var_auto;
iface_type_name = "UNKNOWN";
@@ -5692,16 +5982,28 @@ ast_interface_block::hir(exec_list *instructions,
if (ir_variable *earlier_gl_Position =
state->symbols->get_variable("gl_Position")) {
earlier_per_vertex = earlier_gl_Position->get_interface_type();
+ } else if (ir_variable *earlier_gl_out =
+ state->symbols->get_variable("gl_out")) {
+ earlier_per_vertex = earlier_gl_out->get_interface_type();
} else {
_mesa_glsl_error(&loc, state,
"redeclaration of gl_PerVertex output not "
"allowed in the %s shader",
_mesa_shader_stage_to_string(state->stage));
}
- if (this->instance_name != NULL) {
- _mesa_glsl_error(&loc, state,
- "gl_PerVertex output may not be redeclared with "
- "an instance name");
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ if (this->instance_name == NULL ||
+ strcmp(this->instance_name, "gl_out") != 0 || this->array_specifier == NULL) {
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex output must be redeclared as "
+ "gl_out[]");
+ }
+ } else {
+ if (this->instance_name != NULL) {
+ _mesa_glsl_error(&loc, state,
+ "gl_PerVertex output may not be redeclared with "
+ "an instance name");
+ }
}
break;
default:
@@ -5734,6 +6036,8 @@ ast_interface_block::hir(exec_list *instructions,
earlier_per_vertex->fields.structure[j].centroid;
fields[i].sample =
earlier_per_vertex->fields.structure[j].sample;
+ fields[i].patch =
+ earlier_per_vertex->fields.structure[j].patch;
}
}
@@ -5787,8 +6091,18 @@ ast_interface_block::hir(exec_list *instructions,
if (state->stage == MESA_SHADER_GEOMETRY && this->array_specifier == NULL &&
var_mode == ir_var_shader_in) {
_mesa_glsl_error(&loc, state, "geometry shader inputs must be arrays");
+ } else if ((state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) &&
+ this->array_specifier == NULL &&
+ var_mode == ir_var_shader_in) {
+ _mesa_glsl_error(&loc, state, "per-vertex tessellation shader inputs must be arrays");
+ } else if (state->stage == MESA_SHADER_TESS_CTRL &&
+ this->array_specifier == NULL &&
+ var_mode == ir_var_shader_out) {
+ _mesa_glsl_error(&loc, state, "tessellation control shader outputs must be arrays");
}
+
/* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec
* says:
*
@@ -5834,16 +6148,39 @@ ast_interface_block::hir(exec_list *instructions,
* geometry shader inputs. All other input and output block
* arrays must specify an array size.
*
+ * The same applies to tessellation shaders.
+ *
* The upshot of this is that the only circumstance where an
* interface array size *doesn't* need to be specified is on a
- * geometry shader input.
+ * geometry shader input, tessellation control shader input,
+ * tessellation control shader output, and tessellation evaluation
+ * shader input.
*/
- if (this->array_specifier->is_unsized_array &&
- (state->stage != MESA_SHADER_GEOMETRY || !this->layout.flags.q.in)) {
- _mesa_glsl_error(&loc, state,
- "only geometry shader inputs may be unsized "
- "instance block arrays");
+ if (this->array_specifier->is_unsized_array) {
+ bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY ||
+ state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL;
+ bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL;
+ if (this->layout.flags.q.in) {
+ if (!allow_inputs)
+ _mesa_glsl_error(&loc, state,
+ "unsized input block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ } else if (this->layout.flags.q.out) {
+ if (!allow_outputs)
+ _mesa_glsl_error(&loc, state,
+ "unsized output block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ } else {
+ /* by elimination, this is a uniform or shader storage block array */
+ _mesa_glsl_error(&loc, state,
+ "unsized uniform block arrays not allowed in "
+ "%s shader",
+ _mesa_shader_stage_to_string(state->stage));
+ }
}
const glsl_type *block_array_type =
@@ -5877,6 +6214,11 @@ ast_interface_block::hir(exec_list *instructions,
if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in)
handle_geometry_shader_input_decl(state, loc, var);
+ else if ((state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) && var_mode == ir_var_shader_in)
+ handle_tess_shader_input_decl(state, loc, var);
+ else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out)
+ handle_tess_ctrl_shader_output_decl(state, loc, var);
if (ir_variable *earlier =
state->symbols->get_variable(this->instance_name)) {
@@ -5917,6 +6259,7 @@ ast_interface_block::hir(exec_list *instructions,
var->data.interpolation = fields[i].interpolation;
var->data.centroid = fields[i].centroid;
var->data.sample = fields[i].sample;
+ var->data.patch = fields[i].patch;
var->init_interface_type(block_type);
if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
@@ -5965,8 +6308,8 @@ ast_interface_block::hir(exec_list *instructions,
if (state->symbols->get_variable(var->name) != NULL)
_mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
- /* Propagate the "binding" keyword into this UBO's fields;
- * the UBO declaration itself doesn't get an ir_variable unless it
+ /* Propagate the "binding" keyword into this UBO/SSBO's fields.
+ * The UBO declaration itself doesn't get an ir_variable unless it
* has an instance name. This is ugly.
*/
var->data.explicit_binding = this->layout.flags.q.explicit_binding;
@@ -6024,6 +6367,67 @@ ast_interface_block::hir(exec_list *instructions,
}
+ir_rvalue *
+ast_tcs_output_layout::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ YYLTYPE loc = this->get_location();
+
+ /* Process a tessellation control shader output layout declaration:
+ * check its vertex count against what was seen earlier, then give any
+ * still-unsized, non-patch outputs their final array size. Problems are
+ * reported through _mesa_glsl_error(); the function always returns NULL.
+ *
+ * If any tessellation control output layout declaration preceded this
+ * one, make sure it was consistent with this one.
+ */
+ if (state->tcs_output_vertices_specified &&
+ state->out_qualifier->vertices != this->vertices) {
+ _mesa_glsl_error(&loc, state,
+ "tessellation control shader output layout does not "
+ "match previous declaration");
+ return NULL;
+ }
+
+ /* If any shader outputs occurred before this declaration and specified an
+ * array size, make sure the size they specified is consistent with the
+ * vertex count given by this declaration.
+ */
+ unsigned num_vertices = this->vertices;
+ if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this tessellation control shader output layout "
+ "specifies %u vertices, but a previous output "
+ "is declared with size %u",
+ num_vertices, state->tcs_output_size);
+ return NULL;
+ }
+
+ state->tcs_output_vertices_specified = true;
+
+ /* If any shader outputs occurred before this declaration and did not
+ * specify an array size, their size is determined now.
+ */
+ foreach_in_list (ir_instruction, node, instructions) {
+ ir_variable *var = node->as_variable();
+ if (var == NULL || var->data.mode != ir_var_shader_out)
+ continue;
+
+ /* Note: Not all tessellation control shader outputs are unsized arrays; already-sized and 'patch' outputs are skipped. */
+ if (!var->type->is_unsized_array() || var->data.patch)
+ continue;
+
+ if (var->data.max_array_access >= num_vertices) {
+ _mesa_glsl_error(&loc, state,
+ "this tessellation control shader output layout "
+ "specifies %u vertices, but an access to element "
+ "%u of output `%s' already exists", num_vertices,
+ var->data.max_array_access, var->name);
+ } else {
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ num_vertices);
+ }
+ }
+
+ return NULL;
+}
+
+
ir_rvalue *
ast_gs_input_layout::hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state)
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 5eb2913d6b7..892122af03d 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -40,7 +40,12 @@ ast_type_specifier::print(void) const
bool
ast_fully_specified_type::has_qualifiers() const
{
- return this->qualifier.flags.i != 0;
+ /* 'subroutine' isn't a real qualifier: build a mask holding only the
+ * subroutine and subroutine_def flag bits and ignore those bits when
+ * testing whether any qualifier flag is set.
+ */
+ ast_type_qualifier subroutine_only;
+ subroutine_only.flags.i = 0;
+ subroutine_only.flags.q.subroutine = 1;
+ subroutine_only.flags.q.subroutine_def = 1;
+ return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0;
}
bool ast_type_qualifier::has_interpolation() const
@@ -78,14 +83,16 @@ ast_type_qualifier::has_storage() const
|| this->flags.q.varying
|| this->flags.q.in
|| this->flags.q.out
- || this->flags.q.uniform;
+ || this->flags.q.uniform
+ || this->flags.q.buffer;
}
bool
ast_type_qualifier::has_auxiliary_storage() const
{
return this->flags.q.centroid
- || this->flags.q.sample;
+ || this->flags.q.sample
+ || this->flags.q.patch;
}
const char*
@@ -211,6 +218,44 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
}
}
+ if (q.flags.q.vertices) {
+ if (this->flags.q.vertices && this->vertices != q.vertices) {
+ _mesa_glsl_error(loc, state,
+ "tessellation control shader set conflicting "
+ "vertices (%d and %d)",
+ this->vertices, q.vertices);
+ return false;
+ }
+ this->vertices = q.vertices;
+ }
+
+ if (q.flags.q.vertex_spacing) {
+ if (this->flags.q.vertex_spacing && this->vertex_spacing != q.vertex_spacing) {
+ _mesa_glsl_error(loc, state,
+ "conflicting vertex spacing used");
+ return false;
+ }
+ this->vertex_spacing = q.vertex_spacing;
+ }
+
+ if (q.flags.q.ordering) {
+ if (this->flags.q.ordering && this->ordering != q.ordering) {
+ _mesa_glsl_error(loc, state,
+ "conflicting ordering used");
+ return false;
+ }
+ this->ordering = q.ordering;
+ }
+
+ if (q.flags.q.point_mode) {
+ if (this->flags.q.point_mode && this->point_mode != q.point_mode) {
+ _mesa_glsl_error(loc, state,
+ "conflicting point mode used");
+ return false;
+ }
+ this->point_mode = q.point_mode;
+ }
+
if ((q.flags.i & ubo_mat_mask.flags.i) != 0)
this->flags.i &= ~ubo_mat_mask.flags.i;
if ((q.flags.i & ubo_layout_mask.flags.i) != 0)
@@ -260,6 +305,22 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
return true;
}
+/**
+ * Merge the output layout qualifier \c q into this qualifier.
+ *
+ * The merge itself is delegated to merge_qualifier(). When the shader being
+ * compiled is a tessellation control shader, an ast_tcs_output_layout node
+ * carrying \c q.vertices is also allocated (with \c state as its memory
+ * context) and handed back through \c node; for other stages \c node is
+ * left untouched.
+ *
+ * NOTE(review): the node is created whether or not merge_qualifier()
+ * succeeded and whether or not \c q sets the vertices flag -- confirm this
+ * is intended.
+ *
+ * \return the result of merge_qualifier().
+ */
+bool
+ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
+ _mesa_glsl_parse_state *state,
+ ast_type_qualifier q,
+ ast_node* &node)
+{
+ void *mem_ctx = state;
+ const bool r = this->merge_qualifier(loc, state, q);
+
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ node = new(mem_ctx) ast_tcs_output_layout(*loc, q.vertices);
+ }
+
+ return r;
+}
+
bool
ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
@@ -273,6 +334,27 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
valid_in_mask.flags.i = 0;
switch (state->stage) {
+ case MESA_SHADER_TESS_EVAL:
+ if (q.flags.q.prim_type) {
+ /* Make sure this is a valid input primitive type. */
+ switch (q.prim_type) {
+ case GL_TRIANGLES:
+ case GL_QUADS:
+ case GL_ISOLINES:
+ break;
+ default:
+ _mesa_glsl_error(loc, state,
+ "invalid tessellation evaluation "
+ "shader input primitive type");
+ break;
+ }
+ }
+
+ valid_in_mask.flags.q.prim_type = 1;
+ valid_in_mask.flags.q.vertex_spacing = 1;
+ valid_in_mask.flags.q.ordering = 1;
+ valid_in_mask.flags.q.point_mode = 1;
+ break;
case MESA_SHADER_GEOMETRY:
if (q.flags.q.prim_type) {
/* Make sure this is a valid input primitive type. */
@@ -328,7 +410,9 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
if (q.flags.q.prim_type &&
this->prim_type != q.prim_type) {
_mesa_glsl_error(loc, state,
- "conflicting input primitive types specified");
+ "conflicting input primitive %s specified",
+ state->stage == MESA_SHADER_GEOMETRY ?
+ "type" : "mode");
}
} else if (q.flags.q.prim_type) {
state->in_qualifier->flags.q.prim_type = 1;
@@ -350,6 +434,39 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
state->fs_early_fragment_tests = true;
}
+ if (this->flags.q.vertex_spacing) {
+ if (q.flags.q.vertex_spacing &&
+ this->vertex_spacing != q.vertex_spacing) {
+ _mesa_glsl_error(loc, state,
+ "conflicting vertex spacing specified");
+ }
+ } else if (q.flags.q.vertex_spacing) {
+ this->flags.q.vertex_spacing = 1;
+ this->vertex_spacing = q.vertex_spacing;
+ }
+
+ if (this->flags.q.ordering) {
+ if (q.flags.q.ordering &&
+ this->ordering != q.ordering) {
+ _mesa_glsl_error(loc, state,
+ "conflicting ordering specified");
+ }
+ } else if (q.flags.q.ordering) {
+ this->flags.q.ordering = 1;
+ this->ordering = q.ordering;
+ }
+
+ if (this->flags.q.point_mode) {
+ if (q.flags.q.point_mode &&
+ this->point_mode != q.point_mode) {
+ _mesa_glsl_error(loc, state,
+ "conflicting point mode specified");
+ }
+ } else if (q.flags.q.point_mode) {
+ this->flags.q.point_mode = 1;
+ this->point_mode = q.point_mode;
+ }
+
if (create_gs_ast) {
node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type);
} else if (create_cs_ast) {
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index efab2991993..2175c66cbd7 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -413,8 +413,8 @@ fp64(const _mesa_glsl_parse_state *state)
static bool
barrier_supported(const _mesa_glsl_parse_state *state)
{
- return state->stage == MESA_SHADER_COMPUTE;
- /* TODO: || stage->state == MESA_SHADER_TESS_CTRL; */
+ return state->stage == MESA_SHADER_COMPUTE ||
+ state->stage == MESA_SHADER_TESS_CTRL;
}
/** @} */
diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp
index d92e2eb3007..ffbc5e6fdbc 100644
--- a/src/glsl/builtin_types.cpp
+++ b/src/glsl/builtin_types.cpp
@@ -54,64 +54,64 @@
&glsl_type::_struct_##NAME##_type;
static const struct glsl_struct_field gl_DepthRangeParameters_fields[] = {
- { glsl_type::float_type, "near", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "far", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "diff", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+ glsl_struct_field(glsl_type::float_type, "near"),
+ glsl_struct_field(glsl_type::float_type, "far"),
+ glsl_struct_field(glsl_type::float_type, "diff"),
};
static const struct glsl_struct_field gl_PointParameters_fields[] = {
- { glsl_type::float_type, "size", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "sizeMin", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "sizeMax", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "fadeThresholdSize", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "distanceConstantAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "distanceLinearAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "distanceQuadraticAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+ glsl_struct_field(glsl_type::float_type, "size"),
+ glsl_struct_field(glsl_type::float_type, "sizeMin"),
+ glsl_struct_field(glsl_type::float_type, "sizeMax"),
+ glsl_struct_field(glsl_type::float_type, "fadeThresholdSize"),
+ glsl_struct_field(glsl_type::float_type, "distanceConstantAttenuation"),
+ glsl_struct_field(glsl_type::float_type, "distanceLinearAttenuation"),
+ glsl_struct_field(glsl_type::float_type, "distanceQuadraticAttenuation"),
};
static const struct glsl_struct_field gl_MaterialParameters_fields[] = {
- { glsl_type::vec4_type, "emission", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "ambient", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "diffuse", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "specular", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "shininess", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+ glsl_struct_field(glsl_type::vec4_type, "emission"),
+ glsl_struct_field(glsl_type::vec4_type, "ambient"),
+ glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+ glsl_struct_field(glsl_type::vec4_type, "specular"),
+ glsl_struct_field(glsl_type::float_type, "shininess"),
};
static const struct glsl_struct_field gl_LightSourceParameters_fields[] = {
- { glsl_type::vec4_type, "ambient", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "diffuse", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "specular", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "position", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "halfVector", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec3_type, "spotDirection", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "spotExponent", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "spotCutoff", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "spotCosCutoff", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "constantAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "linearAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "quadraticAttenuation", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+ glsl_struct_field(glsl_type::vec4_type, "ambient"),
+ glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+ glsl_struct_field(glsl_type::vec4_type, "specular"),
+ glsl_struct_field(glsl_type::vec4_type, "position"),
+ glsl_struct_field(glsl_type::vec4_type, "halfVector"),
+ glsl_struct_field(glsl_type::vec3_type, "spotDirection"),
+ glsl_struct_field(glsl_type::float_type, "spotExponent"),
+ glsl_struct_field(glsl_type::float_type, "spotCutoff"),
+ glsl_struct_field(glsl_type::float_type, "spotCosCutoff"),
+ glsl_struct_field(glsl_type::float_type, "constantAttenuation"),
+ glsl_struct_field(glsl_type::float_type, "linearAttenuation"),
+ glsl_struct_field(glsl_type::float_type, "quadraticAttenuation"),
};
static const struct glsl_struct_field gl_LightModelParameters_fields[] = {
- { glsl_type::vec4_type, "ambient", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+ glsl_struct_field(glsl_type::vec4_type, "ambient"),
};
static const struct glsl_struct_field gl_LightModelProducts_fields[] = {
- { glsl_type::vec4_type, "sceneColor", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+ glsl_struct_field(glsl_type::vec4_type, "sceneColor"),
};
static const struct glsl_struct_field gl_LightProducts_fields[] = {
- { glsl_type::vec4_type, "ambient", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "diffuse", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::vec4_type, "specular", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+ glsl_struct_field(glsl_type::vec4_type, "ambient"),
+ glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+ glsl_struct_field(glsl_type::vec4_type, "specular"),
};
static const struct glsl_struct_field gl_FogParameters_fields[] = {
- { glsl_type::vec4_type, "color", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "density", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "start", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "end", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
- { glsl_type::float_type, "scale", -1, 0, 0, 0, GLSL_MATRIX_LAYOUT_INHERITED, 0 },
+ glsl_struct_field(glsl_type::vec4_type, "color"),
+ glsl_struct_field(glsl_type::float_type, "density"),
+ glsl_struct_field(glsl_type::float_type, "start"),
+ glsl_struct_field(glsl_type::float_type, "end"),
+ glsl_struct_field(glsl_type::float_type, "scale"),
};
#include "builtin_type_macros.h"
diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index a765d35fde0..53d3500b1f4 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -322,6 +322,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
this->fields[this->num_fields].interpolation = INTERP_QUALIFIER_NONE;
this->fields[this->num_fields].centroid = 0;
this->fields[this->num_fields].sample = 0;
+ this->fields[this->num_fields].patch = 0;
this->num_fields++;
}
@@ -343,6 +344,8 @@ public:
void generate_constants();
void generate_uniforms();
void generate_vs_special_vars();
+ void generate_tcs_special_vars();
+ void generate_tes_special_vars();
void generate_gs_special_vars();
void generate_fs_special_vars();
void generate_cs_special_vars();
@@ -436,11 +439,12 @@ builtin_variable_generator::add_variable(const char *name,
var->data.read_only = true;
break;
case ir_var_shader_out:
+ case ir_var_shader_storage:
break;
default:
/* The only variables that are added using this function should be
- * uniforms, shader inputs, and shader outputs, constants (which use
- * ir_var_auto), and system values.
+ * uniforms, shader storage, shader inputs, and shader outputs, constants
+ * (which use ir_var_auto), and system values.
*/
assert(0);
break;
@@ -669,8 +673,14 @@ builtin_variable_generator::generate_constants()
if (!state->es_shader) {
add_const("gl_MaxGeometryAtomicCounters",
state->Const.MaxGeometryAtomicCounters);
- add_const("gl_MaxTessControlAtomicCounters", 0);
- add_const("gl_MaxTessEvaluationAtomicCounters", 0);
+
+ if (state->is_version(400, 0) ||
+ state->ARB_tessellation_shader_enable) {
+ add_const("gl_MaxTessControlAtomicCounters",
+ state->Const.MaxTessControlAtomicCounters);
+ add_const("gl_MaxTessEvaluationAtomicCounters",
+ state->Const.MaxTessEvaluationAtomicCounters);
+ }
}
}
@@ -690,8 +700,10 @@ builtin_variable_generator::generate_constants()
if (!state->es_shader) {
add_const("gl_MaxGeometryAtomicCounterBuffers",
state->Const.MaxGeometryAtomicCounterBuffers);
- add_const("gl_MaxTessControlAtomicCounterBuffers", 0);
- add_const("gl_MaxTessEvaluationAtomicCounterBuffers", 0);
+ add_const("gl_MaxTessControlAtomicCounterBuffers",
+ state->Const.MaxTessControlAtomicCounterBuffers);
+ add_const("gl_MaxTessEvaluationAtomicCounterBuffers",
+ state->Const.MaxTessEvaluationAtomicCounterBuffers);
}
}
@@ -750,11 +762,35 @@ builtin_variable_generator::generate_constants()
state->Const.MaxFragmentImageUniforms);
add_const("gl_MaxCombinedImageUniforms",
state->Const.MaxCombinedImageUniforms);
+
+ if (state->is_version(400, 0) ||
+ state->ARB_tessellation_shader_enable) {
+ add_const("gl_MaxTessControlImageUniforms",
+ state->Const.MaxTessControlImageUniforms);
+ add_const("gl_MaxTessEvaluationImageUniforms",
+ state->Const.MaxTessEvaluationImageUniforms);
+ }
}
if (state->is_version(410, 0) ||
state->ARB_viewport_array_enable)
add_const("gl_MaxViewports", state->Const.MaxViewports);
+
+ if (state->is_version(400, 0) ||
+ state->ARB_tessellation_shader_enable) {
+ add_const("gl_MaxPatchVertices", state->Const.MaxPatchVertices);
+ add_const("gl_MaxTessGenLevel", state->Const.MaxTessGenLevel);
+ add_const("gl_MaxTessControlInputComponents", state->Const.MaxTessControlInputComponents);
+ add_const("gl_MaxTessControlOutputComponents", state->Const.MaxTessControlOutputComponents);
+ add_const("gl_MaxTessControlTextureImageUnits", state->Const.MaxTessControlTextureImageUnits);
+ add_const("gl_MaxTessEvaluationInputComponents", state->Const.MaxTessEvaluationInputComponents);
+ add_const("gl_MaxTessEvaluationOutputComponents", state->Const.MaxTessEvaluationOutputComponents);
+ add_const("gl_MaxTessEvaluationTextureImageUnits", state->Const.MaxTessEvaluationTextureImageUnits);
+ add_const("gl_MaxTessPatchComponents", state->Const.MaxTessPatchComponents);
+ add_const("gl_MaxTessControlTotalOutputComponents", state->Const.MaxTessControlTotalOutputComponents);
+ add_const("gl_MaxTessControlUniformComponents", state->Const.MaxTessControlUniformComponents);
+ add_const("gl_MaxTessEvaluationUniformComponents", state->Const.MaxTessEvaluationUniformComponents);
+ }
}
@@ -870,6 +906,39 @@ builtin_variable_generator::generate_vs_special_vars()
}
+/**
+ * Generate variables which only exist in tessellation control shaders.
+ *
+ * Registers the gl_PrimitiveID, gl_PatchVerticesIn and gl_InvocationID
+ * system values (all int), plus the gl_TessLevelOuter (float[4]) and
+ * gl_TessLevelInner (float[2]) outputs, whose ir_variable data is flagged
+ * with patch = 1.
+ */
+void
+builtin_variable_generator::generate_tcs_special_vars()
+{
+ add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
+ add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn");
+ add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID");
+
+ add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4),
+ "gl_TessLevelOuter")->data.patch = 1;
+ add_output(VARYING_SLOT_TESS_LEVEL_INNER, array(float_t, 2),
+ "gl_TessLevelInner")->data.patch = 1;
+}
+
+
+/**
+ * Generate variables which only exist in tessellation evaluation shaders.
+ *
+ * Registers gl_PrimitiveID and gl_PatchVerticesIn (int), gl_TessCoord
+ * (vec3), and the gl_TessLevelOuter (float[4]) and gl_TessLevelInner
+ * (float[2]) arrays. Unlike the tessellation control stage, the tessellation
+ * levels are added here as system values rather than as outputs.
+ */
+void
+builtin_variable_generator::generate_tes_special_vars()
+{
+ add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
+ add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn");
+ add_system_value(SYSTEM_VALUE_TESS_COORD, vec3_t, "gl_TessCoord");
+ add_system_value(SYSTEM_VALUE_TESS_LEVEL_OUTER, array(float_t, 4),
+ "gl_TessLevelOuter");
+ add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2),
+ "gl_TessLevelInner");
+}
+
+
/**
* Generate variables which only exist in geometry shaders.
*/
@@ -993,6 +1062,8 @@ builtin_variable_generator::add_varying(int slot, const glsl_type *type,
const char *name_as_gs_input)
{
switch (state->stage) {
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
this->per_vertex_in.add_field(slot, type, name);
/* FALLTHROUGH */
@@ -1045,13 +1116,40 @@ builtin_variable_generator::generate_varyings()
}
}
+ /* Section 7.1 (Built-In Language Variables) of the GLSL 4.00 spec
+ * says:
+ *
+ * "In the tessellation control language, built-in variables are
+ * intrinsically declared as:
+ *
+ * in gl_PerVertex {
+ * vec4 gl_Position;
+ * float gl_PointSize;
+ * float gl_ClipDistance[];
+ * } gl_in[gl_MaxPatchVertices];"
+ */
+ if (state->stage == MESA_SHADER_TESS_CTRL ||
+ state->stage == MESA_SHADER_TESS_EVAL) {
+ const glsl_type *per_vertex_in_type =
+ this->per_vertex_in.construct_interface_instance();
+ add_variable("gl_in", array(per_vertex_in_type, state->Const.MaxPatchVertices),
+ ir_var_shader_in, -1);
+ }
if (state->stage == MESA_SHADER_GEOMETRY) {
const glsl_type *per_vertex_in_type =
this->per_vertex_in.construct_interface_instance();
add_variable("gl_in", array(per_vertex_in_type, 0),
ir_var_shader_in, -1);
}
- if (state->stage == MESA_SHADER_VERTEX || state->stage == MESA_SHADER_GEOMETRY) {
+ if (state->stage == MESA_SHADER_TESS_CTRL) {
+ const glsl_type *per_vertex_out_type =
+ this->per_vertex_out.construct_interface_instance();
+ add_variable("gl_out", array(per_vertex_out_type, 0),
+ ir_var_shader_out, -1);
+ }
+ if (state->stage == MESA_SHADER_VERTEX ||
+ state->stage == MESA_SHADER_TESS_EVAL ||
+ state->stage == MESA_SHADER_GEOMETRY) {
const glsl_type *per_vertex_out_type =
this->per_vertex_out.construct_interface_instance();
const glsl_struct_field *fields = per_vertex_out_type->fields.structure;
@@ -1062,6 +1160,7 @@ builtin_variable_generator::generate_varyings()
var->data.interpolation = fields[i].interpolation;
var->data.centroid = fields[i].centroid;
var->data.sample = fields[i].sample;
+ var->data.patch = fields[i].patch;
var->init_interface_type(per_vertex_out_type);
}
}
@@ -1086,6 +1185,12 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
case MESA_SHADER_VERTEX:
gen.generate_vs_special_vars();
break;
+ case MESA_SHADER_TESS_CTRL:
+ gen.generate_tcs_special_vars();
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ gen.generate_tes_special_vars();
+ break;
case MESA_SHADER_GEOMETRY:
gen.generate_gs_special_vars();
break;
diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index a11b6b2c7c8..dd5ec2a30b5 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -1074,9 +1074,9 @@ _token_list_equal_ignoring_space (token_list_t *a, token_list_t *b)
*/
if (node_a->token->type == SPACE
&& node_b->token->type == SPACE) {
- while (node_a->token->type == SPACE)
+ while (node_a && node_a->token->type == SPACE)
node_a = node_a->next;
- while (node_b->token->type == SPACE)
+ while (node_b && node_b->token->type == SPACE)
node_b = node_b->next;
continue;
}
@@ -2483,6 +2483,15 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
if (extensions->ARB_shader_precision)
add_builtin_define(parser, "GL_ARB_shader_precision", 1);
+
+ if (extensions->ARB_shader_storage_buffer_object)
+ add_builtin_define(parser, "GL_ARB_shader_storage_buffer_object", 1);
+
+ if (extensions->ARB_tessellation_shader)
+ add_builtin_define(parser, "GL_ARB_tessellation_shader", 1);
+
+ if (extensions->ARB_shader_subroutine)
+ add_builtin_define(parser, "GL_ARB_shader_subroutine", 1);
}
}
diff --git a/src/glsl/glcpp/glcpp.c b/src/glsl/glcpp/glcpp.c
index 5144516a69c..c62f4efec9d 100644
--- a/src/glsl/glcpp/glcpp.c
+++ b/src/glsl/glcpp/glcpp.c
@@ -29,6 +29,7 @@
#include "glcpp.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
+#include "util/strtod.h"
extern int glcpp_parser_debug;
@@ -168,6 +169,8 @@ main (int argc, char *argv[])
if (shader == NULL)
return 1;
+ _mesa_locale_init();
+
ret = glcpp_preprocess(ctx, &shader, &info_log, NULL, &gl_ctx);
printf("%s", shader);
diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll
index 10db5b8b632..efa0bb68099 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -308,12 +308,14 @@ in return IN_TOK;
out return OUT_TOK;
inout return INOUT_TOK;
uniform return UNIFORM;
+buffer return BUFFER;
varying DEPRECATED_ES_KEYWORD(VARYING);
centroid KEYWORD(120, 300, 120, 300, CENTROID);
invariant KEYWORD(120, 100, 120, 100, INVARIANT);
flat KEYWORD(130, 100, 130, 300, FLAT);
smooth KEYWORD(130, 300, 130, 300, SMOOTH);
noperspective KEYWORD(130, 300, 130, 0, NOPERSPECTIVE);
+patch KEYWORD_WITH_ALT(0, 300, 400, 0, yyextra->ARB_tessellation_shader_enable, PATCH);
sampler1D DEPRECATED_ES_KEYWORD(SAMPLER1D);
sampler2D return SAMPLER2D;
@@ -424,7 +426,8 @@ layout {
|| yyextra->ARB_uniform_buffer_object_enable
|| yyextra->ARB_fragment_coord_conventions_enable
|| yyextra->ARB_shading_language_420pack_enable
- || yyextra->ARB_compute_shader_enable) {
+ || yyextra->ARB_compute_shader_enable
+ || yyextra->ARB_tessellation_shader_enable) {
return LAYOUT_TOK;
} else {
void *mem_ctx = yyextra;
@@ -575,9 +578,8 @@ usamplerBuffer KEYWORD(140, 300, 140, 0, USAMPLERBUFFER);
/* Additional reserved words in GLSL ES 3.00 */
resource KEYWORD(0, 300, 0, 0, RESOURCE);
-patch KEYWORD(0, 300, 0, 0, PATCH);
sample KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_gpu_shader5_enable, SAMPLE);
-subroutine KEYWORD(0, 300, 0, 0, SUBROUTINE);
+subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE);
[_a-zA-Z][_a-zA-Z0-9]* {
@@ -593,6 +595,10 @@ subroutine KEYWORD(0, 300, 0, 0, SUBROUTINE);
return classify_identifier(state, yytext);
}
+\. { struct _mesa_glsl_parse_state *state = yyextra;
+ state->is_field = true;
+ return DOT_TOK; }
+
. { return yytext[0]; }
%%
@@ -600,6 +606,10 @@ subroutine KEYWORD(0, 300, 0, 0, SUBROUTINE);
int
classify_identifier(struct _mesa_glsl_parse_state *state, const char *name)
{
+ if (state->is_field) {
+ state->is_field = false;
+ return FIELD_SELECTION;
+ }
if (state->symbols->get_variable(name) || state->symbols->get_function(name))
return IDENTIFIER;
else if (state->symbols->get_type(name))
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 05fa4ea9ac5..97648c15ccc 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -121,7 +121,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
ast_case_statement *case_statement;
ast_case_statement_list *case_statement_list;
ast_interface_block *interface_block;
-
+ ast_subroutine_list *subroutine_list;
struct {
ast_node *cond;
ast_expression *rest;
@@ -134,7 +134,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
}
%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK
-%token BREAK CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
+%token BREAK BUFFER CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4
%token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE
%token NOPERSPECTIVE FLAT SMOOTH
@@ -186,7 +186,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%token PRAGMA_OPTIMIZE_ON PRAGMA_OPTIMIZE_OFF
%token PRAGMA_INVARIANT_ALL
%token LAYOUT_TOK
-
+%token DOT_TOK
/* Reserved words that are not actually used in the grammar.
*/
%token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO
@@ -215,6 +215,8 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%type layout_qualifier_id_list layout_qualifier_id
%type interface_block_layout_qualifier
%type memory_qualifier
+%type subroutine_qualifier
+%type