Mirror of https://gitlab.freedesktop.org/mesa/mesa.git

Commit cab30cc5f9: Merge branch 'vulkan'

106 changed files with 57524 additions and 0 deletions
@@ -2621,6 +2621,11 @@ AC_CONFIG_FILES([Makefile
 		src/glx/apple/Makefile
 		src/glx/tests/Makefile
 		src/gtest/Makefile
+		src/intel/Makefile
+		src/intel/genxml/Makefile
+		src/intel/isl/Makefile
+		src/intel/vulkan/Makefile
+		src/intel/vulkan/tests/Makefile
 		src/loader/Makefile
 		src/mapi/Makefile
 		src/mapi/es1api/glesv1_cm.pc
include/vulkan/vk_icd.h (new file, 85 lines)
@@ -0,0 +1,85 @@
#ifndef VKICD_H
#define VKICD_H

#include "vk_platform.h"

/*
 * The ICD must reserve space for a pointer for the loader's dispatch
 * table, at the start of <each object>.
 * The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro.
 */

#define ICD_LOADER_MAGIC 0x01CDC0DE

typedef union _VK_LOADER_DATA {
    uintptr_t loaderMagic;
    void *loaderData;
} VK_LOADER_DATA;

static inline void set_loader_magic_value(void* pNewObject) {
    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
    loader_info->loaderMagic = ICD_LOADER_MAGIC;
}

static inline bool valid_loader_magic_value(void* pNewObject) {
    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
    return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC;
}

/*
 * Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that
 * contains the platform-specific connection and surface information.
 */
typedef enum _VkIcdWsiPlatform {
    VK_ICD_WSI_PLATFORM_MIR,
    VK_ICD_WSI_PLATFORM_WAYLAND,
    VK_ICD_WSI_PLATFORM_WIN32,
    VK_ICD_WSI_PLATFORM_XCB,
    VK_ICD_WSI_PLATFORM_XLIB,
} VkIcdWsiPlatform;

typedef struct _VkIcdSurfaceBase {
    VkIcdWsiPlatform platform;
} VkIcdSurfaceBase;

#ifdef VK_USE_PLATFORM_MIR_KHR
typedef struct _VkIcdSurfaceMir {
    VkIcdSurfaceBase base;
    MirConnection* connection;
    MirSurface* mirSurface;
} VkIcdSurfaceMir;
#endif // VK_USE_PLATFORM_MIR_KHR

#ifdef VK_USE_PLATFORM_WAYLAND_KHR
typedef struct _VkIcdSurfaceWayland {
    VkIcdSurfaceBase base;
    struct wl_display* display;
    struct wl_surface* surface;
} VkIcdSurfaceWayland;
#endif // VK_USE_PLATFORM_WAYLAND_KHR

#ifdef VK_USE_PLATFORM_WIN32_KHR
typedef struct _VkIcdSurfaceWin32 {
    VkIcdSurfaceBase base;
    HINSTANCE hinstance;
    HWND hwnd;
} VkIcdSurfaceWin32;
#endif // VK_USE_PLATFORM_WIN32_KHR

#ifdef VK_USE_PLATFORM_XCB_KHR
typedef struct _VkIcdSurfaceXcb {
    VkIcdSurfaceBase base;
    xcb_connection_t* connection;
    xcb_window_t window;
} VkIcdSurfaceXcb;
#endif // VK_USE_PLATFORM_XCB_KHR

#ifdef VK_USE_PLATFORM_XLIB_KHR
typedef struct _VkIcdSurfaceXlib {
    VkIcdSurfaceBase base;
    Display* dpy;
    Window window;
} VkIcdSurfaceXlib;
#endif // VK_USE_PLATFORM_XLIB_KHR

#endif // VKICD_H
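To make the loader-magic contract above concrete, here is a minimal sketch (not part of this commit) of how an ICD might lay out a dispatchable object so the loader can overwrite the first pointer-sized slot with its dispatch table. The struct name "example_device" and its fields are hypothetical; only VK_LOADER_DATA and set_loader_magic_value() come from vk_icd.h above.

#include <stdint.h>
#include <stdbool.h>
#include "vk_icd.h"

struct example_device {
   VK_LOADER_DATA loader_data;  /* must be the first member of the object */
   /* ... driver-private state follows ... */
   int fd;
};

static void example_device_init(struct example_device *dev)
{
   /* Stamp the magic value so valid_loader_magic_value() succeeds until
    * the loader installs its real dispatch-table pointer. */
   set_loader_magic_value(dev);
}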
include/vulkan/vk_platform.h (new file, 127 lines)
@@ -0,0 +1,127 @@
//
// File: vk_platform.h
//
/*
** Copyright (c) 2014-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/


#ifndef VK_PLATFORM_H_
#define VK_PLATFORM_H_

#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus

/*
***************************************************************************************************
*   Platform-specific directives and type declarations
***************************************************************************************************
*/

/* Platform-specific calling convention macros.
 *
 * Platforms should define these so that Vulkan clients call Vulkan commands
 * with the same calling conventions that the Vulkan implementation expects.
 *
 * VKAPI_ATTR - Placed before the return type in function declarations.
 *              Useful for C++11 and GCC/Clang-style function attribute syntax.
 * VKAPI_CALL - Placed after the return type in function declarations.
 *              Useful for MSVC-style calling convention syntax.
 * VKAPI_PTR  - Placed between the '(' and '*' in function pointer types.
 *
 * Function declaration:  VKAPI_ATTR void VKAPI_CALL vkCommand(void);
 * Function pointer type: typedef void (VKAPI_PTR *PFN_vkCommand)(void);
 */
#if defined(_WIN32)
    // On Windows, Vulkan commands use the stdcall convention
    #define VKAPI_ATTR
    #define VKAPI_CALL __stdcall
    #define VKAPI_PTR  VKAPI_CALL
#elif defined(__ANDROID__) && defined(__ARM_EABI__) && !defined(__ARM_ARCH_7A__)
    // Android does not support Vulkan in native code using the "armeabi" ABI.
    #error "Vulkan requires the 'armeabi-v7a' or 'armeabi-v7a-hard' ABI on 32-bit ARM CPUs"
#elif defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
    // On Android/ARMv7a, Vulkan functions use the armeabi-v7a-hard calling
    // convention, even if the application's native code is compiled with the
    // armeabi-v7a calling convention.
    #define VKAPI_ATTR __attribute__((pcs("aapcs-vfp")))
    #define VKAPI_CALL
    #define VKAPI_PTR  VKAPI_ATTR
#else
    // On other platforms, use the default calling convention
    #define VKAPI_ATTR
    #define VKAPI_CALL
    #define VKAPI_PTR
#endif

#include <stddef.h>

#if !defined(VK_NO_STDINT_H)
    #if defined(_MSC_VER) && (_MSC_VER < 1600)
        typedef signed   __int8  int8_t;
        typedef unsigned __int8  uint8_t;
        typedef signed   __int16 int16_t;
        typedef unsigned __int16 uint16_t;
        typedef signed   __int32 int32_t;
        typedef unsigned __int32 uint32_t;
        typedef signed   __int64 int64_t;
        typedef unsigned __int64 uint64_t;
    #else
        #include <stdint.h>
    #endif
#endif // !defined(VK_NO_STDINT_H)

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

// Platform-specific headers required by platform window system extensions.
// These are enabled prior to #including "vulkan.h". The same enable then
// controls inclusion of the extension interfaces in vulkan.h.

#ifdef VK_USE_PLATFORM_ANDROID_KHR
#include <android/native_window.h>
#endif

#ifdef VK_USE_PLATFORM_MIR_KHR
#include <mir_toolkit/client_types.h>
#endif

#ifdef VK_USE_PLATFORM_WAYLAND_KHR
#include <wayland-client.h>
#endif

#ifdef VK_USE_PLATFORM_WIN32_KHR
#include <windows.h>
#endif

#ifdef VK_USE_PLATFORM_XLIB_KHR
#include <X11/Xlib.h>
#endif

#ifdef VK_USE_PLATFORM_XCB_KHR
#include <xcb/xcb.h>
#endif

#endif
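A short sketch of the intended placement of the calling-convention macros above, following the example comment in the header itself; "my_command" and the PFN typedef are illustrative names, not part of the header.

#include "vk_platform.h"

/* Declaration style used by vulkan.h entry points: */
VKAPI_ATTR void VKAPI_CALL my_command(void);

/* Function-pointer style used for dynamically loaded entry points: */
typedef void (VKAPI_PTR *PFN_my_command)(void);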
include/vulkan/vulkan.h (new file, 3800 lines; diff suppressed because it is too large)
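The vulkan.h diff itself is suppressed above. As a rough illustration of what that 3800-line header declares, here is a minimal Vulkan 1.0 instance-creation sketch; it uses only core 1.0 names (VkApplicationInfo, VkInstanceCreateInfo, vkCreateInstance) and elides error details, so treat it as a sketch rather than part of this commit.

#include <vulkan/vulkan.h>

VkInstance create_example_instance(void)
{
   VkApplicationInfo app_info = {
      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
      .pApplicationName = "demo",
      .apiVersion = VK_MAKE_VERSION(1, 0, 0),
   };
   VkInstanceCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
      .pApplicationInfo = &app_info,
   };
   VkInstance instance = VK_NULL_HANDLE;
   if (vkCreateInstance(&create_info, NULL, &instance) != VK_SUCCESS)
      return VK_NULL_HANDLE;
   return instance;
}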
include/vulkan/vulkan_intel.h (new file, 62 lines)
@@ -0,0 +1,62 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __VULKAN_INTEL_H__
#define __VULKAN_INTEL_H__

#include "vulkan.h"

#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus

#define VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL 1024
typedef struct VkDmaBufImageCreateInfo_
{
    VkStructureType sType;   // Must be VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL
    const void*     pNext;   // Pointer to next structure.
    int             fd;
    VkFormat        format;
    VkExtent3D      extent;  // Depth must be 1
    uint32_t        strideInBytes;
} VkDmaBufImageCreateInfo;

typedef VkResult (VKAPI_PTR *PFN_vkCreateDmaBufImageINTEL)(VkDevice device, const VkDmaBufImageCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMem, VkImage* pImage);

#ifdef VK_PROTOTYPES

VKAPI_ATTR VkResult VKAPI_CALL vkCreateDmaBufImageINTEL(
    VkDevice                                    _device,
    const VkDmaBufImageCreateInfo*              pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDeviceMemory*                             pMem,
    VkImage*                                    pImage);

#endif

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif // __VULKAN_INTEL_H__
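A hedged usage sketch for the Intel-specific entry point above: importing a dma-buf fd as a VkImage plus its backing VkDeviceMemory. The fd, format, and stride values are placeholders (real code would take them from a buffer exported by another API), and calling the prototype directly requires building with VK_PROTOTYPES defined.

#include <vulkan/vulkan_intel.h>

VkResult import_dma_buf(VkDevice device, int fd,
                        uint32_t width, uint32_t height, uint32_t stride,
                        VkDeviceMemory *mem, VkImage *image)
{
   VkDmaBufImageCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL,
      .pNext = NULL,
      .fd = fd,
      .format = VK_FORMAT_B8G8R8A8_UNORM,   /* placeholder format */
      .extent = { width, height, 1 },       /* depth must be 1 */
      .strideInBytes = stride,
   };
   return vkCreateDmaBufImageINTEL(device, &info, NULL, mem, image);
}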
@@ -56,6 +56,10 @@ EXTRA_DIST = \
 AM_CFLAGS = $(VISIBILITY_CFLAGS)
 AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
 
+if HAVE_INTEL_DRIVERS
+SUBDIRS += intel
+endif
+
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/include/ \
 	-I$(top_srcdir)/src/mapi/ \
@@ -33,6 +33,7 @@ nir_libnir_la_LIBADD = \
 
 nir_libnir_la_SOURCES = \
 	$(NIR_FILES) \
+	$(SPIRV_FILES) \
 	$(NIR_GENERATED_FILES)
 
 PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
@@ -233,3 +233,12 @@ NIR_FILES = \
 	nir/nir_vla.h \
 	nir/nir_worklist.c \
 	nir/nir_worklist.h
+
+SPIRV_FILES = \
+	spirv/nir_spirv.h \
+	spirv/spirv_to_nir.c \
+	spirv/vtn_alu.c \
+	spirv/vtn_cfg.c \
+	spirv/vtn_glsl450.c \
+	spirv/vtn_private.h \
+	spirv/vtn_variables.c
@@ -345,6 +345,9 @@ LOAD(output, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 LOAD(per_vertex_output, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base } */
 LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base, range } */
+LOAD(push_constant, 1, 2, BASE, RANGE, xx,
+     NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 
 /*
  * Stores work the same way as loads, except now the first source is the value
src/compiler/spirv/GLSL.std.450.h (new file, 127 lines)
@@ -0,0 +1,127 @@
/*
** Copyright (c) 2014-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
*/

#ifndef GLSLstd450_H
#define GLSLstd450_H

const int GLSLstd450Version = 99;
const int GLSLstd450Revision = 3;

enum GLSLstd450 {
    GLSLstd450Bad = 0,              // Don't use

    GLSLstd450Round = 1,
    GLSLstd450RoundEven = 2,
    GLSLstd450Trunc = 3,
    GLSLstd450FAbs = 4,
    GLSLstd450SAbs = 5,
    GLSLstd450FSign = 6,
    GLSLstd450SSign = 7,
    GLSLstd450Floor = 8,
    GLSLstd450Ceil = 9,
    GLSLstd450Fract = 10,

    GLSLstd450Radians = 11,
    GLSLstd450Degrees = 12,
    GLSLstd450Sin = 13,
    GLSLstd450Cos = 14,
    GLSLstd450Tan = 15,
    GLSLstd450Asin = 16,
    GLSLstd450Acos = 17,
    GLSLstd450Atan = 18,
    GLSLstd450Sinh = 19,
    GLSLstd450Cosh = 20,
    GLSLstd450Tanh = 21,
    GLSLstd450Asinh = 22,
    GLSLstd450Acosh = 23,
    GLSLstd450Atanh = 24,
    GLSLstd450Atan2 = 25,

    GLSLstd450Pow = 26,
    GLSLstd450Exp = 27,
    GLSLstd450Log = 28,
    GLSLstd450Exp2 = 29,
    GLSLstd450Log2 = 30,
    GLSLstd450Sqrt = 31,
    GLSLstd450InverseSqrt = 32,

    GLSLstd450Determinant = 33,
    GLSLstd450MatrixInverse = 34,

    GLSLstd450Modf = 35,            // second operand needs an OpVariable to write to
    GLSLstd450ModfStruct = 36,      // no OpVariable operand
    GLSLstd450FMin = 37,
    GLSLstd450UMin = 38,
    GLSLstd450SMin = 39,
    GLSLstd450FMax = 40,
    GLSLstd450UMax = 41,
    GLSLstd450SMax = 42,
    GLSLstd450FClamp = 43,
    GLSLstd450UClamp = 44,
    GLSLstd450SClamp = 45,
    GLSLstd450FMix = 46,
    GLSLstd450IMix = 47,
    GLSLstd450Step = 48,
    GLSLstd450SmoothStep = 49,

    GLSLstd450Fma = 50,
    GLSLstd450Frexp = 51,           // second operand needs an OpVariable to write to
    GLSLstd450FrexpStruct = 52,     // no OpVariable operand
    GLSLstd450Ldexp = 53,

    GLSLstd450PackSnorm4x8 = 54,
    GLSLstd450PackUnorm4x8 = 55,
    GLSLstd450PackSnorm2x16 = 56,
    GLSLstd450PackUnorm2x16 = 57,
    GLSLstd450PackHalf2x16 = 58,
    GLSLstd450PackDouble2x32 = 59,
    GLSLstd450UnpackSnorm2x16 = 60,
    GLSLstd450UnpackUnorm2x16 = 61,
    GLSLstd450UnpackHalf2x16 = 62,
    GLSLstd450UnpackSnorm4x8 = 63,
    GLSLstd450UnpackUnorm4x8 = 64,
    GLSLstd450UnpackDouble2x32 = 65,

    GLSLstd450Length = 66,
    GLSLstd450Distance = 67,
    GLSLstd450Cross = 68,
    GLSLstd450Normalize = 69,
    GLSLstd450FaceForward = 70,
    GLSLstd450Reflect = 71,
    GLSLstd450Refract = 72,

    GLSLstd450FindILsb = 73,
    GLSLstd450FindSMsb = 74,
    GLSLstd450FindUMsb = 75,

    GLSLstd450InterpolateAtCentroid = 76,
    GLSLstd450InterpolateAtSample = 77,
    GLSLstd450InterpolateAtOffset = 78,

    GLSLstd450Count
};

#endif  // #ifndef GLSLstd450_H
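For context on how this enum is consumed: a SPIR-V module imports the extended instruction set by name, and each OpExtInst then carries one of these values as its "instruction" operand (in SPIR-V assembly, roughly: %ext = OpExtInstImport "GLSL.std.450" followed by %result = OpExtInst %float %ext Sqrt %x). The small helper below is an illustrative sketch only, say for a debug dump, and is not part of this commit.

#include "GLSL.std.450.h"

/* Map a few GLSL.std.450 opcodes to their spec names for debug output. */
static const char *glsl450_name(enum GLSLstd450 op)
{
   switch (op) {
   case GLSLstd450Round: return "Round";
   case GLSLstd450Sqrt:  return "Sqrt";
   case GLSLstd450FMix:  return "FMix";
   default:              return "<unhandled>";
   }
}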
src/compiler/spirv/nir_spirv.h (new file, 54 lines)
@@ -0,0 +1,54 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#pragma once

#ifndef _NIR_SPIRV_H_
#define _NIR_SPIRV_H_

#include "nir/nir.h"

#ifdef __cplusplus
extern "C" {
#endif

struct nir_spirv_specialization {
   uint32_t id;
   uint32_t data;
};

nir_function *spirv_to_nir(const uint32_t *words, size_t word_count,
                           struct nir_spirv_specialization *specializations,
                           unsigned num_specializations,
                           gl_shader_stage stage, const char *entry_point_name,
                           const nir_shader_compiler_options *options);

#ifdef __cplusplus
}
#endif

#endif /* _NIR_SPIRV_H_ */
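A hedged sketch of how a driver might call the entry point declared above; the surrounding setup (compiler options, SPIR-V word stream) is abbreviated, the function name is illustrative, and passing NULL/0 simply means no specialization constants.

#include "nir/nir.h"
#include "spirv/nir_spirv.h"

nir_shader *
compile_vertex_shader(const uint32_t *words, size_t word_count,
                      const nir_shader_compiler_options *options)
{
   /* No specialization constants in this example; "main" is the SPIR-V
    * entry point name required by Vulkan's GLSL mapping. */
   nir_function *entry_point =
      spirv_to_nir(words, word_count, NULL, 0,
                   MESA_SHADER_VERTEX, "main", options);
   return entry_point ? entry_point->shader : NULL;
}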
src/compiler/spirv/spirv.h (new file, 870 lines)
@@ -0,0 +1,870 @@
/*
** Copyright (c) 2014-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
*/

/*
** This header is automatically generated by the same tool that creates
** the Binary Section of the SPIR-V specification.
*/

/*
** Enumeration tokens for SPIR-V, in various styles:
**   C, C++, C++11, JSON, Lua, Python
**
** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL
** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL
** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
**
** Some tokens act like mask values, which can be OR'd together,
** while others are mutually exclusive.  The mask-like ones have
** "Mask" in their name, and a parallel enum that has the shift
** amount (1 << x) for each corresponding enumerant.
*/

#ifndef spirv_H
#define spirv_H

typedef unsigned int SpvId;

#define SPV_VERSION 0x10000
#define SPV_REVISION 2

static const unsigned int SpvMagicNumber = 0x07230203;
static const unsigned int SpvVersion = 0x00010000;
static const unsigned int SpvRevision = 2;
static const unsigned int SpvOpCodeMask = 0xffff;
static const unsigned int SpvWordCountShift = 16;

typedef enum SpvSourceLanguage_ {
    SpvSourceLanguageUnknown = 0,
    SpvSourceLanguageESSL = 1,
    SpvSourceLanguageGLSL = 2,
    SpvSourceLanguageOpenCL_C = 3,
    SpvSourceLanguageOpenCL_CPP = 4,
} SpvSourceLanguage;

typedef enum SpvExecutionModel_ {
    SpvExecutionModelVertex = 0,
    SpvExecutionModelTessellationControl = 1,
    SpvExecutionModelTessellationEvaluation = 2,
    SpvExecutionModelGeometry = 3,
    SpvExecutionModelFragment = 4,
    SpvExecutionModelGLCompute = 5,
    SpvExecutionModelKernel = 6,
} SpvExecutionModel;

typedef enum SpvAddressingModel_ {
    SpvAddressingModelLogical = 0,
    SpvAddressingModelPhysical32 = 1,
    SpvAddressingModelPhysical64 = 2,
} SpvAddressingModel;

typedef enum SpvMemoryModel_ {
    SpvMemoryModelSimple = 0,
    SpvMemoryModelGLSL450 = 1,
    SpvMemoryModelOpenCL = 2,
} SpvMemoryModel;

typedef enum SpvExecutionMode_ {
    SpvExecutionModeInvocations = 0,
    SpvExecutionModeSpacingEqual = 1,
    SpvExecutionModeSpacingFractionalEven = 2,
    SpvExecutionModeSpacingFractionalOdd = 3,
    SpvExecutionModeVertexOrderCw = 4,
    SpvExecutionModeVertexOrderCcw = 5,
    SpvExecutionModePixelCenterInteger = 6,
    SpvExecutionModeOriginUpperLeft = 7,
    SpvExecutionModeOriginLowerLeft = 8,
    SpvExecutionModeEarlyFragmentTests = 9,
    SpvExecutionModePointMode = 10,
    SpvExecutionModeXfb = 11,
    SpvExecutionModeDepthReplacing = 12,
    SpvExecutionModeDepthGreater = 14,
    SpvExecutionModeDepthLess = 15,
    SpvExecutionModeDepthUnchanged = 16,
    SpvExecutionModeLocalSize = 17,
    SpvExecutionModeLocalSizeHint = 18,
    SpvExecutionModeInputPoints = 19,
    SpvExecutionModeInputLines = 20,
    SpvExecutionModeInputLinesAdjacency = 21,
    SpvExecutionModeTriangles = 22,
    SpvExecutionModeInputTrianglesAdjacency = 23,
    SpvExecutionModeQuads = 24,
    SpvExecutionModeIsolines = 25,
    SpvExecutionModeOutputVertices = 26,
    SpvExecutionModeOutputPoints = 27,
    SpvExecutionModeOutputLineStrip = 28,
    SpvExecutionModeOutputTriangleStrip = 29,
    SpvExecutionModeVecTypeHint = 30,
    SpvExecutionModeContractionOff = 31,
} SpvExecutionMode;

typedef enum SpvStorageClass_ {
    SpvStorageClassUniformConstant = 0,
    SpvStorageClassInput = 1,
    SpvStorageClassUniform = 2,
    SpvStorageClassOutput = 3,
    SpvStorageClassWorkgroup = 4,
    SpvStorageClassCrossWorkgroup = 5,
    SpvStorageClassPrivate = 6,
    SpvStorageClassFunction = 7,
    SpvStorageClassGeneric = 8,
    SpvStorageClassPushConstant = 9,
    SpvStorageClassAtomicCounter = 10,
    SpvStorageClassImage = 11,
} SpvStorageClass;

typedef enum SpvDim_ {
    SpvDim1D = 0,
    SpvDim2D = 1,
    SpvDim3D = 2,
    SpvDimCube = 3,
    SpvDimRect = 4,
    SpvDimBuffer = 5,
    SpvDimSubpassData = 6,
} SpvDim;

typedef enum SpvSamplerAddressingMode_ {
    SpvSamplerAddressingModeNone = 0,
    SpvSamplerAddressingModeClampToEdge = 1,
    SpvSamplerAddressingModeClamp = 2,
    SpvSamplerAddressingModeRepeat = 3,
    SpvSamplerAddressingModeRepeatMirrored = 4,
} SpvSamplerAddressingMode;

typedef enum SpvSamplerFilterMode_ {
    SpvSamplerFilterModeNearest = 0,
    SpvSamplerFilterModeLinear = 1,
} SpvSamplerFilterMode;

typedef enum SpvImageFormat_ {
    SpvImageFormatUnknown = 0,
    SpvImageFormatRgba32f = 1,
    SpvImageFormatRgba16f = 2,
    SpvImageFormatR32f = 3,
    SpvImageFormatRgba8 = 4,
    SpvImageFormatRgba8Snorm = 5,
    SpvImageFormatRg32f = 6,
    SpvImageFormatRg16f = 7,
    SpvImageFormatR11fG11fB10f = 8,
    SpvImageFormatR16f = 9,
    SpvImageFormatRgba16 = 10,
    SpvImageFormatRgb10A2 = 11,
    SpvImageFormatRg16 = 12,
    SpvImageFormatRg8 = 13,
    SpvImageFormatR16 = 14,
    SpvImageFormatR8 = 15,
    SpvImageFormatRgba16Snorm = 16,
    SpvImageFormatRg16Snorm = 17,
    SpvImageFormatRg8Snorm = 18,
    SpvImageFormatR16Snorm = 19,
    SpvImageFormatR8Snorm = 20,
    SpvImageFormatRgba32i = 21,
    SpvImageFormatRgba16i = 22,
    SpvImageFormatRgba8i = 23,
    SpvImageFormatR32i = 24,
    SpvImageFormatRg32i = 25,
    SpvImageFormatRg16i = 26,
    SpvImageFormatRg8i = 27,
    SpvImageFormatR16i = 28,
    SpvImageFormatR8i = 29,
    SpvImageFormatRgba32ui = 30,
    SpvImageFormatRgba16ui = 31,
    SpvImageFormatRgba8ui = 32,
    SpvImageFormatR32ui = 33,
    SpvImageFormatRgb10a2ui = 34,
    SpvImageFormatRg32ui = 35,
    SpvImageFormatRg16ui = 36,
    SpvImageFormatRg8ui = 37,
    SpvImageFormatR16ui = 38,
    SpvImageFormatR8ui = 39,
} SpvImageFormat;

typedef enum SpvImageChannelOrder_ {
    SpvImageChannelOrderR = 0,
    SpvImageChannelOrderA = 1,
    SpvImageChannelOrderRG = 2,
    SpvImageChannelOrderRA = 3,
    SpvImageChannelOrderRGB = 4,
    SpvImageChannelOrderRGBA = 5,
    SpvImageChannelOrderBGRA = 6,
    SpvImageChannelOrderARGB = 7,
    SpvImageChannelOrderIntensity = 8,
    SpvImageChannelOrderLuminance = 9,
    SpvImageChannelOrderRx = 10,
    SpvImageChannelOrderRGx = 11,
    SpvImageChannelOrderRGBx = 12,
    SpvImageChannelOrderDepth = 13,
    SpvImageChannelOrderDepthStencil = 14,
    SpvImageChannelOrdersRGB = 15,
    SpvImageChannelOrdersRGBx = 16,
    SpvImageChannelOrdersRGBA = 17,
    SpvImageChannelOrdersBGRA = 18,
} SpvImageChannelOrder;

typedef enum SpvImageChannelDataType_ {
    SpvImageChannelDataTypeSnormInt8 = 0,
    SpvImageChannelDataTypeSnormInt16 = 1,
    SpvImageChannelDataTypeUnormInt8 = 2,
    SpvImageChannelDataTypeUnormInt16 = 3,
    SpvImageChannelDataTypeUnormShort565 = 4,
    SpvImageChannelDataTypeUnormShort555 = 5,
    SpvImageChannelDataTypeUnormInt101010 = 6,
    SpvImageChannelDataTypeSignedInt8 = 7,
    SpvImageChannelDataTypeSignedInt16 = 8,
    SpvImageChannelDataTypeSignedInt32 = 9,
    SpvImageChannelDataTypeUnsignedInt8 = 10,
    SpvImageChannelDataTypeUnsignedInt16 = 11,
    SpvImageChannelDataTypeUnsignedInt32 = 12,
    SpvImageChannelDataTypeHalfFloat = 13,
    SpvImageChannelDataTypeFloat = 14,
    SpvImageChannelDataTypeUnormInt24 = 15,
    SpvImageChannelDataTypeUnormInt101010_2 = 16,
} SpvImageChannelDataType;

typedef enum SpvImageOperandsShift_ {
    SpvImageOperandsBiasShift = 0,
    SpvImageOperandsLodShift = 1,
    SpvImageOperandsGradShift = 2,
    SpvImageOperandsConstOffsetShift = 3,
    SpvImageOperandsOffsetShift = 4,
    SpvImageOperandsConstOffsetsShift = 5,
    SpvImageOperandsSampleShift = 6,
    SpvImageOperandsMinLodShift = 7,
} SpvImageOperandsShift;

typedef enum SpvImageOperandsMask_ {
    SpvImageOperandsMaskNone = 0,
    SpvImageOperandsBiasMask = 0x00000001,
    SpvImageOperandsLodMask = 0x00000002,
    SpvImageOperandsGradMask = 0x00000004,
    SpvImageOperandsConstOffsetMask = 0x00000008,
    SpvImageOperandsOffsetMask = 0x00000010,
    SpvImageOperandsConstOffsetsMask = 0x00000020,
    SpvImageOperandsSampleMask = 0x00000040,
    SpvImageOperandsMinLodMask = 0x00000080,
} SpvImageOperandsMask;

typedef enum SpvFPFastMathModeShift_ {
    SpvFPFastMathModeNotNaNShift = 0,
    SpvFPFastMathModeNotInfShift = 1,
    SpvFPFastMathModeNSZShift = 2,
    SpvFPFastMathModeAllowRecipShift = 3,
    SpvFPFastMathModeFastShift = 4,
} SpvFPFastMathModeShift;

typedef enum SpvFPFastMathModeMask_ {
    SpvFPFastMathModeMaskNone = 0,
    SpvFPFastMathModeNotNaNMask = 0x00000001,
    SpvFPFastMathModeNotInfMask = 0x00000002,
    SpvFPFastMathModeNSZMask = 0x00000004,
    SpvFPFastMathModeAllowRecipMask = 0x00000008,
    SpvFPFastMathModeFastMask = 0x00000010,
} SpvFPFastMathModeMask;

typedef enum SpvFPRoundingMode_ {
    SpvFPRoundingModeRTE = 0,
    SpvFPRoundingModeRTZ = 1,
    SpvFPRoundingModeRTP = 2,
    SpvFPRoundingModeRTN = 3,
} SpvFPRoundingMode;

typedef enum SpvLinkageType_ {
    SpvLinkageTypeExport = 0,
    SpvLinkageTypeImport = 1,
} SpvLinkageType;

typedef enum SpvAccessQualifier_ {
    SpvAccessQualifierReadOnly = 0,
    SpvAccessQualifierWriteOnly = 1,
    SpvAccessQualifierReadWrite = 2,
} SpvAccessQualifier;

typedef enum SpvFunctionParameterAttribute_ {
    SpvFunctionParameterAttributeZext = 0,
    SpvFunctionParameterAttributeSext = 1,
    SpvFunctionParameterAttributeByVal = 2,
    SpvFunctionParameterAttributeSret = 3,
    SpvFunctionParameterAttributeNoAlias = 4,
    SpvFunctionParameterAttributeNoCapture = 5,
    SpvFunctionParameterAttributeNoWrite = 6,
    SpvFunctionParameterAttributeNoReadWrite = 7,
} SpvFunctionParameterAttribute;

typedef enum SpvDecoration_ {
    SpvDecorationRelaxedPrecision = 0,
    SpvDecorationSpecId = 1,
    SpvDecorationBlock = 2,
    SpvDecorationBufferBlock = 3,
    SpvDecorationRowMajor = 4,
    SpvDecorationColMajor = 5,
    SpvDecorationArrayStride = 6,
    SpvDecorationMatrixStride = 7,
    SpvDecorationGLSLShared = 8,
    SpvDecorationGLSLPacked = 9,
    SpvDecorationCPacked = 10,
    SpvDecorationBuiltIn = 11,
    SpvDecorationNoPerspective = 13,
    SpvDecorationFlat = 14,
    SpvDecorationPatch = 15,
    SpvDecorationCentroid = 16,
    SpvDecorationSample = 17,
    SpvDecorationInvariant = 18,
    SpvDecorationRestrict = 19,
    SpvDecorationAliased = 20,
    SpvDecorationVolatile = 21,
    SpvDecorationConstant = 22,
    SpvDecorationCoherent = 23,
    SpvDecorationNonWritable = 24,
    SpvDecorationNonReadable = 25,
    SpvDecorationUniform = 26,
    SpvDecorationSaturatedConversion = 28,
    SpvDecorationStream = 29,
    SpvDecorationLocation = 30,
    SpvDecorationComponent = 31,
    SpvDecorationIndex = 32,
    SpvDecorationBinding = 33,
    SpvDecorationDescriptorSet = 34,
    SpvDecorationOffset = 35,
    SpvDecorationXfbBuffer = 36,
    SpvDecorationXfbStride = 37,
    SpvDecorationFuncParamAttr = 38,
    SpvDecorationFPRoundingMode = 39,
    SpvDecorationFPFastMathMode = 40,
    SpvDecorationLinkageAttributes = 41,
    SpvDecorationNoContraction = 42,
    SpvDecorationInputAttachmentIndex = 43,
    SpvDecorationAlignment = 44,
} SpvDecoration;

typedef enum SpvBuiltIn_ {
    SpvBuiltInPosition = 0,
    SpvBuiltInPointSize = 1,
    SpvBuiltInClipDistance = 3,
    SpvBuiltInCullDistance = 4,
    SpvBuiltInVertexId = 5,
    SpvBuiltInInstanceId = 6,
    SpvBuiltInPrimitiveId = 7,
    SpvBuiltInInvocationId = 8,
    SpvBuiltInLayer = 9,
    SpvBuiltInViewportIndex = 10,
    SpvBuiltInTessLevelOuter = 11,
    SpvBuiltInTessLevelInner = 12,
    SpvBuiltInTessCoord = 13,
    SpvBuiltInPatchVertices = 14,
    SpvBuiltInFragCoord = 15,
    SpvBuiltInPointCoord = 16,
    SpvBuiltInFrontFacing = 17,
    SpvBuiltInSampleId = 18,
    SpvBuiltInSamplePosition = 19,
    SpvBuiltInSampleMask = 20,
    SpvBuiltInFragDepth = 22,
    SpvBuiltInHelperInvocation = 23,
    SpvBuiltInNumWorkgroups = 24,
    SpvBuiltInWorkgroupSize = 25,
    SpvBuiltInWorkgroupId = 26,
    SpvBuiltInLocalInvocationId = 27,
    SpvBuiltInGlobalInvocationId = 28,
    SpvBuiltInLocalInvocationIndex = 29,
    SpvBuiltInWorkDim = 30,
    SpvBuiltInGlobalSize = 31,
    SpvBuiltInEnqueuedWorkgroupSize = 32,
    SpvBuiltInGlobalOffset = 33,
    SpvBuiltInGlobalLinearId = 34,
    SpvBuiltInSubgroupSize = 36,
    SpvBuiltInSubgroupMaxSize = 37,
    SpvBuiltInNumSubgroups = 38,
    SpvBuiltInNumEnqueuedSubgroups = 39,
    SpvBuiltInSubgroupId = 40,
    SpvBuiltInSubgroupLocalInvocationId = 41,
    SpvBuiltInVertexIndex = 42,
    SpvBuiltInInstanceIndex = 43,
} SpvBuiltIn;

typedef enum SpvSelectionControlShift_ {
    SpvSelectionControlFlattenShift = 0,
    SpvSelectionControlDontFlattenShift = 1,
} SpvSelectionControlShift;

typedef enum SpvSelectionControlMask_ {
    SpvSelectionControlMaskNone = 0,
    SpvSelectionControlFlattenMask = 0x00000001,
    SpvSelectionControlDontFlattenMask = 0x00000002,
} SpvSelectionControlMask;

typedef enum SpvLoopControlShift_ {
    SpvLoopControlUnrollShift = 0,
    SpvLoopControlDontUnrollShift = 1,
} SpvLoopControlShift;

typedef enum SpvLoopControlMask_ {
    SpvLoopControlMaskNone = 0,
    SpvLoopControlUnrollMask = 0x00000001,
    SpvLoopControlDontUnrollMask = 0x00000002,
} SpvLoopControlMask;

typedef enum SpvFunctionControlShift_ {
    SpvFunctionControlInlineShift = 0,
    SpvFunctionControlDontInlineShift = 1,
    SpvFunctionControlPureShift = 2,
    SpvFunctionControlConstShift = 3,
} SpvFunctionControlShift;

typedef enum SpvFunctionControlMask_ {
    SpvFunctionControlMaskNone = 0,
    SpvFunctionControlInlineMask = 0x00000001,
    SpvFunctionControlDontInlineMask = 0x00000002,
    SpvFunctionControlPureMask = 0x00000004,
    SpvFunctionControlConstMask = 0x00000008,
} SpvFunctionControlMask;

typedef enum SpvMemorySemanticsShift_ {
    SpvMemorySemanticsAcquireShift = 1,
    SpvMemorySemanticsReleaseShift = 2,
    SpvMemorySemanticsAcquireReleaseShift = 3,
    SpvMemorySemanticsSequentiallyConsistentShift = 4,
    SpvMemorySemanticsUniformMemoryShift = 6,
    SpvMemorySemanticsSubgroupMemoryShift = 7,
    SpvMemorySemanticsWorkgroupMemoryShift = 8,
    SpvMemorySemanticsCrossWorkgroupMemoryShift = 9,
    SpvMemorySemanticsAtomicCounterMemoryShift = 10,
    SpvMemorySemanticsImageMemoryShift = 11,
} SpvMemorySemanticsShift;

typedef enum SpvMemorySemanticsMask_ {
    SpvMemorySemanticsMaskNone = 0,
    SpvMemorySemanticsAcquireMask = 0x00000002,
    SpvMemorySemanticsReleaseMask = 0x00000004,
    SpvMemorySemanticsAcquireReleaseMask = 0x00000008,
    SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010,
    SpvMemorySemanticsUniformMemoryMask = 0x00000040,
    SpvMemorySemanticsSubgroupMemoryMask = 0x00000080,
    SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100,
    SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
    SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400,
    SpvMemorySemanticsImageMemoryMask = 0x00000800,
} SpvMemorySemanticsMask;

typedef enum SpvMemoryAccessShift_ {
    SpvMemoryAccessVolatileShift = 0,
    SpvMemoryAccessAlignedShift = 1,
    SpvMemoryAccessNontemporalShift = 2,
} SpvMemoryAccessShift;

typedef enum SpvMemoryAccessMask_ {
    SpvMemoryAccessMaskNone = 0,
    SpvMemoryAccessVolatileMask = 0x00000001,
    SpvMemoryAccessAlignedMask = 0x00000002,
    SpvMemoryAccessNontemporalMask = 0x00000004,
} SpvMemoryAccessMask;

typedef enum SpvScope_ {
    SpvScopeCrossDevice = 0,
    SpvScopeDevice = 1,
    SpvScopeWorkgroup = 2,
    SpvScopeSubgroup = 3,
    SpvScopeInvocation = 4,
} SpvScope;

typedef enum SpvGroupOperation_ {
    SpvGroupOperationReduce = 0,
    SpvGroupOperationInclusiveScan = 1,
    SpvGroupOperationExclusiveScan = 2,
} SpvGroupOperation;

typedef enum SpvKernelEnqueueFlags_ {
    SpvKernelEnqueueFlagsNoWait = 0,
    SpvKernelEnqueueFlagsWaitKernel = 1,
    SpvKernelEnqueueFlagsWaitWorkGroup = 2,
} SpvKernelEnqueueFlags;

typedef enum SpvKernelProfilingInfoShift_ {
    SpvKernelProfilingInfoCmdExecTimeShift = 0,
} SpvKernelProfilingInfoShift;

typedef enum SpvKernelProfilingInfoMask_ {
    SpvKernelProfilingInfoMaskNone = 0,
    SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001,
} SpvKernelProfilingInfoMask;

typedef enum SpvCapability_ {
    SpvCapabilityMatrix = 0,
    SpvCapabilityShader = 1,
    SpvCapabilityGeometry = 2,
    SpvCapabilityTessellation = 3,
    SpvCapabilityAddresses = 4,
    SpvCapabilityLinkage = 5,
    SpvCapabilityKernel = 6,
    SpvCapabilityVector16 = 7,
    SpvCapabilityFloat16Buffer = 8,
    SpvCapabilityFloat16 = 9,
    SpvCapabilityFloat64 = 10,
    SpvCapabilityInt64 = 11,
    SpvCapabilityInt64Atomics = 12,
    SpvCapabilityImageBasic = 13,
    SpvCapabilityImageReadWrite = 14,
    SpvCapabilityImageMipmap = 15,
    SpvCapabilityPipes = 17,
    SpvCapabilityGroups = 18,
    SpvCapabilityDeviceEnqueue = 19,
    SpvCapabilityLiteralSampler = 20,
    SpvCapabilityAtomicStorage = 21,
    SpvCapabilityInt16 = 22,
    SpvCapabilityTessellationPointSize = 23,
    SpvCapabilityGeometryPointSize = 24,
    SpvCapabilityImageGatherExtended = 25,
    SpvCapabilityStorageImageMultisample = 27,
    SpvCapabilityUniformBufferArrayDynamicIndexing = 28,
    SpvCapabilitySampledImageArrayDynamicIndexing = 29,
    SpvCapabilityStorageBufferArrayDynamicIndexing = 30,
    SpvCapabilityStorageImageArrayDynamicIndexing = 31,
    SpvCapabilityClipDistance = 32,
    SpvCapabilityCullDistance = 33,
    SpvCapabilityImageCubeArray = 34,
    SpvCapabilitySampleRateShading = 35,
    SpvCapabilityImageRect = 36,
    SpvCapabilitySampledRect = 37,
    SpvCapabilityGenericPointer = 38,
    SpvCapabilityInt8 = 39,
    SpvCapabilityInputAttachment = 40,
    SpvCapabilitySparseResidency = 41,
    SpvCapabilityMinLod = 42,
    SpvCapabilitySampled1D = 43,
    SpvCapabilityImage1D = 44,
    SpvCapabilitySampledCubeArray = 45,
    SpvCapabilitySampledBuffer = 46,
    SpvCapabilityImageBuffer = 47,
    SpvCapabilityImageMSArray = 48,
    SpvCapabilityStorageImageExtendedFormats = 49,
    SpvCapabilityImageQuery = 50,
    SpvCapabilityDerivativeControl = 51,
    SpvCapabilityInterpolationFunction = 52,
    SpvCapabilityTransformFeedback = 53,
    SpvCapabilityGeometryStreams = 54,
    SpvCapabilityStorageImageReadWithoutFormat = 55,
    SpvCapabilityStorageImageWriteWithoutFormat = 56,
    SpvCapabilityMultiViewport = 57,
} SpvCapability;

typedef enum SpvOp_ {
    SpvOpNop = 0,
    SpvOpUndef = 1,
    SpvOpSourceContinued = 2,
    SpvOpSource = 3,
    SpvOpSourceExtension = 4,
    SpvOpName = 5,
    SpvOpMemberName = 6,
    SpvOpString = 7,
    SpvOpLine = 8,
    SpvOpExtension = 10,
    SpvOpExtInstImport = 11,
    SpvOpExtInst = 12,
    SpvOpMemoryModel = 14,
    SpvOpEntryPoint = 15,
    SpvOpExecutionMode = 16,
    SpvOpCapability = 17,
    SpvOpTypeVoid = 19,
    SpvOpTypeBool = 20,
    SpvOpTypeInt = 21,
    SpvOpTypeFloat = 22,
    SpvOpTypeVector = 23,
    SpvOpTypeMatrix = 24,
    SpvOpTypeImage = 25,
    SpvOpTypeSampler = 26,
    SpvOpTypeSampledImage = 27,
    SpvOpTypeArray = 28,
    SpvOpTypeRuntimeArray = 29,
    SpvOpTypeStruct = 30,
    SpvOpTypeOpaque = 31,
    SpvOpTypePointer = 32,
    SpvOpTypeFunction = 33,
    SpvOpTypeEvent = 34,
    SpvOpTypeDeviceEvent = 35,
    SpvOpTypeReserveId = 36,
    SpvOpTypeQueue = 37,
    SpvOpTypePipe = 38,
    SpvOpTypeForwardPointer = 39,
    SpvOpConstantTrue = 41,
    SpvOpConstantFalse = 42,
    SpvOpConstant = 43,
    SpvOpConstantComposite = 44,
    SpvOpConstantSampler = 45,
    SpvOpConstantNull = 46,
    SpvOpSpecConstantTrue = 48,
    SpvOpSpecConstantFalse = 49,
    SpvOpSpecConstant = 50,
    SpvOpSpecConstantComposite = 51,
    SpvOpSpecConstantOp = 52,
    SpvOpFunction = 54,
    SpvOpFunctionParameter = 55,
    SpvOpFunctionEnd = 56,
    SpvOpFunctionCall = 57,
    SpvOpVariable = 59,
    SpvOpImageTexelPointer = 60,
    SpvOpLoad = 61,
    SpvOpStore = 62,
    SpvOpCopyMemory = 63,
    SpvOpCopyMemorySized = 64,
    SpvOpAccessChain = 65,
    SpvOpInBoundsAccessChain = 66,
    SpvOpPtrAccessChain = 67,
    SpvOpArrayLength = 68,
    SpvOpGenericPtrMemSemantics = 69,
    SpvOpInBoundsPtrAccessChain = 70,
    SpvOpDecorate = 71,
    SpvOpMemberDecorate = 72,
    SpvOpDecorationGroup = 73,
    SpvOpGroupDecorate = 74,
    SpvOpGroupMemberDecorate = 75,
    SpvOpVectorExtractDynamic = 77,
    SpvOpVectorInsertDynamic = 78,
    SpvOpVectorShuffle = 79,
    SpvOpCompositeConstruct = 80,
    SpvOpCompositeExtract = 81,
    SpvOpCompositeInsert = 82,
    SpvOpCopyObject = 83,
    SpvOpTranspose = 84,
    SpvOpSampledImage = 86,
    SpvOpImageSampleImplicitLod = 87,
    SpvOpImageSampleExplicitLod = 88,
    SpvOpImageSampleDrefImplicitLod = 89,
    SpvOpImageSampleDrefExplicitLod = 90,
    SpvOpImageSampleProjImplicitLod = 91,
    SpvOpImageSampleProjExplicitLod = 92,
    SpvOpImageSampleProjDrefImplicitLod = 93,
    SpvOpImageSampleProjDrefExplicitLod = 94,
    SpvOpImageFetch = 95,
    SpvOpImageGather = 96,
    SpvOpImageDrefGather = 97,
    SpvOpImageRead = 98,
    SpvOpImageWrite = 99,
    SpvOpImage = 100,
    SpvOpImageQueryFormat = 101,
    SpvOpImageQueryOrder = 102,
    SpvOpImageQuerySizeLod = 103,
    SpvOpImageQuerySize = 104,
    SpvOpImageQueryLod = 105,
    SpvOpImageQueryLevels = 106,
    SpvOpImageQuerySamples = 107,
    SpvOpConvertFToU = 109,
    SpvOpConvertFToS = 110,
    SpvOpConvertSToF = 111,
    SpvOpConvertUToF = 112,
    SpvOpUConvert = 113,
    SpvOpSConvert = 114,
    SpvOpFConvert = 115,
    SpvOpQuantizeToF16 = 116,
    SpvOpConvertPtrToU = 117,
    SpvOpSatConvertSToU = 118,
    SpvOpSatConvertUToS = 119,
    SpvOpConvertUToPtr = 120,
    SpvOpPtrCastToGeneric = 121,
    SpvOpGenericCastToPtr = 122,
    SpvOpGenericCastToPtrExplicit = 123,
    SpvOpBitcast = 124,
    SpvOpSNegate = 126,
    SpvOpFNegate = 127,
    SpvOpIAdd = 128,
    SpvOpFAdd = 129,
    SpvOpISub = 130,
    SpvOpFSub = 131,
    SpvOpIMul = 132,
    SpvOpFMul = 133,
    SpvOpUDiv = 134,
    SpvOpSDiv = 135,
    SpvOpFDiv = 136,
    SpvOpUMod = 137,
    SpvOpSRem = 138,
    SpvOpSMod = 139,
    SpvOpFRem = 140,
    SpvOpFMod = 141,
    SpvOpVectorTimesScalar = 142,
    SpvOpMatrixTimesScalar = 143,
    SpvOpVectorTimesMatrix = 144,
    SpvOpMatrixTimesVector = 145,
    SpvOpMatrixTimesMatrix = 146,
    SpvOpOuterProduct = 147,
    SpvOpDot = 148,
    SpvOpIAddCarry = 149,
    SpvOpISubBorrow = 150,
    SpvOpUMulExtended = 151,
    SpvOpSMulExtended = 152,
    SpvOpAny = 154,
    SpvOpAll = 155,
    SpvOpIsNan = 156,
    SpvOpIsInf = 157,
    SpvOpIsFinite = 158,
    SpvOpIsNormal = 159,
    SpvOpSignBitSet = 160,
    SpvOpLessOrGreater = 161,
    SpvOpOrdered = 162,
    SpvOpUnordered = 163,
    SpvOpLogicalEqual = 164,
    SpvOpLogicalNotEqual = 165,
    SpvOpLogicalOr = 166,
    SpvOpLogicalAnd = 167,
    SpvOpLogicalNot = 168,
    SpvOpSelect = 169,
    SpvOpIEqual = 170,
    SpvOpINotEqual = 171,
    SpvOpUGreaterThan = 172,
    SpvOpSGreaterThan = 173,
    SpvOpUGreaterThanEqual = 174,
    SpvOpSGreaterThanEqual = 175,
    SpvOpULessThan = 176,
    SpvOpSLessThan = 177,
    SpvOpULessThanEqual = 178,
    SpvOpSLessThanEqual = 179,
    SpvOpFOrdEqual = 180,
    SpvOpFUnordEqual = 181,
    SpvOpFOrdNotEqual = 182,
    SpvOpFUnordNotEqual = 183,
    SpvOpFOrdLessThan = 184,
    SpvOpFUnordLessThan = 185,
    SpvOpFOrdGreaterThan = 186,
    SpvOpFUnordGreaterThan = 187,
    SpvOpFOrdLessThanEqual = 188,
    SpvOpFUnordLessThanEqual = 189,
    SpvOpFOrdGreaterThanEqual = 190,
    SpvOpFUnordGreaterThanEqual = 191,
    SpvOpShiftRightLogical = 194,
    SpvOpShiftRightArithmetic = 195,
    SpvOpShiftLeftLogical = 196,
    SpvOpBitwiseOr = 197,
    SpvOpBitwiseXor = 198,
    SpvOpBitwiseAnd = 199,
    SpvOpNot = 200,
    SpvOpBitFieldInsert = 201,
    SpvOpBitFieldSExtract = 202,
    SpvOpBitFieldUExtract = 203,
    SpvOpBitReverse = 204,
    SpvOpBitCount = 205,
    SpvOpDPdx = 207,
    SpvOpDPdy = 208,
    SpvOpFwidth = 209,
    SpvOpDPdxFine = 210,
    SpvOpDPdyFine = 211,
    SpvOpFwidthFine = 212,
    SpvOpDPdxCoarse = 213,
    SpvOpDPdyCoarse = 214,
    SpvOpFwidthCoarse = 215,
    SpvOpEmitVertex = 218,
    SpvOpEndPrimitive = 219,
    SpvOpEmitStreamVertex = 220,
    SpvOpEndStreamPrimitive = 221,
    SpvOpControlBarrier = 224,
    SpvOpMemoryBarrier = 225,
    SpvOpAtomicLoad = 227,
    SpvOpAtomicStore = 228,
    SpvOpAtomicExchange = 229,
    SpvOpAtomicCompareExchange = 230,
    SpvOpAtomicCompareExchangeWeak = 231,
    SpvOpAtomicIIncrement = 232,
    SpvOpAtomicIDecrement = 233,
    SpvOpAtomicIAdd = 234,
    SpvOpAtomicISub = 235,
    SpvOpAtomicSMin = 236,
    SpvOpAtomicUMin = 237,
    SpvOpAtomicSMax = 238,
    SpvOpAtomicUMax = 239,
    SpvOpAtomicAnd = 240,
    SpvOpAtomicOr = 241,
    SpvOpAtomicXor = 242,
    SpvOpPhi = 245,
    SpvOpLoopMerge = 246,
    SpvOpSelectionMerge = 247,
    SpvOpLabel = 248,
    SpvOpBranch = 249,
    SpvOpBranchConditional = 250,
    SpvOpSwitch = 251,
    SpvOpKill = 252,
    SpvOpReturn = 253,
    SpvOpReturnValue = 254,
    SpvOpUnreachable = 255,
    SpvOpLifetimeStart = 256,
    SpvOpLifetimeStop = 257,
    SpvOpGroupAsyncCopy = 259,
    SpvOpGroupWaitEvents = 260,
    SpvOpGroupAll = 261,
    SpvOpGroupAny = 262,
    SpvOpGroupBroadcast = 263,
    SpvOpGroupIAdd = 264,
    SpvOpGroupFAdd = 265,
    SpvOpGroupFMin = 266,
    SpvOpGroupUMin = 267,
    SpvOpGroupSMin = 268,
    SpvOpGroupFMax = 269,
    SpvOpGroupUMax = 270,
    SpvOpGroupSMax = 271,
    SpvOpReadPipe = 274,
    SpvOpWritePipe = 275,
    SpvOpReservedReadPipe = 276,
    SpvOpReservedWritePipe = 277,
    SpvOpReserveReadPipePackets = 278,
    SpvOpReserveWritePipePackets = 279,
    SpvOpCommitReadPipe = 280,
    SpvOpCommitWritePipe = 281,
    SpvOpIsValidReserveId = 282,
    SpvOpGetNumPipePackets = 283,
    SpvOpGetMaxPipePackets = 284,
    SpvOpGroupReserveReadPipePackets = 285,
    SpvOpGroupReserveWritePipePackets = 286,
    SpvOpGroupCommitReadPipe = 287,
    SpvOpGroupCommitWritePipe = 288,
    SpvOpEnqueueMarker = 291,
    SpvOpEnqueueKernel = 292,
    SpvOpGetKernelNDrangeSubGroupCount = 293,
    SpvOpGetKernelNDrangeMaxSubGroupSize = 294,
    SpvOpGetKernelWorkGroupSize = 295,
    SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296,
    SpvOpRetainEvent = 297,
    SpvOpReleaseEvent = 298,
    SpvOpCreateUserEvent = 299,
    SpvOpIsValidEvent = 300,
    SpvOpSetUserEventStatus = 301,
    SpvOpCaptureEventProfilingInfo = 302,
    SpvOpGetDefaultQueue = 303,
    SpvOpBuildNDRange = 304,
    SpvOpImageSparseSampleImplicitLod = 305,
    SpvOpImageSparseSampleExplicitLod = 306,
    SpvOpImageSparseSampleDrefImplicitLod = 307,
    SpvOpImageSparseSampleDrefExplicitLod = 308,
    SpvOpImageSparseSampleProjImplicitLod = 309,
    SpvOpImageSparseSampleProjExplicitLod = 310,
    SpvOpImageSparseSampleProjDrefImplicitLod = 311,
    SpvOpImageSparseSampleProjDrefExplicitLod = 312,
    SpvOpImageSparseFetch = 313,
    SpvOpImageSparseGather = 314,
    SpvOpImageSparseDrefGather = 315,
    SpvOpImageSparseTexelsResident = 316,
    SpvOpNoLine = 317,
    SpvOpAtomicFlagTestAndSet = 318,
    SpvOpAtomicFlagClear = 319,
} SpvOp;

#endif  // #ifndef spirv_H
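The constants above fully determine the physical encoding of a SPIR-V binary: a 5-word module header (magic, version, generator, ID bound, reserved) followed by instructions whose first word packs the word count in the high 16 bits and the opcode in the low 16 bits. A minimal, illustrative walker (not from this commit):

#include <stdio.h>
#include "spirv.h"

static void walk_spirv(const unsigned int *words, size_t count)
{
   if (count < 5 || words[0] != SpvMagicNumber) {
      fprintf(stderr, "not a SPIR-V module\n");
      return;
   }
   /* words[1] is the version and words[3] the ID bound; skip the header. */
   for (size_t i = 5; i < count;) {
      SpvOp opcode = (SpvOp)(words[i] & SpvOpCodeMask);
      unsigned wc = words[i] >> SpvWordCountShift;
      printf("opcode %u, %u words\n", (unsigned)opcode, wc);
      if (wc == 0)
         break;   /* malformed stream; avoid an infinite loop */
      i += wc;
   }
}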
src/compiler/spirv/spirv_to_nir.c (new file, 2710 lines; diff suppressed because it is too large)
464
src/compiler/spirv/vtn_alu.c
Normal file
464
src/compiler/spirv/vtn_alu.c
Normal file
|
|
@ -0,0 +1,464 @@
|
|||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "vtn_private.h"
|
||||
|
||||
/*
|
||||
* Normally, column vectors in SPIR-V correspond to a single NIR SSA
|
||||
* definition. But for matrix multiplies, we want to do one routine for
|
||||
 * multiplying a matrix by a matrix and then pretend that vectors are matrices
 * with one column. So we "wrap" these things, and unwrap the result before we
 * send it off.
 */

static struct vtn_ssa_value *
wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val)
{
   if (val == NULL)
      return NULL;

   if (glsl_type_is_matrix(val->type))
      return val;

   struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
   dest->type = val->type;
   dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1);
   dest->elems[0] = val;

   return dest;
}

static struct vtn_ssa_value *
unwrap_matrix(struct vtn_ssa_value *val)
{
   if (glsl_type_is_matrix(val->type))
      return val;

   return val->elems[0];
}

static struct vtn_ssa_value *
matrix_multiply(struct vtn_builder *b,
                struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
{

   struct vtn_ssa_value *src0 = wrap_matrix(b, _src0);
   struct vtn_ssa_value *src1 = wrap_matrix(b, _src1);
   struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed);
   struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed);

   unsigned src0_rows = glsl_get_vector_elements(src0->type);
   unsigned src0_columns = glsl_get_matrix_columns(src0->type);
   unsigned src1_columns = glsl_get_matrix_columns(src1->type);

   const struct glsl_type *dest_type;
   if (src1_columns > 1) {
      dest_type = glsl_matrix_type(glsl_get_base_type(src0->type),
                                   src0_rows, src1_columns);
   } else {
      dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows);
   }
   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type);

   dest = wrap_matrix(b, dest);

   bool transpose_result = false;
   if (src0_transpose && src1_transpose) {
      /* transpose(A) * transpose(B) = transpose(B * A) */
      src1 = src0_transpose;
      src0 = src1_transpose;
      src0_transpose = NULL;
      src1_transpose = NULL;
      transpose_result = true;
   }

   if (src0_transpose && !src1_transpose &&
       glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
      /* We already have the rows of src0 and the columns of src1 available,
       * so we can just take the dot product of each row with each column to
       * get the result.
       */

      for (unsigned i = 0; i < src1_columns; i++) {
         nir_ssa_def *vec_src[4];
         for (unsigned j = 0; j < src0_rows; j++) {
            vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
                                  src1->elems[i]->def);
         }
         dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
      }
   } else {
      /* We don't handle the case where src1 is transposed but not src0, since
       * the general case only uses individual components of src1 so the
       * optimizer should chew through the transpose we emitted for src1.
       */

      for (unsigned i = 0; i < src1_columns; i++) {
         /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
         dest->elems[i]->def =
            nir_fmul(&b->nb, src0->elems[0]->def,
                     nir_channel(&b->nb, src1->elems[i]->def, 0));
         for (unsigned j = 1; j < src0_columns; j++) {
            dest->elems[i]->def =
               nir_fadd(&b->nb, dest->elems[i]->def,
                        nir_fmul(&b->nb, src0->elems[j]->def,
                                 nir_channel(&b->nb, src1->elems[i]->def, j)));
         }
      }
   }

   dest = unwrap_matrix(dest);

   if (transpose_result)
      dest = vtn_ssa_transpose(b, dest);

   return dest;
}

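/* A minimal, self-contained CPU sketch (hypothetical helper, not part of
 * the driver) of the accumulation strategy used by the general path above:
 * for column-major storage, column i of the result is a linear combination
 * of src0's columns, weighted by the scalar components of src1's column i.
 */
static void
mat_mul_colmajor_sketch(const float *src0, const float *src1, float *dest,
                        unsigned rows, unsigned src0_cols, unsigned src1_cols)
{
   for (unsigned i = 0; i < src1_cols; i++) {
      /* dest[i] = src0[0] * src1[i][0] */
      for (unsigned r = 0; r < rows; r++)
         dest[i * rows + r] = src0[r] * src1[i * src0_cols + 0];
      /* dest[i] += src0[j] * src1[i][j] for the remaining j */
      for (unsigned j = 1; j < src0_cols; j++) {
         for (unsigned r = 0; r < rows; r++)
            dest[i * rows + r] += src0[j * rows + r] * src1[i * src0_cols + j];
      }
   }
}
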
static struct vtn_ssa_value *
mat_times_scalar(struct vtn_builder *b,
                 struct vtn_ssa_value *mat,
                 nir_ssa_def *scalar)
{
   struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type);
   for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) {
      if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT)
         dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar);
      else
         dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar);
   }

   return dest;
}

static void
vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode,
                      struct vtn_value *dest,
                      struct vtn_ssa_value *src0, struct vtn_ssa_value *src1)
{
   switch (opcode) {
   case SpvOpFNegate: {
      dest->ssa = vtn_create_ssa_value(b, src0->type);
      unsigned cols = glsl_get_matrix_columns(src0->type);
      for (unsigned i = 0; i < cols; i++)
         dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def);
      break;
   }

   case SpvOpFAdd: {
      dest->ssa = vtn_create_ssa_value(b, src0->type);
      unsigned cols = glsl_get_matrix_columns(src0->type);
      for (unsigned i = 0; i < cols; i++)
         dest->ssa->elems[i]->def =
            nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
      break;
   }

   case SpvOpFSub: {
      dest->ssa = vtn_create_ssa_value(b, src0->type);
      unsigned cols = glsl_get_matrix_columns(src0->type);
      for (unsigned i = 0; i < cols; i++)
         dest->ssa->elems[i]->def =
            nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
      break;
   }

   case SpvOpTranspose:
      dest->ssa = vtn_ssa_transpose(b, src0);
      break;

   case SpvOpMatrixTimesScalar:
      if (src0->transposed) {
         dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed,
                                                           src1->def));
      } else {
         dest->ssa = mat_times_scalar(b, src0, src1->def);
      }
      break;

   case SpvOpVectorTimesMatrix:
   case SpvOpMatrixTimesVector:
   case SpvOpMatrixTimesMatrix:
      if (opcode == SpvOpVectorTimesMatrix) {
         dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0);
      } else {
         dest->ssa = matrix_multiply(b, src0, src1);
      }
      break;

   default: unreachable("unknown matrix opcode");
   }
}

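/* Note on the SpvOpVectorTimesMatrix case above: a row vector times a
 * matrix is rewritten using v * M = (M^T) * v (treating v as a column
 * vector), so the single matrix_multiply() path can serve all three
 * matrix-product opcodes.
 */
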
nir_op
vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap)
{
   /* Indicates that the first two arguments should be swapped.  This is
    * used for implementing greater-than and less-than-or-equal.
    */
   *swap = false;

   switch (opcode) {
   case SpvOpSNegate: return nir_op_ineg;
   case SpvOpFNegate: return nir_op_fneg;
   case SpvOpNot: return nir_op_inot;
   case SpvOpIAdd: return nir_op_iadd;
   case SpvOpFAdd: return nir_op_fadd;
   case SpvOpISub: return nir_op_isub;
   case SpvOpFSub: return nir_op_fsub;
   case SpvOpIMul: return nir_op_imul;
   case SpvOpFMul: return nir_op_fmul;
   case SpvOpUDiv: return nir_op_udiv;
   case SpvOpSDiv: return nir_op_idiv;
   case SpvOpFDiv: return nir_op_fdiv;
   case SpvOpUMod: return nir_op_umod;
   case SpvOpSMod: return nir_op_imod;
   case SpvOpFMod: return nir_op_fmod;
   case SpvOpSRem: return nir_op_irem;
   case SpvOpFRem: return nir_op_frem;

   case SpvOpShiftRightLogical: return nir_op_ushr;
   case SpvOpShiftRightArithmetic: return nir_op_ishr;
   case SpvOpShiftLeftLogical: return nir_op_ishl;
   case SpvOpLogicalOr: return nir_op_ior;
   case SpvOpLogicalEqual: return nir_op_ieq;
   case SpvOpLogicalNotEqual: return nir_op_ine;
   case SpvOpLogicalAnd: return nir_op_iand;
   case SpvOpLogicalNot: return nir_op_inot;
   case SpvOpBitwiseOr: return nir_op_ior;
   case SpvOpBitwiseXor: return nir_op_ixor;
   case SpvOpBitwiseAnd: return nir_op_iand;
   case SpvOpSelect: return nir_op_bcsel;
   case SpvOpIEqual: return nir_op_ieq;

   case SpvOpBitFieldInsert: return nir_op_bitfield_insert;
   case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract;
   case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract;
   case SpvOpBitReverse: return nir_op_bitfield_reverse;
   case SpvOpBitCount: return nir_op_bit_count;

   /* Comparisons: (TODO: How do we want to handle ordered/unordered?) */
   case SpvOpFOrdEqual: return nir_op_feq;
   case SpvOpFUnordEqual: return nir_op_feq;
   case SpvOpINotEqual: return nir_op_ine;
   case SpvOpFOrdNotEqual: return nir_op_fne;
   case SpvOpFUnordNotEqual: return nir_op_fne;
   case SpvOpULessThan: return nir_op_ult;
   case SpvOpSLessThan: return nir_op_ilt;
   case SpvOpFOrdLessThan: return nir_op_flt;
   case SpvOpFUnordLessThan: return nir_op_flt;
   case SpvOpUGreaterThan: *swap = true; return nir_op_ult;
   case SpvOpSGreaterThan: *swap = true; return nir_op_ilt;
   case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt;
   case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt;
   case SpvOpULessThanEqual: *swap = true; return nir_op_uge;
   case SpvOpSLessThanEqual: *swap = true; return nir_op_ige;
   case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge;
   case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge;
   case SpvOpUGreaterThanEqual: return nir_op_uge;
   case SpvOpSGreaterThanEqual: return nir_op_ige;
   case SpvOpFOrdGreaterThanEqual: return nir_op_fge;
   case SpvOpFUnordGreaterThanEqual: return nir_op_fge;

   /* Conversions: */
   case SpvOpConvertFToU: return nir_op_f2u;
   case SpvOpConvertFToS: return nir_op_f2i;
   case SpvOpConvertSToF: return nir_op_i2f;
   case SpvOpConvertUToF: return nir_op_u2f;
   case SpvOpBitcast: return nir_op_imov;
   case SpvOpUConvert:
   case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
   /* TODO: NIR is 32-bit only; these are no-ops. */
   case SpvOpSConvert: return nir_op_imov;
   case SpvOpFConvert: return nir_op_fmov;

   /* Derivatives: */
   case SpvOpDPdx: return nir_op_fddx;
   case SpvOpDPdy: return nir_op_fddy;
   case SpvOpDPdxFine: return nir_op_fddx_fine;
   case SpvOpDPdyFine: return nir_op_fddy_fine;
   case SpvOpDPdxCoarse: return nir_op_fddx_coarse;
   case SpvOpDPdyCoarse: return nir_op_fddy_coarse;

   default:
      unreachable("No NIR equivalent");
   }
}

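/* A minimal sketch (hypothetical helper, not in the driver) of how the
 * *swap flag is meant to be consumed; it mirrors the default case of
 * vtn_handle_alu() below.  Greater-than has no NIR opcode of its own, so
 * the table returns the less-than opcode and asks the caller to exchange
 * the operands: (a > b) == (b < a).
 */
static nir_ssa_def *
build_ugreater_sketch(struct vtn_builder *b, nir_ssa_def *a0, nir_ssa_def *a1)
{
   bool swap;
   nir_op op = vtn_nir_alu_op_for_spirv_opcode(SpvOpUGreaterThan, &swap);
   /* Here op == nir_op_ult and swap == true. */
   if (swap) {
      nir_ssa_def *tmp = a0;
      a0 = a1;
      a1 = tmp;
   }
   return nir_build_alu(&b->nb, op, a0, a1, NULL, NULL);
}
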
static void
handle_no_contraction(struct vtn_builder *b, struct vtn_value *val, int member,
                      const struct vtn_decoration *dec, void *_void)
{
   assert(dec->scope == VTN_DEC_DECORATION);
   if (dec->decoration != SpvDecorationNoContraction)
      return;

   b->nb.exact = true;
}

void
vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
               const uint32_t *w, unsigned count)
{
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   const struct glsl_type *type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   vtn_foreach_decoration(b, val, handle_no_contraction, NULL);

   /* Collect the various SSA sources */
   const unsigned num_inputs = count - 3;
   struct vtn_ssa_value *vtn_src[4] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++)
      vtn_src[i] = vtn_ssa_value(b, w[i + 3]);

   if (glsl_type_is_matrix(vtn_src[0]->type) ||
       (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) {
      vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]);
      b->nb.exact = false;
      return;
   }

   val->ssa = vtn_create_ssa_value(b, type);
   nir_ssa_def *src[4] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++) {
      assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type));
      src[i] = vtn_src[i]->def;
   }

   switch (opcode) {
   case SpvOpAny:
      if (src[0]->num_components == 1) {
         val->ssa->def = nir_imov(&b->nb, src[0]);
      } else {
         nir_op op;
         switch (src[0]->num_components) {
         case 2: op = nir_op_bany_inequal2; break;
         case 3: op = nir_op_bany_inequal3; break;
         case 4: op = nir_op_bany_inequal4; break;
         }
         val->ssa->def = nir_build_alu(&b->nb, op, src[0],
                                       nir_imm_int(&b->nb, NIR_FALSE),
                                       NULL, NULL);
      }
      break;

   case SpvOpAll:
      if (src[0]->num_components == 1) {
         val->ssa->def = nir_imov(&b->nb, src[0]);
      } else {
         nir_op op;
         switch (src[0]->num_components) {
         case 2: op = nir_op_ball_iequal2; break;
         case 3: op = nir_op_ball_iequal3; break;
         case 4: op = nir_op_ball_iequal4; break;
         }
         val->ssa->def = nir_build_alu(&b->nb, op, src[0],
                                       nir_imm_int(&b->nb, NIR_TRUE),
                                       NULL, NULL);
      }
      break;

   case SpvOpOuterProduct: {
      for (unsigned i = 0; i < src[1]->num_components; i++) {
         val->ssa->elems[i]->def =
            nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i));
      }
      break;
   }

   case SpvOpDot:
      val->ssa->def = nir_fdot(&b->nb, src[0], src[1]);
      break;

   case SpvOpIAddCarry:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]);
      break;

   case SpvOpISubBorrow:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]);
      break;

   case SpvOpUMulExtended:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]);
      break;

   case SpvOpSMulExtended:
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
      val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]);
      break;

   case SpvOpFwidth:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy(&b->nb, src[0])));
      break;
   case SpvOpFwidthFine:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0])));
      break;
   case SpvOpFwidthCoarse:
      val->ssa->def = nir_fadd(&b->nb,
                               nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
                               nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0])));
      break;

   case SpvOpVectorTimesScalar:
      /* The builder will take care of splatting for us. */
      val->ssa->def = nir_fmul(&b->nb, src[0], src[1]);
      break;

   case SpvOpIsNan:
      val->ssa->def = nir_fne(&b->nb, src[0], src[0]);
      break;

   case SpvOpIsInf:
      val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]),
                              nir_imm_float(&b->nb, INFINITY));
      break;

   default: {
      bool swap;
      nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);

      if (swap) {
         nir_ssa_def *tmp = src[0];
         src[0] = src[1];
         src[1] = tmp;
      }

      val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
      break;
   } /* default */
   }

   b->nb.exact = false;
}

778
src/compiler/spirv/vtn_cfg.c
Normal file
@@ -0,0 +1,778 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vtn_private.h"
#include "nir/nir_vla.h"

static bool
vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode,
                                   const uint32_t *w, unsigned count)
{
   switch (opcode) {
   case SpvOpFunction: {
      assert(b->func == NULL);
      b->func = rzalloc(b, struct vtn_function);

      list_inithead(&b->func->body);
      b->func->control = w[3];

      const struct glsl_type *result_type =
         vtn_value(b, w[1], vtn_value_type_type)->type->type;
      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function);
      val->func = b->func;

      const struct glsl_type *func_type =
         vtn_value(b, w[4], vtn_value_type_type)->type->type;

      assert(glsl_get_function_return_type(func_type) == result_type);

      nir_function *func =
         nir_function_create(b->shader, ralloc_strdup(b->shader, val->name));

      func->num_params = glsl_get_length(func_type);
      func->params = ralloc_array(b->shader, nir_parameter, func->num_params);
      for (unsigned i = 0; i < func->num_params; i++) {
         const struct glsl_function_param *param =
            glsl_get_function_param(func_type, i);
         func->params[i].type = param->type;
         if (param->in) {
            if (param->out) {
               func->params[i].param_type = nir_parameter_inout;
            } else {
               func->params[i].param_type = nir_parameter_in;
            }
         } else {
            if (param->out) {
               func->params[i].param_type = nir_parameter_out;
            } else {
               assert(!"Parameter is neither in nor out");
            }
         }
      }

      func->return_type = glsl_get_function_return_type(func_type);

      b->func->impl = nir_function_impl_create(func);

      b->func_param_idx = 0;
      break;
   }

   case SpvOpFunctionEnd:
      b->func->end = w;
      b->func = NULL;
      break;

   case SpvOpFunctionParameter: {
      struct vtn_value *val =
         vtn_push_value(b, w[2], vtn_value_type_access_chain);

      struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;

      assert(b->func_param_idx < b->func->impl->num_params);
      nir_variable *param = b->func->impl->params[b->func_param_idx++];

      assert(param->type == type->type);

      /* Name the parameter so it shows up nicely in NIR */
      param->name = ralloc_strdup(param, val->name);

      struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable);
      vtn_var->type = type;
      vtn_var->var = param;
      vtn_var->chain.var = vtn_var;
      vtn_var->chain.length = 0;

      struct vtn_type *without_array = type;
      while (glsl_type_is_array(without_array->type))
         without_array = without_array->array_element;

      if (glsl_type_is_image(without_array->type)) {
         vtn_var->mode = vtn_variable_mode_image;
         param->interface_type = without_array->type;
      } else if (glsl_type_is_sampler(without_array->type)) {
         vtn_var->mode = vtn_variable_mode_sampler;
         param->interface_type = without_array->type;
      } else {
         vtn_var->mode = vtn_variable_mode_param;
      }

      val->access_chain = &vtn_var->chain;
      break;
   }

   case SpvOpLabel: {
      assert(b->block == NULL);
      b->block = rzalloc(b, struct vtn_block);
      b->block->node.type = vtn_cf_node_type_block;
      b->block->label = w;
      vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block;

      if (b->func->start_block == NULL) {
         /* This is the first block encountered for this function. In this
          * case, we set the start block and add it to the list of
          * implemented functions that we'll walk later.
          */
         b->func->start_block = b->block;
         exec_list_push_tail(&b->functions, &b->func->node);
      }
      break;
   }

   case SpvOpSelectionMerge:
   case SpvOpLoopMerge:
      assert(b->block && b->block->merge == NULL);
      b->block->merge = w;
      break;

   case SpvOpBranch:
   case SpvOpBranchConditional:
   case SpvOpSwitch:
   case SpvOpKill:
   case SpvOpReturn:
   case SpvOpReturnValue:
   case SpvOpUnreachable:
      assert(b->block && b->block->branch == NULL);
      b->block->branch = w;
      b->block = NULL;
      break;

   default:
      /* Continue on as per normal */
      return true;
   }

   return true;
}

static void
vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch,
             struct vtn_block *break_block,
             uint32_t block_id, uint32_t val, bool is_default)
{
   struct vtn_block *case_block =
      vtn_value(b, block_id, vtn_value_type_block)->block;

   /* Don't create dummy cases that just break */
   if (case_block == break_block)
      return;

   if (case_block->switch_case == NULL) {
      struct vtn_case *c = ralloc(b, struct vtn_case);

      list_inithead(&c->body);
      c->start_block = case_block;
      c->fallthrough = NULL;
      nir_array_init(&c->values, b);
      c->is_default = false;
      c->visited = false;

      list_addtail(&c->link, &swtch->cases);

      case_block->switch_case = c;
   }

   if (is_default) {
      case_block->switch_case->is_default = true;
   } else {
      nir_array_add(&case_block->switch_case->values, uint32_t, val);
   }
}

/* This function performs a depth-first search of the cases and puts them
 * in fall-through order.
 */
static void
vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse)
{
   if (cse->visited)
      return;

   cse->visited = true;

   list_del(&cse->link);

   if (cse->fallthrough) {
      vtn_order_case(swtch, cse->fallthrough);

      /* If we have a fall-through, place this case right before the case it
       * falls through to.  This ensures that fallthroughs come one after
       * the other.  These two can never get separated because that would
       * imply something else falling through to the same case.  Also, this
       * can't break ordering because the DFS ensures that this case is
       * visited before anything that falls through to it.
       */
      list_addtail(&cse->link, &cse->fallthrough->link);
   } else {
      list_add(&cse->link, &swtch->cases);
   }
}

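/* Worked example of the ordering invariant above: if case A falls through
 * to B and B falls through to C, the DFS visits A, recurses into B, then C.
 * C has no fall-through and goes to the head of the list; then
 * list_addtail(&cse->link, &cse->fallthrough->link) places B immediately
 * before C and A immediately before B, leaving the list in the order
 * A, B, C so that each emitted body can simply run into the next one.
 */
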
static enum vtn_branch_type
vtn_get_branch_type(struct vtn_block *block,
                    struct vtn_case *swcase, struct vtn_block *switch_break,
                    struct vtn_block *loop_break, struct vtn_block *loop_cont)
{
   if (block->switch_case) {
      /* This branch is actually a fallthrough */
      assert(swcase->fallthrough == NULL ||
             swcase->fallthrough == block->switch_case);
      swcase->fallthrough = block->switch_case;
      return vtn_branch_type_switch_fallthrough;
   } else if (block == switch_break) {
      return vtn_branch_type_switch_break;
   } else if (block == loop_break) {
      return vtn_branch_type_loop_break;
   } else if (block == loop_cont) {
      return vtn_branch_type_loop_continue;
   } else {
      return vtn_branch_type_none;
   }
}

static void
vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list,
                    struct vtn_block *start, struct vtn_case *switch_case,
                    struct vtn_block *switch_break,
                    struct vtn_block *loop_break, struct vtn_block *loop_cont,
                    struct vtn_block *end)
{
   struct vtn_block *block = start;
   while (block != end) {
      if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge &&
          !block->loop) {
         struct vtn_loop *loop = ralloc(b, struct vtn_loop);

         loop->node.type = vtn_cf_node_type_loop;
         list_inithead(&loop->body);
         list_inithead(&loop->cont_body);
         loop->control = block->merge[3];

         list_addtail(&loop->node.link, cf_list);
         block->loop = loop;

         struct vtn_block *new_loop_break =
            vtn_value(b, block->merge[1], vtn_value_type_block)->block;
         struct vtn_block *new_loop_cont =
            vtn_value(b, block->merge[2], vtn_value_type_block)->block;

         /* Note: This recursive call will start with the current block as
          * its start block.  If we weren't careful, we would get here
          * again and end up in infinite recursion.  This is why we set
          * block->loop above and check for it before creating one.  This
          * way, we only create the loop once and the second call that
          * tries to handle this loop goes to the cases below and gets
          * handled as a regular block.
          *
          * Note: When we make the recursive walk calls, we pass NULL for
          * the switch break since you have to break out of the loop first.
          * We do, however, still pass the current switch case because it's
          * possible that the merge block for the loop is the start of
          * another case.
          */
         vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL,
                             new_loop_break, new_loop_cont, NULL);
         vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL,
                             new_loop_break, NULL, block);

         block = new_loop_break;
         continue;
      }

      assert(block->node.link.next == NULL);
      list_addtail(&block->node.link, cf_list);

      switch (*block->branch & SpvOpCodeMask) {
      case SpvOpBranch: {
         struct vtn_block *branch_block =
            vtn_value(b, block->branch[1], vtn_value_type_block)->block;

         block->branch_type = vtn_get_branch_type(branch_block,
                                                  switch_case, switch_break,
                                                  loop_break, loop_cont);

         if (block->branch_type != vtn_branch_type_none)
            return;

         block = branch_block;
         continue;
      }

      case SpvOpReturn:
      case SpvOpReturnValue:
         block->branch_type = vtn_branch_type_return;
         return;

      case SpvOpKill:
         block->branch_type = vtn_branch_type_discard;
         return;

      case SpvOpBranchConditional: {
         struct vtn_block *then_block =
            vtn_value(b, block->branch[2], vtn_value_type_block)->block;
         struct vtn_block *else_block =
            vtn_value(b, block->branch[3], vtn_value_type_block)->block;

         struct vtn_if *if_stmt = ralloc(b, struct vtn_if);

         if_stmt->node.type = vtn_cf_node_type_if;
         if_stmt->condition = block->branch[1];
         list_inithead(&if_stmt->then_body);
         list_inithead(&if_stmt->else_body);

         list_addtail(&if_stmt->node.link, cf_list);

         if (block->merge &&
             (*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) {
            if_stmt->control = block->merge[2];
         }

         if_stmt->then_type = vtn_get_branch_type(then_block,
                                                  switch_case, switch_break,
                                                  loop_break, loop_cont);
         if_stmt->else_type = vtn_get_branch_type(else_block,
                                                  switch_case, switch_break,
                                                  loop_break, loop_cont);

         if (if_stmt->then_type == vtn_branch_type_none &&
             if_stmt->else_type == vtn_branch_type_none) {
            /* Neither side of the if is something we can short-circuit. */
            assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
            struct vtn_block *merge_block =
               vtn_value(b, block->merge[1], vtn_value_type_block)->block;

            vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block,
                                switch_case, switch_break,
                                loop_break, loop_cont, merge_block);
            vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block,
                                switch_case, switch_break,
                                loop_break, loop_cont, merge_block);

            enum vtn_branch_type merge_type =
               vtn_get_branch_type(merge_block, switch_case, switch_break,
                                   loop_break, loop_cont);
            if (merge_type == vtn_branch_type_none) {
               block = merge_block;
               continue;
            } else {
               return;
            }
         } else if (if_stmt->then_type != vtn_branch_type_none &&
                    if_stmt->else_type != vtn_branch_type_none) {
            /* Both sides were short-circuited.  We're done here. */
            return;
         } else {
            /* Exactly one side of the branch could be short-circuited.
             * We set the branch up as a predicated break/continue and we
             * continue on with the other side as if it were what comes
             * after the if.
             */
            if (if_stmt->then_type == vtn_branch_type_none) {
               block = then_block;
            } else {
               block = else_block;
            }
            continue;
         }
         unreachable("Should have returned or continued");
      }

      case SpvOpSwitch: {
         assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
         struct vtn_block *break_block =
            vtn_value(b, block->merge[1], vtn_value_type_block)->block;

         struct vtn_switch *swtch = ralloc(b, struct vtn_switch);

         swtch->node.type = vtn_cf_node_type_switch;
         swtch->selector = block->branch[1];
         list_inithead(&swtch->cases);

         list_addtail(&swtch->node.link, cf_list);

         /* First, we go through and record all of the cases. */
         const uint32_t *branch_end =
            block->branch + (block->branch[0] >> SpvWordCountShift);

         vtn_add_case(b, swtch, break_block, block->branch[2], 0, true);
         for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2)
            vtn_add_case(b, swtch, break_block, w[1], w[0], false);

         /* Now, we go through and walk the blocks.  While we walk through
          * the blocks, we also gather the much-needed fall-through
          * information.
          */
         list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) {
            assert(cse->start_block != break_block);
            vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse,
                                break_block, NULL, loop_cont, NULL);
         }

         /* Finally, we walk over all of the cases one more time and put
          * them in fall-through order.
          */
         for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) {
            struct vtn_block *case_block =
               vtn_value(b, *w, vtn_value_type_block)->block;

            if (case_block == break_block)
               continue;

            assert(case_block->switch_case);

            vtn_order_case(swtch, case_block->switch_case);
         }

         block = break_block;
         continue;
      }

      case SpvOpUnreachable:
         return;

      default:
         unreachable("Unhandled opcode");
      }
   }
}

void
vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end)
{
   vtn_foreach_instruction(b, words, end,
                           vtn_cfg_handle_prepass_instruction);

   foreach_list_typed(struct vtn_function, func, node, &b->functions) {
      vtn_cfg_walk_blocks(b, &func->body, func->start_block,
                          NULL, NULL, NULL, NULL, NULL);
   }
}

static bool
vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode,
                           const uint32_t *w, unsigned count)
{
   if (opcode == SpvOpLabel)
      return true; /* Nothing to do */

   /* If this isn't a phi node, stop. */
   if (opcode != SpvOpPhi)
      return false;

   /* For handling phi nodes, we do a poor-man's out-of-ssa on the spot.
    * For each phi, we create a variable with the appropriate type and
    * do a load from that variable.  Then, in a second pass, we add
    * stores to that variable to each of the predecessor blocks.
    *
    * We could do something more intelligent here.  However, in order to
    * handle loops and things properly, we really need dominance
    * information.  It would end up basically being the into-SSA
    * algorithm all over again.  It's easier if we just let
    * lower_vars_to_ssa do that for us instead of repeating it here.
    */
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);

   struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
   nir_variable *phi_var =
      nir_local_variable_create(b->nb.impl, type->type, "phi");
   _mesa_hash_table_insert(b->phi_table, w, phi_var);

   val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var));

   return true;
}

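/* Illustrative sketch of the two-pass phi lowering above (pseudocode, for
 * exposition only).  A SPIR-V phi such as
 *
 *    %x = OpPhi %float %a %blockA %b %blockB
 *
 * becomes, after both passes:
 *
 *    blockA:  phi_var = a;
 *    blockB:  phi_var = b;
 *    merge:   x = load(phi_var);
 *
 * and nir_lower_vars_to_ssa later turns phi_var back into real SSA phis
 * with proper dominance information.
 */
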
static bool
vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
                           const uint32_t *w, unsigned count)
{
   if (opcode != SpvOpPhi)
      return true;

   struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w);
   assert(phi_entry);
   nir_variable *phi_var = phi_entry->data;

   for (unsigned i = 3; i < count; i += 2) {
      struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
      struct vtn_block *pred =
         vtn_value(b, w[i + 1], vtn_value_type_block)->block;

      b->nb.cursor = nir_after_block_before_jump(pred->end_block);

      vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
   }

   return true;
}

static void
vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type,
                nir_variable *switch_fall_var, bool *has_switch_break)
{
   switch (branch_type) {
   case vtn_branch_type_switch_break:
      nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1);
      *has_switch_break = true;
      break;
   case vtn_branch_type_switch_fallthrough:
      break; /* Nothing to do */
   case vtn_branch_type_loop_break:
      nir_jump(&b->nb, nir_jump_break);
      break;
   case vtn_branch_type_loop_continue:
      nir_jump(&b->nb, nir_jump_continue);
      break;
   case vtn_branch_type_return:
      nir_jump(&b->nb, nir_jump_return);
      break;
   case vtn_branch_type_discard: {
      nir_intrinsic_instr *discard =
         nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard);
      nir_builder_instr_insert(&b->nb, &discard->instr);
      break;
   }
   default:
      unreachable("Invalid branch type");
   }
}

static void
vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list,
                 nir_variable *switch_fall_var, bool *has_switch_break,
                 vtn_instruction_handler handler)
{
   list_for_each_entry(struct vtn_cf_node, node, cf_list, link) {
      switch (node->type) {
      case vtn_cf_node_type_block: {
         struct vtn_block *block = (struct vtn_block *)node;

         const uint32_t *block_start = block->label;
         const uint32_t *block_end = block->merge ? block->merge :
                                                    block->branch;

         block_start = vtn_foreach_instruction(b, block_start, block_end,
                                               vtn_handle_phis_first_pass);

         vtn_foreach_instruction(b, block_start, block_end, handler);

         block->end_block = nir_cursor_current_block(b->nb.cursor);

         if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
            struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
            vtn_local_store(b, src,
                            nir_deref_var_create(b, b->impl->return_var));
         }

         if (block->branch_type != vtn_branch_type_none) {
            vtn_emit_branch(b, block->branch_type,
                            switch_fall_var, has_switch_break);
         }

         break;
      }

      case vtn_cf_node_type_if: {
         struct vtn_if *vtn_if = (struct vtn_if *)node;

         nir_if *if_stmt = nir_if_create(b->shader);
         if_stmt->condition =
            nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def);
         nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node);

         bool sw_break = false;

         b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
         if (vtn_if->then_type == vtn_branch_type_none) {
            vtn_emit_cf_list(b, &vtn_if->then_body,
                             switch_fall_var, &sw_break, handler);
         } else {
            vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break);
         }

         b->nb.cursor = nir_after_cf_list(&if_stmt->else_list);
         if (vtn_if->else_type == vtn_branch_type_none) {
            vtn_emit_cf_list(b, &vtn_if->else_body,
                             switch_fall_var, &sw_break, handler);
         } else {
            vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break);
         }

         b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node);

         /* If we encountered a switch break somewhere inside of the if,
          * then it would have been handled correctly by calling
          * emit_cf_list or emit_branch for the interior.  However, we
          * need to predicate everything following on whether or not we're
          * still going.
          */
         if (sw_break) {
            *has_switch_break = true;

            nir_if *switch_if = nir_if_create(b->shader);
            switch_if->condition =
               nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var));
            nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node);

            b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
         }
         break;
      }

      case vtn_cf_node_type_loop: {
         struct vtn_loop *vtn_loop = (struct vtn_loop *)node;

         nir_loop *loop = nir_loop_create(b->shader);
         nir_cf_node_insert(b->nb.cursor, &loop->cf_node);

         b->nb.cursor = nir_after_cf_list(&loop->body);
         vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler);

         if (!list_empty(&vtn_loop->cont_body)) {
            /* If we have a non-trivial continue body then we need to put
             * it at the beginning of the loop with a flag to ensure that
             * it doesn't get executed in the first iteration.
             */
            nir_variable *do_cont =
               nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont");

            b->nb.cursor = nir_before_cf_node(&loop->cf_node);
            nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1);

            b->nb.cursor = nir_before_cf_list(&loop->body);
            nir_if *cont_if = nir_if_create(b->shader);
            cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont));
            nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node);

            b->nb.cursor = nir_after_cf_list(&cont_if->then_list);
            vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler);

            b->nb.cursor = nir_after_cf_node(&cont_if->cf_node);
            nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1);

            b->has_loop_continue = true;
         }

         b->nb.cursor = nir_after_cf_node(&loop->cf_node);
         break;
      }

      case vtn_cf_node_type_switch: {
         struct vtn_switch *vtn_switch = (struct vtn_switch *)node;

         /* First, we create a variable to keep track of whether or not the
          * switch is still going at any given point.  Any switch breaks
          * will set this variable to false.
          */
         nir_variable *fall_var =
            nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall");
         nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1);

         /* Next, we gather up all of the conditions.  We have to do this
          * up-front because we also need to build an "any" condition so
          * that we can use !any for default.
          */
         const int num_cases = list_length(&vtn_switch->cases);
         NIR_VLA(nir_ssa_def *, conditions, num_cases);

         nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def;
         /* An accumulation of all conditions.  Used for the default */
         nir_ssa_def *any = NULL;

         int i = 0;
         list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) {
            if (cse->is_default) {
               conditions[i++] = NULL;
               continue;
            }

            nir_ssa_def *cond = NULL;
            nir_array_foreach(&cse->values, uint32_t, val) {
               nir_ssa_def *is_val =
                  nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val));

               cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val;
            }

            any = any ? nir_ior(&b->nb, any, cond) : cond;
            conditions[i++] = cond;
         }
         assert(i == num_cases);

         /* Now we can walk the list of cases and actually emit code */
         i = 0;
         list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) {
            /* Figure out the condition */
            nir_ssa_def *cond = conditions[i++];
            if (cse->is_default) {
               assert(cond == NULL);
               cond = nir_inot(&b->nb, any);
            }
            /* Take fallthrough into account */
            cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var));

            nir_if *case_if = nir_if_create(b->nb.shader);
            case_if->condition = nir_src_for_ssa(cond);
            nir_cf_node_insert(b->nb.cursor, &case_if->cf_node);

            bool has_break = false;
            b->nb.cursor = nir_after_cf_list(&case_if->then_list);
            nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1);
            vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler);
            (void)has_break; /* We don't care */

            b->nb.cursor = nir_after_cf_node(&case_if->cf_node);
         }
         assert(i == num_cases);

         break;
      }

      default:
         unreachable("Invalid CF node type");
      }
   }
}

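/* Sketch of the continue-body transform in the loop case above
 * (pseudocode, for exposition only):
 *
 *    cont = false;
 *    loop {
 *       if (cont) { <continue body> }
 *       cont = true;
 *       <loop body>
 *    }
 *
 * The flag keeps the continue body from running on the first iteration
 * while still placing it before the loop body on every later one.
 */
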
void
vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
                  vtn_instruction_handler instruction_handler)
{
   nir_builder_init(&b->nb, func->impl);
   b->nb.cursor = nir_after_cf_list(&func->impl->body);
   b->has_loop_continue = false;
   b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);

   vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler);

   vtn_foreach_instruction(b, func->start_block->label, func->end,
                           vtn_handle_phi_second_pass);

   /* Continue blocks for loops get inserted before the body of the loop
    * but instructions in the continue may use SSA defs in the loop body.
    * Therefore, we need to repair SSA to insert the needed phi nodes.
    */
   if (b->has_loop_continue)
      nir_repair_ssa_impl(func->impl);
}

666
src/compiler/spirv/vtn_glsl450.c
Normal file
@@ -0,0 +1,666 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "vtn_private.h"
#include "GLSL.std.450.h"

#define M_PIf   ((float) M_PI)
#define M_PI_2f ((float) M_PI_2)
#define M_PI_4f ((float) M_PI_4)

static nir_ssa_def *
build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
{
   unsigned swiz[4] = {1, 0, 0, 0};
   nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
   return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
}

static nir_ssa_def *
build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
{
   unsigned yzx[4] = {1, 2, 0, 0};
   unsigned zxy[4] = {2, 0, 1, 0};

   nir_ssa_def *prod0 =
      nir_fmul(b, col[0],
               nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
                        nir_swizzle(b, col[2], zxy, 3, true)));
   nir_ssa_def *prod1 =
      nir_fmul(b, col[0],
               nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
                        nir_swizzle(b, col[2], yzx, 3, true)));

   nir_ssa_def *diff = nir_fsub(b, prod0, prod1);

   return nir_fadd(b, nir_channel(b, diff, 0),
                   nir_fadd(b, nir_channel(b, diff, 1),
                            nir_channel(b, diff, 2)));
}

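/* Note: the 3x3 path above is the scalar triple product
 * det = dot(col0, cross(col1, col2)) with the cross product expanded via
 * swizzles, cross(a, b) = a.yzx * b.zxy - a.zxy * b.yzx, and the final
 * nir_fadd chain performing the dot-product reduction.
 */
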
static nir_ssa_def *
build_mat4_det(nir_builder *b, nir_ssa_def **col)
{
   nir_ssa_def *subdet[4];
   for (unsigned i = 0; i < 4; i++) {
      unsigned swiz[3];
      for (unsigned j = 0; j < 3; j++)
         swiz[j] = j + (j >= i);

      nir_ssa_def *subcol[3];
      subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
      subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
      subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);

      subdet[i] = build_mat3_det(b, subcol);
   }

   nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));

   return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
                               nir_channel(b, prod, 1)),
                   nir_fsub(b, nir_channel(b, prod, 2),
                            nir_channel(b, prod, 3)));
}

static nir_ssa_def *
build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
{
   unsigned size = glsl_get_vector_elements(src->type);

   nir_ssa_def *cols[4];
   for (unsigned i = 0; i < size; i++)
      cols[i] = src->elems[i]->def;

   switch (size) {
   case 2: return build_mat2_det(&b->nb, cols);
   case 3: return build_mat3_det(&b->nb, cols);
   case 4: return build_mat4_det(&b->nb, cols);
   default:
      unreachable("Invalid matrix size");
   }
}

/* Computes the determinant of the submatrix given by taking src and
 * removing the specified row and column.
 */
static nir_ssa_def *
build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
                 unsigned size, unsigned row, unsigned col)
{
   assert(row < size && col < size);
   if (size == 2) {
      return nir_channel(b, src->elems[1 - col]->def, 1 - row);
   } else {
      /* Swizzle to get all but the specified row */
      unsigned swiz[3];
      for (unsigned j = 0; j < 3; j++)
         swiz[j] = j + (j >= row);

      /* Grab all but the specified column */
      nir_ssa_def *subcol[3];
      for (unsigned j = 0; j < size; j++) {
         if (j != col) {
            subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def,
                                                swiz, size - 1, true);
         }
      }

      if (size == 3) {
         return build_mat2_det(b, subcol);
      } else {
         assert(size == 4);
         return build_mat3_det(b, subcol);
      }
   }
}

static struct vtn_ssa_value *
matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
{
   nir_ssa_def *adj_col[4];
   unsigned size = glsl_get_vector_elements(src->type);

   /* Build up an adjugate matrix */
   for (unsigned c = 0; c < size; c++) {
      nir_ssa_def *elem[4];
      for (unsigned r = 0; r < size; r++) {
         elem[r] = build_mat_subdet(&b->nb, src, size, c, r);

         if ((r + c) % 2)
            elem[r] = nir_fneg(&b->nb, elem[r]);
      }

      adj_col[c] = nir_vec(&b->nb, elem, size);
   }

   nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src));

   struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
   for (unsigned i = 0; i < size; i++)
      val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv);

   return val;
}

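/* Note: this implements A^-1 = adj(A) / det(A), where adj(A) is the
 * transpose of the cofactor matrix.  Passing (c, r) rather than (r, c) to
 * build_mat_subdet() when filling output column c, row r is what performs
 * that transpose; the (r + c) % 2 negation supplies the cofactor sign.
 */
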
static nir_ssa_def *
build_length(nir_builder *b, nir_ssa_def *vec)
{
   switch (vec->num_components) {
   case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec));
   case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec));
   case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec));
   case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec));
   default:
      unreachable("Invalid number of components");
   }
}

static inline nir_ssa_def *
build_fclamp(nir_builder *b,
             nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
   return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
}

/**
 * Return e^x.
 */
static nir_ssa_def *
build_exp(nir_builder *b, nir_ssa_def *x)
{
   return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E)));
}

/**
 * Return ln(x) - the natural logarithm of x.
 */
static nir_ssa_def *
build_log(nir_builder *b, nir_ssa_def *x)
{
   return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E));
}

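/* Both helpers rely on the base-change identities
 *
 *    e^x   = 2^(x * log2(e))
 *    ln(x) = log2(x) / log2(e)
 *
 * so a single hardware fexp2/flog2 instruction serves for both the
 * natural-base and base-2 operations.
 */
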
/**
 * Approximate asin(x) by the formula:
 *    asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
 *
 * which is correct to first order at x=0 and x=±1 regardless of the p
 * coefficients but can be made second-order correct at both ends by selecting
 * the fit coefficients appropriately.  Different p coefficients can be used
 * in the asin and acos implementation to minimize some relative error metric
 * in each case.
 */
static nir_ssa_def *
build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
{
   nir_ssa_def *abs_x = nir_fabs(b, x);
   return nir_fmul(b, nir_fsign(b, x),
                   nir_fsub(b, nir_imm_float(b, M_PI_2f),
                            nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
                                     nir_fadd(b, nir_imm_float(b, M_PI_2f),
                                              nir_fmul(b, abs_x,
                                                       nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
                                                                nir_fmul(b, abs_x,
                                                                         nir_fadd(b, nir_imm_float(b, p0),
                                                                                  nir_fmul(b, abs_x,
                                                                                           nir_imm_float(b, p1))))))))));
}

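/* Sanity check of the formula above: at |x| = 1 the sqrt(1 - |x|) factor
 * vanishes, so asin~(±1) = ±pi/2 exactly for any choice of p0/p1, and at
 * x = 0 the sign(x) factor makes the result 0.  The p coefficients only
 * shape the approximation in the interior of the interval.
 */
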
/**
 * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
 */
static nir_ssa_def *
build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
{
   nir_ssa_def *accum = xs[0];

   for (int i = 1; i < terms; i++)
      accum = nir_fadd(b, accum, xs[i]);

   return accum;
}

static nir_ssa_def *
build_atan(nir_builder *b, nir_ssa_def *y_over_x)
{
   nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
   nir_ssa_def *one = nir_imm_float(b, 1.0f);

   /*
    * range-reduction, first step:
    *
    *      / y_over_x         if |y_over_x| <= 1.0;
    * x = <
    *      \ 1.0 / y_over_x   otherwise
    */
   nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
                             nir_fmax(b, abs_y_over_x, one));

   /*
    * approximate atan by evaluating polynomial:
    *
    * x   * 0.9999793128310355 - x^3  * 0.3326756418091246 +
    * x^5 * 0.1938924977115610 - x^7  * 0.1173503194786851 +
    * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
    */
   nir_ssa_def *x_2 = nir_fmul(b, x, x);
   nir_ssa_def *x_3 = nir_fmul(b, x_2, x);
   nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2);
   nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2);
   nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2);
   nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);

   nir_ssa_def *polynomial_terms[] = {
      nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)),
      nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)),
      nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)),
      nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)),
      nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)),
      nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
   };

   nir_ssa_def *tmp =
      build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));

   /* range-reduction fixup */
   tmp = nir_fadd(b, tmp,
                  nir_fmul(b,
                           nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
                           nir_fadd(b, nir_fmul(b, tmp,
                                                nir_imm_float(b, -2.0f)),
                                    nir_imm_float(b, M_PI_2f))));

   /* sign fixup */
   return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
}

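/* Note on the range-reduction fixup above: it uses the identity
 * atan(t) = pi/2 - atan(1/t) for t > 1.  The nir_b2f(flt(1, |t|)) factor
 * selects between the two results without a branch:
 * tmp + 1.0 * (-2.0 * tmp + pi/2) == pi/2 - tmp when the reduction was
 * applied, and tmp + 0.0 * (...) == tmp when it was not.
 */
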
static nir_ssa_def *
build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
{
   nir_ssa_def *zero = nir_imm_float(b, 0.0f);

   /* If |x| >= 1.0e-8 * |y|: */
   nir_ssa_def *condition =
      nir_fge(b, nir_fabs(b, x),
              nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));

   /* Then...call atan(y/x) and fix it up: */
   nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
   nir_ssa_def *r_then =
      nir_bcsel(b, nir_flt(b, x, zero),
                nir_fadd(b, atan1,
                         nir_bcsel(b, nir_fge(b, y, zero),
                                   nir_imm_float(b, M_PIf),
                                   nir_imm_float(b, -M_PIf))),
                atan1);

   /* Else... */
   nir_ssa_def *r_else =
      nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));

   return nir_bcsel(b, condition, r_then, r_else);
}

static nir_ssa_def *
build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
{
   nir_ssa_def *abs_x = nir_fabs(b, x);
   nir_ssa_def *zero = nir_imm_float(b, 0.0f);

   /* Single-precision floating-point values are stored as
    *   1 sign bit;
    *   8 exponent bits;
    *   23 mantissa bits.
    *
    * An exponent shift of 23 will shift the mantissa out, leaving only the
    * exponent and sign bit (which itself may be zero, if the absolute value
    * was taken before the bitcast and shift).
    */
   nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
   nir_ssa_def *exponent_bias = nir_imm_int(b, -126);

   nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);

   /* Exponent of floating-point values in the range [0.5, 1.0). */
   nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u);

   nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);

   *exponent =
      nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
               nir_bcsel(b, is_not_zero, exponent_bias, zero));

   return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
                  nir_bcsel(b, is_not_zero, exponent_value, zero));
}

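/* A self-contained CPU-side sketch (hypothetical helper, not driver code)
 * of the same bit manipulation, so the shader math above can be checked
 * against ordinary C: for IEEE-754 single precision, x = m * 2^e with m in
 * [0.5, 1.0), ignoring denormals, infinities, and NaN just as the shader
 * version does.
 */
#include <stdint.h>
#include <string.h>

static float
frexp_bits_sketch(float x, int *exponent)
{
   uint32_t bits;
   memcpy(&bits, &x, sizeof(bits));

   uint32_t abs_bits = bits & 0x7fffffffu;
   int is_not_zero = abs_bits != 0;

   /* Shift the mantissa out, leaving the biased exponent field. */
   *exponent = (int)(abs_bits >> 23) + (is_not_zero ? -126 : 0);

   /* Keep sign and mantissa; force the exponent into [0.5, 1.0). */
   bits = (bits & 0x807fffffu) | (is_not_zero ? 0x3f000000u : 0u);
   memcpy(&x, &bits, sizeof(x));
   return x;
}
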
static nir_op
vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode)
{
   switch (opcode) {
   case GLSLstd450Round: return nir_op_fround_even;
   case GLSLstd450RoundEven: return nir_op_fround_even;
   case GLSLstd450Trunc: return nir_op_ftrunc;
   case GLSLstd450FAbs: return nir_op_fabs;
   case GLSLstd450SAbs: return nir_op_iabs;
   case GLSLstd450FSign: return nir_op_fsign;
   case GLSLstd450SSign: return nir_op_isign;
   case GLSLstd450Floor: return nir_op_ffloor;
   case GLSLstd450Ceil: return nir_op_fceil;
   case GLSLstd450Fract: return nir_op_ffract;
   case GLSLstd450Sin: return nir_op_fsin;
   case GLSLstd450Cos: return nir_op_fcos;
   case GLSLstd450Pow: return nir_op_fpow;
   case GLSLstd450Exp2: return nir_op_fexp2;
   case GLSLstd450Log2: return nir_op_flog2;
   case GLSLstd450Sqrt: return nir_op_fsqrt;
   case GLSLstd450InverseSqrt: return nir_op_frsq;
   case GLSLstd450FMin: return nir_op_fmin;
   case GLSLstd450UMin: return nir_op_umin;
   case GLSLstd450SMin: return nir_op_imin;
   case GLSLstd450FMax: return nir_op_fmax;
   case GLSLstd450UMax: return nir_op_umax;
   case GLSLstd450SMax: return nir_op_imax;
   case GLSLstd450FMix: return nir_op_flrp;
   case GLSLstd450Fma: return nir_op_ffma;
   case GLSLstd450Ldexp: return nir_op_ldexp;
   case GLSLstd450FindILsb: return nir_op_find_lsb;
   case GLSLstd450FindSMsb: return nir_op_ifind_msb;
   case GLSLstd450FindUMsb: return nir_op_ufind_msb;

   /* Packing/Unpacking functions */
   case GLSLstd450PackSnorm4x8: return nir_op_pack_snorm_4x8;
   case GLSLstd450PackUnorm4x8: return nir_op_pack_unorm_4x8;
   case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16;
   case GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16;
   case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16;
   case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8;
   case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8;
   case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16;
   case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16;
   case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16;

   default:
      unreachable("No NIR equivalent");
   }
}

static void
handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
                   const uint32_t *w, unsigned count)
{
   struct nir_builder *nb = &b->nb;
   const struct glsl_type *dest_type =
      vtn_value(b, w[1], vtn_value_type_type)->type->type;

   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   val->ssa = vtn_create_ssa_value(b, dest_type);

   /* Collect the various SSA sources */
   unsigned num_inputs = count - 5;
   nir_ssa_def *src[3] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++)
      src[i] = vtn_ssa_value(b, w[i + 5])->def;

   switch (entrypoint) {
   case GLSLstd450Radians:
      val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251));
      return;
   case GLSLstd450Degrees:
      val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131));
      return;
   case GLSLstd450Tan:
      val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]),
                               nir_fcos(nb, src[0]));
      return;

   case GLSLstd450Modf: {
      nir_ssa_def *sign = nir_fsign(nb, src[0]);
      nir_ssa_def *abs = nir_fabs(nb, src[0]);
      val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
      nir_store_deref_var(nb, vtn_nir_deref(b, w[6]),
                          nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf);
      return;
   }

   case GLSLstd450ModfStruct: {
      nir_ssa_def *sign = nir_fsign(nb, src[0]);
      nir_ssa_def *abs = nir_fabs(nb, src[0]);
      assert(glsl_type_is_struct(val->ssa->type));
      val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
      val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
      return;
   }

   case GLSLstd450Step:
      val->ssa->def = nir_sge(nb, src[1], src[0]);
      return;

   case GLSLstd450Length:
      val->ssa->def = build_length(nb, src[0]);
      return;
   case GLSLstd450Distance:
      val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1]));
      return;
   case GLSLstd450Normalize:
      val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0]));
      return;

   case GLSLstd450Exp:
      val->ssa->def = build_exp(nb, src[0]);
      return;

   case GLSLstd450Log:
      val->ssa->def = build_log(nb, src[0]);
      return;

   case GLSLstd450FClamp:
      val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
      return;
   case GLSLstd450UClamp:
      val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]);
      return;
   case GLSLstd450SClamp:
      val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]);
      return;

   case GLSLstd450Cross: {
      unsigned yzx[4] = { 1, 2, 0, 0 };
      unsigned zxy[4] = { 2, 0, 1, 0 };
      val->ssa->def =
         nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true),
                               nir_swizzle(nb, src[1], zxy, 3, true)),
                  nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true),
                           nir_swizzle(nb, src[1], yzx, 3, true)));
      return;
   }

   case GLSLstd450SmoothStep: {
      /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
      nir_ssa_def *t =
         build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]),
                                   nir_fsub(nb, src[1], src[0])),
                      nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0));
      /* result = t * t * (3 - 2 * t) */
      val->ssa->def =
         nir_fmul(nb, t, nir_fmul(nb, t,
                                  nir_fsub(nb, nir_imm_float(nb, 3.0),
                                           nir_fmul(nb, nir_imm_float(nb, 2.0), t))));
      return;
   }

   case GLSLstd450FaceForward:
      val->ssa->def =
         nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]),
                               nir_imm_float(nb, 0.0)),
                   src[0], nir_fneg(nb, src[0]));
      return;

   case GLSLstd450Reflect:
      /* I - 2 * dot(N, I) * N */
      val->ssa->def =
         nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0),
                                       nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
                                                src[1])));
      return;

   case GLSLstd450Refract: {
      nir_ssa_def *I = src[0];
      nir_ssa_def *N = src[1];
      nir_ssa_def *eta = src[2];
      nir_ssa_def *n_dot_i = nir_fdot(nb, N, I);
      nir_ssa_def *one = nir_imm_float(nb, 1.0);
      nir_ssa_def *zero = nir_imm_float(nb, 0.0);
      /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
      nir_ssa_def *k =
         nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
                      nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
      nir_ssa_def *result =
         nir_fsub(nb, nir_fmul(nb, eta, I),
                  nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
                                        nir_fsqrt(nb, k)), N));
      /* XXX: bcsel, or if statement? */
      val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
      return;
   }

   case GLSLstd450Sinh:
      /* 0.5 * (e^x - e^(-x)) */
      val->ssa->def =
         nir_fmul(nb, nir_imm_float(nb, 0.5f),
                  nir_fsub(nb, build_exp(nb, src[0]),
                           build_exp(nb, nir_fneg(nb, src[0]))));
      return;

   case GLSLstd450Cosh:
      /* 0.5 * (e^x + e^(-x)) */
|
||||
val->ssa->def =
|
||||
nir_fmul(nb, nir_imm_float(nb, 0.5f),
|
||||
nir_fadd(nb, build_exp(nb, src[0]),
|
||||
build_exp(nb, nir_fneg(nb, src[0]))));
|
||||
return;
|
||||
|
||||
case GLSLstd450Tanh:
|
||||
/* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
|
||||
val->ssa->def =
|
||||
nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
|
||||
nir_fsub(nb, build_exp(nb, src[0]),
|
||||
build_exp(nb, nir_fneg(nb, src[0])))),
|
||||
nir_fmul(nb, nir_imm_float(nb, 0.5f),
|
||||
nir_fadd(nb, build_exp(nb, src[0]),
|
||||
build_exp(nb, nir_fneg(nb, src[0])))));
|
||||
return;
|
||||
|
||||
case GLSLstd450Asinh:
|
||||
val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
|
||||
build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
|
||||
nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),
|
||||
nir_imm_float(nb, 1.0f))))));
|
||||
return;
|
||||
case GLSLstd450Acosh:
|
||||
val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
|
||||
nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
|
||||
nir_imm_float(nb, 1.0f)))));
|
||||
return;
|
||||
case GLSLstd450Atanh: {
|
||||
nir_ssa_def *one = nir_imm_float(nb, 1.0);
|
||||
val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
|
||||
build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
|
||||
nir_fsub(nb, one, src[0]))));
|
||||
return;
|
||||
}
|
||||
|
||||
case GLSLstd450Asin:
|
||||
val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955);
|
||||
return;
|
||||
|
||||
case GLSLstd450Acos:
|
||||
val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f),
|
||||
build_asin(nb, src[0], 0.08132463, -0.02363318));
|
||||
return;
|
||||
|
||||
case GLSLstd450Atan:
|
||||
val->ssa->def = build_atan(nb, src[0]);
|
||||
return;
|
||||
|
||||
case GLSLstd450Atan2:
|
||||
val->ssa->def = build_atan2(nb, src[0], src[1]);
|
||||
return;
|
||||
|
||||
case GLSLstd450Frexp: {
|
||||
nir_ssa_def *exponent;
|
||||
val->ssa->def = build_frexp(nb, src[0], &exponent);
|
||||
nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
|
||||
return;
|
||||
}
|
||||
|
||||
case GLSLstd450FrexpStruct: {
|
||||
assert(glsl_type_is_struct(val->ssa->type));
|
||||
val->ssa->elems[0]->def = build_frexp(nb, src[0],
|
||||
&val->ssa->elems[1]->def);
|
||||
return;
|
||||
}
|
||||
|
||||
default:
|
||||
val->ssa->def =
|
||||
nir_build_alu(&b->nb, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint),
|
||||
src[0], src[1], src[2], NULL);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
|
||||
const uint32_t *w, unsigned count)
|
||||
{
|
||||
switch ((enum GLSLstd450)ext_opcode) {
|
||||
case GLSLstd450Determinant: {
|
||||
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
|
||||
val->ssa = rzalloc(b, struct vtn_ssa_value);
|
||||
val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
|
||||
val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5]));
|
||||
break;
|
||||
}
|
||||
|
||||
case GLSLstd450MatrixInverse: {
|
||||
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
|
||||
val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5]));
|
||||
break;
|
||||
}
|
||||
|
||||
case GLSLstd450InterpolateAtCentroid:
|
||||
case GLSLstd450InterpolateAtSample:
|
||||
case GLSLstd450InterpolateAtOffset:
|
||||
unreachable("Unhandled opcode");
|
||||
|
||||
default:
|
||||
handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
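For orientation, here is a short reference sketch, as a C comment, of the word layout the handlers above consume. It follows the standard SPIR-V OpExtInst encoding and explains why the operand loop starts at w[5] and why num_inputs is count - 5:

/* SPIR-V OpExtInst word layout, as consumed by the handlers above:
 *
 *   w[0]   opcode and total word count (consumed by vtn_foreach_instruction)
 *   w[1]   result type id  -> dest_type
 *   w[2]   result id       -> val
 *   w[3]   id of the imported extended instruction set (GLSL.std.450)
 *   w[4]   instruction number within that set (ext_opcode / entrypoint)
 *   w[5..] operand ids     -> src[0..2]
 *
 * Hence the operand count is count - 5.
 */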
484
src/compiler/spirv/vtn_private.h
Normal file
@@ -0,0 +1,484 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_array.h"
#include "nir_spirv.h"
#include "spirv.h"

struct vtn_builder;
struct vtn_decoration;

enum vtn_value_type {
   vtn_value_type_invalid = 0,
   vtn_value_type_undef,
   vtn_value_type_string,
   vtn_value_type_decoration_group,
   vtn_value_type_type,
   vtn_value_type_constant,
   vtn_value_type_access_chain,
   vtn_value_type_function,
   vtn_value_type_block,
   vtn_value_type_ssa,
   vtn_value_type_extension,
   vtn_value_type_image_pointer,
   vtn_value_type_sampled_image,
};

enum vtn_branch_type {
   vtn_branch_type_none,
   vtn_branch_type_switch_break,
   vtn_branch_type_switch_fallthrough,
   vtn_branch_type_loop_break,
   vtn_branch_type_loop_continue,
   vtn_branch_type_discard,
   vtn_branch_type_return,
};

enum vtn_cf_node_type {
   vtn_cf_node_type_block,
   vtn_cf_node_type_if,
   vtn_cf_node_type_loop,
   vtn_cf_node_type_switch,
};

struct vtn_cf_node {
   struct list_head link;
   enum vtn_cf_node_type type;
};

struct vtn_loop {
   struct vtn_cf_node node;

   /* The main body of the loop */
   struct list_head body;

   /* The "continue" part of the loop.  This gets executed after the body
    * and is where you go when you hit a continue.
    */
   struct list_head cont_body;

   SpvLoopControlMask control;
};

struct vtn_if {
   struct vtn_cf_node node;

   uint32_t condition;

   enum vtn_branch_type then_type;
   struct list_head then_body;

   enum vtn_branch_type else_type;
   struct list_head else_body;

   SpvSelectionControlMask control;
};

struct vtn_case {
   struct list_head link;

   struct list_head body;

   /* The block that starts this case */
   struct vtn_block *start_block;

   /* The fallthrough case, if any */
   struct vtn_case *fallthrough;

   /* The uint32_t values that map to this case */
   nir_array values;

   /* True if this is the default case */
   bool is_default;

   /* Initialized to false; used when sorting the list of cases */
   bool visited;
};

struct vtn_switch {
   struct vtn_cf_node node;

   uint32_t selector;

   struct list_head cases;
};

struct vtn_block {
   struct vtn_cf_node node;

   /** A pointer to the label instruction */
   const uint32_t *label;

   /** A pointer to the merge instruction (or NULL if none exists) */
   const uint32_t *merge;

   /** A pointer to the branch instruction that ends this block */
   const uint32_t *branch;

   enum vtn_branch_type branch_type;

   /** Points to the loop that this block starts (if it starts a loop) */
   struct vtn_loop *loop;

   /** Points to the switch case started by this block (if any) */
   struct vtn_case *switch_case;

   /** The last block in this SPIR-V block. */
   nir_block *end_block;
};

struct vtn_function {
   struct exec_node node;

   nir_function_impl *impl;
   struct vtn_block *start_block;

   struct list_head body;

   const uint32_t *end;

   SpvFunctionControlMask control;
};

typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t,
                                        const uint32_t *, unsigned);

void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words,
                   const uint32_t *end);
void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
                       vtn_instruction_handler instruction_handler);

const uint32_t *
vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
                        const uint32_t *end, vtn_instruction_handler handler);

struct vtn_ssa_value {
   union {
      nir_ssa_def *def;
      struct vtn_ssa_value **elems;
   };

   /* For matrices, if this is non-NULL, then this value is actually the
    * transpose of some other value.  The value that `transposed` points to
    * always dominates this value.
    */
   struct vtn_ssa_value *transposed;

   const struct glsl_type *type;
};

struct vtn_type {
   const struct glsl_type *type;

   /* The value that declares this type.  Used for finding decorations */
   struct vtn_value *val;

   /* for matrices, whether the matrix is stored row-major */
   bool row_major;

   /* for structs, the offset of each member */
   unsigned *offsets;

   /* for structs, whether it was decorated as a "non-SSBO-like" block */
   bool block;

   /* for structs, whether it was decorated as an "SSBO-like" block */
   bool buffer_block;

   /* for structs with block == true, whether this is a builtin block (i.e. a
    * block that contains only builtins).
    */
   bool builtin_block;

   /* Image format for image_load_store type images */
   unsigned image_format;

   /* Access qualifier for storage images */
   SpvAccessQualifier access_qualifier;

   /* for arrays and matrices, the array stride */
   unsigned stride;

   /* for arrays, the vtn_type for the elements of the array */
   struct vtn_type *array_element;

   /* for structures, the vtn_type for each member */
   struct vtn_type **members;

   /* Whether this type, or a parent type, has been decorated as a builtin */
   bool is_builtin;

   SpvBuiltIn builtin;
};

struct vtn_variable;

enum vtn_access_mode {
   vtn_access_mode_id,
   vtn_access_mode_literal,
};

struct vtn_access_link {
   enum vtn_access_mode mode;
   uint32_t id;
};

struct vtn_access_chain {
   struct vtn_variable *var;

   uint32_t length;

   /* Struct elements and array offsets */
   struct vtn_access_link link[0];
};

enum vtn_variable_mode {
   vtn_variable_mode_local,
   vtn_variable_mode_global,
   vtn_variable_mode_param,
   vtn_variable_mode_ubo,
   vtn_variable_mode_ssbo,
   vtn_variable_mode_push_constant,
   vtn_variable_mode_image,
   vtn_variable_mode_sampler,
   vtn_variable_mode_workgroup,
   vtn_variable_mode_input,
   vtn_variable_mode_output,
};

struct vtn_variable {
   enum vtn_variable_mode mode;

   struct vtn_type *type;

   unsigned descriptor_set;
   unsigned binding;

   nir_variable *var;
   nir_variable **members;

   struct vtn_access_chain chain;
};

struct vtn_image_pointer {
   struct vtn_access_chain *image;
   nir_ssa_def *coord;
   nir_ssa_def *sample;
};

struct vtn_sampled_image {
   struct vtn_access_chain *image; /* Image or array of images */
   struct vtn_access_chain *sampler; /* Sampler */
};

struct vtn_value {
   enum vtn_value_type value_type;
   const char *name;
   struct vtn_decoration *decoration;
   union {
      void *ptr;
      char *str;
      struct vtn_type *type;
      struct {
         nir_constant *constant;
         const struct glsl_type *const_type;
      };
      struct vtn_access_chain *access_chain;
      struct vtn_image_pointer *image;
      struct vtn_sampled_image *sampled_image;
      struct vtn_function *func;
      struct vtn_block *block;
      struct vtn_ssa_value *ssa;
      vtn_instruction_handler ext_handler;
   };
};

#define VTN_DEC_DECORATION -1
#define VTN_DEC_EXECUTION_MODE -2
#define VTN_DEC_STRUCT_MEMBER0 0

struct vtn_decoration {
   struct vtn_decoration *next;

   /* Specifies how to apply this decoration.  Negative values represent a
    * decoration or execution mode.  (See the VTN_DEC_ #defines above.)
    * Non-negative values specify that it applies to a structure member.
    */
   int scope;

   const uint32_t *literals;
   struct vtn_value *group;

   union {
      SpvDecoration decoration;
      SpvExecutionMode exec_mode;
   };
};

struct vtn_builder {
   nir_builder nb;

   nir_shader *shader;
   nir_function_impl *impl;
   struct vtn_block *block;

   /* Current file, line, and column.  Useful for debugging.  Set
    * automatically by vtn_foreach_instruction.
    */
   char *file;
   int line, col;

   /*
    * In SPIR-V, constants are global, whereas in NIR, the load_const
    * instruction we use is per-function.  So while we parse each function,
    * we keep a hash table of constants we've resolved to nir_ssa_value's
    * so far, and we lazily resolve them when we see them used in a
    * function.
    */
   struct hash_table *const_table;

   /*
    * Map from phi instructions (pointer to the start of the instruction)
    * to the variable corresponding to it.
    */
   struct hash_table *phi_table;

   unsigned num_specializations;
   struct nir_spirv_specialization *specializations;

   unsigned value_id_bound;
   struct vtn_value *values;

   gl_shader_stage entry_point_stage;
   const char *entry_point_name;
   struct vtn_value *entry_point;
   bool origin_upper_left;

   struct vtn_function *func;
   struct exec_list functions;

   /* Current function parameter index */
   unsigned func_param_idx;

   bool has_loop_continue;
};

static inline struct vtn_value *
vtn_push_value(struct vtn_builder *b, uint32_t value_id,
               enum vtn_value_type value_type)
{
   assert(value_id < b->value_id_bound);
   assert(b->values[value_id].value_type == vtn_value_type_invalid);

   b->values[value_id].value_type = value_type;

   return &b->values[value_id];
}

static inline struct vtn_value *
vtn_untyped_value(struct vtn_builder *b, uint32_t value_id)
{
   assert(value_id < b->value_id_bound);
   return &b->values[value_id];
}

static inline struct vtn_value *
vtn_value(struct vtn_builder *b, uint32_t value_id,
          enum vtn_value_type value_type)
{
   struct vtn_value *val = vtn_untyped_value(b, value_id);
   assert(val->value_type == value_type);
   return val;
}

struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id);

struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b,
                                           const struct glsl_type *type);

struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b,
                                        struct vtn_ssa_value *src);

nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src,
                                unsigned index);
nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
                                        nir_ssa_def *index);
nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src,
                               nir_ssa_def *insert, unsigned index);
nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
                                       nir_ssa_def *insert, nir_ssa_def *index);

nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id);

nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b,
                                         struct vtn_access_chain *chain);
nir_ssa_def *
vtn_access_chain_to_offset(struct vtn_builder *b,
                           struct vtn_access_chain *chain,
                           nir_ssa_def **index_out, struct vtn_type **type_out,
                           unsigned *end_idx_out, bool stop_at_matrix);

struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src);

void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
                     nir_deref_var *dest);

struct vtn_ssa_value *
vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src);

void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
                        struct vtn_access_chain *dest);

void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
                          const uint32_t *w, unsigned count);


typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *,
                                          struct vtn_value *,
                                          int member,
                                          const struct vtn_decoration *,
                                          void *);

void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
                            vtn_decoration_foreach_cb cb, void *data);

typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *,
                                              struct vtn_value *,
                                              const struct vtn_decoration *,
                                              void *);

void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
                                vtn_execution_mode_foreach_cb cb, void *data);

nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap);

void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
                    const uint32_t *w, unsigned count);

bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
                                    const uint32_t *words, unsigned count);
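As a usage sketch of the value-table helpers above (the id 42 and the function are invented for illustration, and glsl_float_type() is assumed to be in scope via the NIR type headers): an id is claimed exactly once with vtn_push_value(), and later lookups with vtn_value() assert the expected type:

/* Hypothetical illustration of the single-assignment value table. */
static void
example_define_and_use(struct vtn_builder *b)
{
   /* The defining instruction for id 42 claims the slot and sets its
    * type; a second vtn_push_value() with the same id would assert.
    */
   struct vtn_value *def = vtn_push_value(b, 42, vtn_value_type_ssa);
   def->ssa = vtn_create_ssa_value(b, glsl_float_type());

   /* A later use of id 42 asserts the slot really holds an SSA value. */
   struct vtn_value *use = vtn_value(b, 42, vtn_value_type_ssa);
   nir_ssa_def *ssa = use->ssa->def;
   (void)ssa;
}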
1415
src/compiler/spirv/vtn_variables.c
Normal file
File diff suppressed because it is too large
26
src/intel/Makefile.am
Normal file
@@ -0,0 +1,26 @@
# Copyright © 2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

SUBDIRS = genxml isl

if HAVE_INTEL_VULKAN
SUBDIRS += vulkan
endif
1
src/intel/genxml/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
gen*_pack.h
32
src/intel/genxml/Makefile.am
Normal file
@@ -0,0 +1,32 @@
# Copyright © 2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

BUILT_SOURCES = \
	gen6_pack.h \
	gen7_pack.h \
	gen75_pack.h \
	gen8_pack.h \
	gen9_pack.h

%_pack.h : %.xml gen_pack_header.py
	$(AM_V_GEN) $(srcdir)/gen_pack_header.py $< > $@

CLEANFILES = $(BUILT_SOURCES)
60
src/intel/genxml/README
Normal file
@@ -0,0 +1,60 @@
This provides some background on the design of the generated headers. We
started out trying to generate bit fields, but it evolved into the pack
functions because of a few limitations:

  1) Bit fields still generate terrible code today. Even with modern
     optimizing compilers you get multiple load+mask+store operations
     to the same dword in memory as you set individual bits. The
     compiler also has to generate code to mask out overflowing values
     (for example, if you assign 200 to a 2 bit field). Our driver
     never writes overflowing values, so that's not needed. On the
     other hand, most compilers recognize that the template struct we
     use is a temporary variable, copy propagate the individual
     fields, and do amazing constant folding. You should take a look
     at the code that gets generated when you compile in release mode
     with optimizations.

  2) For some types we need to have overlapping bit fields. For
     example, some values are 64 byte aligned 32 bit offsets. The
     lower 5 bits of the offset are always zero, so the hw packs a
     few misc bits into the lower 5 bits there. Other times a field can
     be either a u32 or a float. I tried to do this with overlapping
     anonymous unions and it became a big mess. Also, when using
     initializers, you can only initialize one union member, so this
     just doesn't work with our approach.

     The pack functions, on the other hand, allow us a great deal of
     flexibility in how we combine things. In the case of overlapping
     fields (the u32 and float case), if we only set one of them in
     the pack function, the compiler will recognize that the other is
     initialized to 0 and optimize out the code to or it in.

  3) Bit fields (and certainly overlapping anonymous unions of bit
     fields) aren't generally stable across compilers in how they're
     laid out and aligned. Our pack functions let us control exactly
     how things get packed, using only simple and unambiguous bitwise
     shifting and or'ing that works on any compiler.

Once we have the pack function, it allows us to hook in various
transformations and validation as we go from template struct to dwords
in memory:

  1) Validation: As I said above, our driver isn't supposed to write
     overflowing values to the fields, but we've of course had lots of
     cases where we make mistakes and write overflowing values. With
     the pack function, we can actually assert on that and catch it at
     runtime. Bit fields would just silently truncate.

  2) Type conversions: sometimes it's just a matter of writing a
     float to a u32, but we also convert from bool to bits and from
     floats to fixed point integers.

  3) Relocations: whenever we have a pointer from one buffer to
     another (for example, a pointer from the metadata for a texture
     to the raw texture data), we have to tell the kernel about it so
     it can adjust the pointer to point to the final location. That
     means extra work to record and annotate the dword location that
     holds the pointer. With bit fields, we'd have to call a function
     to do this, but with the pack functions we generate code in the
     pack function to do it for us. That's a lot less error prone and
     less work.
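To make the shape of the generated code concrete, here is a minimal sketch of what the generator emits for a hypothetical two-field command. The struct and field names are invented, but the __gen_uint()/__gen_float() helpers and the shift+or structure match the template in gen_pack_header.py below:

/* Hypothetical generator output for a two-dword command: all names here
 * are invented for illustration.
 */
struct GEN8_FAKE_CMD {
   uint32_t OpcodeField;   /* bits 16..31 of dword 0 */
   float    ScaleFactor;   /* all of dword 1 */
};

static inline void
GEN8_FAKE_CMD_pack(__gen_user_data *data, void * restrict dst,
                   const struct GEN8_FAKE_CMD * restrict values)
{
   uint32_t * restrict dw = (uint32_t * restrict) dst;

   /* Plain shift+or packing; in debug builds __gen_uint() asserts on an
    * overflowing value instead of silently truncating it.
    */
   dw[0] = __gen_uint(values->OpcodeField, 16, 31);

   dw[1] = __gen_float(values->ScaleFactor);
}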
1923
src/intel/genxml/gen6.xml
Normal file
File diff suppressed because it is too large
2538
src/intel/genxml/gen7.xml
Normal file
File diff suppressed because it is too large
2935
src/intel/genxml/gen75.xml
Normal file
File diff suppressed because it is too large
3174
src/intel/genxml/gen8.xml
Normal file
File diff suppressed because it is too large
3478
src/intel/genxml/gen9.xml
Normal file
File diff suppressed because it is too large
40
src/intel/genxml/genX_pack.h
Normal file
@@ -0,0 +1,40 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#ifndef GEN_VERSIONx10
#  error "The GEN_VERSIONx10 macro must be defined"
#endif

#if (GEN_VERSIONx10 == 70)
#  include "genxml/gen7_pack.h"
#elif (GEN_VERSIONx10 == 75)
#  include "genxml/gen75_pack.h"
#elif (GEN_VERSIONx10 == 80)
#  include "genxml/gen8_pack.h"
#elif (GEN_VERSIONx10 == 90)
#  include "genxml/gen9_pack.h"
#else
#  error "Need to add a pack header include for this gen"
#endif
90
src/intel/genxml/gen_macros.h
Normal file
@@ -0,0 +1,90 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

/* Macros for handling per-gen compilation.
 *
 * The prefixing macros GENX() and genX() automatically prefix whatever you
 * give them by GENX_ or genX_ where X is the gen number.
 *
 * You can declare a function to be used on some range of gens like this:
 *
 * GENX_FUNC(GEN7, GEN75) void
 * genX(my_function_name)(args...)
 * {
 *    // Do stuff
 * }
 *
 * If the file is compiled for any set of gens containing gen7 and gen75,
 * the function will effectively only get compiled twice as
 * gen7_my_function_name and gen75_my_function_name.  The function has to
 * be compilable on all gens, but it will become a static inline that gets
 * discarded by the compiler on all gens not in range.
 *
 * You can do pseudo-runtime checks in your function such as
 *
 * if (GEN_GEN > 8 || GEN_IS_HASWELL) {
 *    // Do something
 * }
 *
 * The contents of the if statement must be valid regardless of gen, but
 * the if will get compiled away on everything except haswell.
 *
 * For places where you really do have a compile-time conflict, you can
 * use preprocessor logic:
 *
 * #if (GEN_GEN > 8 || GEN_IS_HASWELL)
 *    // Do something
 * #endif
 *
 * However, it is strongly recommended that the former be used whenever
 * possible.
 */

/* Base macro defined on the command line.  If we don't have this, we can't
 * do anything.
 */
#ifndef GEN_VERSIONx10
#  error "The GEN_VERSIONx10 macro must be defined"
#endif

#define GEN_GEN ((GEN_VERSIONx10) / 10)
#define GEN_IS_HASWELL ((GEN_VERSIONx10) == 75)

/* Prefixing macros */
#if (GEN_VERSIONx10 == 70)
#  define GENX(X) GEN7_##X
#  define genX(x) gen7_##x
#elif (GEN_VERSIONx10 == 75)
#  define GENX(X) GEN75_##X
#  define genX(x) gen75_##x
#elif (GEN_VERSIONx10 == 80)
#  define GENX(X) GEN8_##X
#  define genX(x) gen8_##x
#elif (GEN_VERSIONx10 == 90)
#  define GENX(X) GEN9_##X
#  define genX(x) gen9_##x
#else
#  error "Need to add prefixing macros for this gen"
#endif
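As a usage sketch, a translation unit compiled once per gen (e.g. with -DGEN_VERSIONx10=75, as the isl Makefile.am below does) picks up the matching prefixes and pack header automatically. The MI_NOOP packing and the __gen_* definitions here are illustrative assumptions standing in for what a real driver provides:

/* Illustrative per-gen translation unit; the __gen_* definitions are
 * stand-ins for what the embedding driver normally provides.
 */
#define __gen_address_type uint64_t
#define __gen_user_data void
#define __gen_combine_address(data, dst, address, delta) ((address) + (delta))

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

/* Compiles as gen7_emit_noop, gen75_emit_noop, ... depending on
 * GEN_VERSIONx10.
 */
void
genX(emit_noop)(uint32_t *dw)
{
   struct GENX(MI_NOOP) noop = { GENX(MI_NOOP_header) };

   if (GEN_GEN > 8 || GEN_IS_HASWELL) {
      /* Pseudo-runtime check: constant-folded away on other gens. */
   }

   GENX(MI_NOOP_pack)(NULL, dw, &noop);
}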
640
src/intel/genxml/gen_pack_header.py
Executable file
@@ -0,0 +1,640 @@
#!/usr/bin/env python3

import xml.parsers.expat
import re
import sys
import copy

license = """/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
"""

pack_header = """%(license)s

/* Instructions, enums and structures for %(platform)s.
 *
 * This file has been generated, do not hand edit.
 */

#pragma once

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
#include <math.h>

#ifndef __gen_validate_value
#define __gen_validate_value(x)
#endif

#ifndef __gen_field_functions
#define __gen_field_functions

union __gen_value {
   float f;
   uint32_t dw;
};

static inline uint64_t
__gen_mbo(uint32_t start, uint32_t end)
{
   return (~0ull >> (64 - (end - start + 1))) << start;
}

static inline uint64_t
__gen_uint(uint64_t v, uint32_t start, uint32_t end)
{
   __gen_validate_value(v);

#if DEBUG
   const int width = end - start + 1;
   if (width < 64) {
      const uint64_t max = (1ull << width) - 1;
      assert(v <= max);
   }
#endif

   return v << start;
}

static inline uint64_t
__gen_sint(int64_t v, uint32_t start, uint32_t end)
{
   const int width = end - start + 1;

   __gen_validate_value(v);

#if DEBUG
   if (width < 64) {
      const int64_t max = (1ll << (width - 1)) - 1;
      const int64_t min = -(1ll << (width - 1));
      assert(min <= v && v <= max);
   }
#endif

   const uint64_t mask = ~0ull >> (64 - width);

   return (v & mask) << start;
}

static inline uint64_t
__gen_offset(uint64_t v, uint32_t start, uint32_t end)
{
   __gen_validate_value(v);
#if DEBUG
   uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start;

   assert((v & ~mask) == 0);
#endif

   return v;
}

static inline uint32_t
__gen_float(float v)
{
   __gen_validate_value(v);
   return ((union __gen_value) { .f = (v) }).dw;
}

static inline uint64_t
__gen_sfixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
{
   __gen_validate_value(v);

   const float factor = (1 << fract_bits);

#if DEBUG
   const float max = ((1 << (end - start)) - 1) / factor;
   const float min = -(1 << (end - start)) / factor;
   assert(min <= v && v <= max);
#endif

   const int32_t int_val = roundf(v * factor);
   const uint64_t mask = ~0ull >> (64 - (end - start + 1));

   return (int_val & mask) << start;
}

static inline uint64_t
__gen_ufixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
{
   __gen_validate_value(v);

   const float factor = (1 << fract_bits);

#if DEBUG
   const float max = ((1 << (end - start + 1)) - 1) / factor;
   const float min = 0.0f;
   assert(min <= v && v <= max);
#endif

   const uint32_t uint_val = roundf(v * factor);

   return uint_val << start;
}

#ifndef __gen_address_type
#error #define __gen_address_type before including this file
#endif

#ifndef __gen_user_data
#error #define __gen_combine_address before including this file
#endif

#endif

"""

def to_alphanum(name):
    substitutions = {
        ' ': '',
        '/': '',
        '[': '',
        ']': '',
        '(': '',
        ')': '',
        '-': '',
        ':': '',
        '.': '',
        ',': '',
        '=': '',
        '>': '',
        '#': '',
        'α': 'alpha',
        '&': '',
        '*': '',
        '"': '',
        '+': '',
        '\'': '',
    }

    for i, j in substitutions.items():
        name = name.replace(i, j)

    return name

def safe_name(name):
    name = to_alphanum(name)
    if not str.isalpha(name[0]):
        name = '_' + name

    return name

def num_from_str(num_str):
    if num_str.lower().startswith('0x'):
        return int(num_str, base=16)
    else:
        assert(not num_str.startswith('0') and 'octals numbers not allowed')
        return int(num_str)

class Field:
    ufixed_pattern = re.compile("u(\d+)\.(\d+)")
    sfixed_pattern = re.compile("s(\d+)\.(\d+)")

    def __init__(self, parser, attrs):
        self.parser = parser
        if "name" in attrs:
            self.name = safe_name(attrs["name"])
        self.start = int(attrs["start"])
        self.end = int(attrs["end"])
        self.type = attrs["type"]

        if "prefix" in attrs:
            self.prefix = attrs["prefix"]
        else:
            self.prefix = None

        if "default" in attrs:
            self.default = int(attrs["default"])
        else:
            self.default = None

        ufixed_match = Field.ufixed_pattern.match(self.type)
        if ufixed_match:
            self.type = 'ufixed'
            self.fractional_size = int(ufixed_match.group(2))

        sfixed_match = Field.sfixed_pattern.match(self.type)
        if sfixed_match:
            self.type = 'sfixed'
            self.fractional_size = int(sfixed_match.group(2))

    def emit_template_struct(self, dim):
        if self.type == 'address':
            type = '__gen_address_type'
        elif self.type == 'bool':
            type = 'bool'
        elif self.type == 'float':
            type = 'float'
        elif self.type == 'ufixed':
            type = 'float'
        elif self.type == 'sfixed':
            type = 'float'
        elif self.type == 'uint' and self.end - self.start > 32:
            type = 'uint64_t'
        elif self.type == 'offset':
            type = 'uint64_t'
        elif self.type == 'int':
            type = 'int32_t'
        elif self.type == 'uint':
            type = 'uint32_t'
        elif self.type in self.parser.structs:
            type = 'struct ' + self.parser.gen_prefix(safe_name(self.type))
        elif self.type == 'mbo':
            return
        else:
            print("#error unhandled type: %s" % self.type)

        print("   %-36s %s%s;" % (type, self.name, dim))

        if len(self.values) > 0 and self.default == None:
            if self.prefix:
                prefix = self.prefix + "_"
            else:
                prefix = ""

            for value in self.values:
                print("#define %-40s %d" % (prefix + value.name, value.value))

class Group:
    def __init__(self, parser, parent, start, count, size):
        self.parser = parser
        self.parent = parent
        self.start = start
        self.count = count
        self.size = size
        self.fields = []

    def emit_template_struct(self, dim):
        if self.count == 0:
            print("   /* variable length fields follow */")
        else:
            if self.count > 1:
                dim = "%s[%d]" % (dim, self.count)

            for field in self.fields:
                field.emit_template_struct(dim)

    class DWord:
        def __init__(self):
            self.size = 32
            self.fields = []
            self.address = None

    def collect_dwords(self, dwords, start, dim):
        for field in self.fields:
            if type(field) is Group:
                if field.count == 1:
                    field.collect_dwords(dwords, start + field.start, dim)
                else:
                    for i in range(field.count):
                        field.collect_dwords(dwords,
                                             start + field.start + i * field.size,
                                             "%s[%d]" % (dim, i))
                continue

            index = (start + field.start) // 32
            if not index in dwords:
                dwords[index] = self.DWord()

            clone = copy.copy(field)
            clone.start = clone.start + start
            clone.end = clone.end + start
            clone.dim = dim
            dwords[index].fields.append(clone)

            if field.type == "address":
                # assert dwords[index].address == None
                dwords[index].address = field

            # Coalesce all the dwords covered by this field. The two cases we
            # handle are where multiple fields are in a 64 bit word (typically
            # an address and a few bits) or where a single struct field
            # completely covers multiple dwords.
            while index < (start + field.end) // 32:
                if index + 1 in dwords and not dwords[index] == dwords[index + 1]:
                    dwords[index].fields.extend(dwords[index + 1].fields)
                dwords[index].size = 64
                dwords[index + 1] = dwords[index]
                index = index + 1

    def emit_pack_function(self, start):
        dwords = {}
        self.collect_dwords(dwords, 0, "")

        # Determine number of dwords in this group. If we have a size, use
        # that, since that'll account for MBZ dwords at the end of a group
        # (like dword 8 on BDW+ 3DSTATE_HS). Otherwise, use the largest dword
        # index we've seen plus one.
        if self.size > 0:
            length = self.size // 32
        else:
            length = max(dwords.keys()) + 1

        for index in range(length):
            # Handle MBZ dwords
            if not index in dwords:
                print("")
                print("   dw[%d] = 0;" % index)
                continue

            # For 64 bit dwords, we aliased the two dword entries in the dword
            # dict it occupies. Now that we're emitting the pack function,
            # skip the duplicate entries.
            dw = dwords[index]
            if index > 0 and index - 1 in dwords and dw == dwords[index - 1]:
                continue

            # Special case: only one field and it's a struct at the beginning
            # of the dword. In this case we pack directly into the
            # destination. This is the only way we handle embedded structs
            # larger than 32 bits.
            if len(dw.fields) == 1:
                field = dw.fields[0]
                name = field.name + field.dim
                if field.type in self.parser.structs and field.start % 32 == 0:
                    print("")
                    print("   %s_pack(data, &dw[%d], &values->%s);" %
                          (self.parser.gen_prefix(safe_name(field.type)), index, name))
                    continue

            # Pack any fields of struct type first so we have integer values
            # to the dword for those fields.
            field_index = 0
            for field in dw.fields:
                if type(field) is Field and field.type in self.parser.structs:
                    name = field.name + field.dim
                    print("")
                    print("   uint32_t v%d_%d;" % (index, field_index))
                    print("   %s_pack(data, &v%d_%d, &values->%s);" %
                          (self.parser.gen_prefix(safe_name(field.type)), index, field_index, name))
                    field_index = field_index + 1

            print("")
            dword_start = index * 32
            if dw.address == None:
                address_count = 0
            else:
                address_count = 1

            if dw.size == 32 and dw.address == None:
                v = None
                print("   dw[%d] =" % index)
            elif len(dw.fields) > address_count:
                v = "v%d" % index
                print("   const uint%d_t %s =" % (dw.size, v))
            else:
                v = "0"

            field_index = 0
            for field in dw.fields:
                if field.type != "mbo":
                    name = field.name + field.dim

                if field.type == "mbo":
                    s = "__gen_mbo(%d, %d)" % \
                        (field.start - dword_start, field.end - dword_start)
                elif field.type == "address":
                    s = None
                elif field.type == "uint":
                    s = "__gen_uint(values->%s, %d, %d)" % \
                        (name, field.start - dword_start, field.end - dword_start)
                elif field.type == "int":
                    s = "__gen_sint(values->%s, %d, %d)" % \
                        (name, field.start - dword_start, field.end - dword_start)
                elif field.type == "bool":
                    s = "__gen_uint(values->%s, %d, %d)" % \
                        (name, field.start - dword_start, field.end - dword_start)
                elif field.type == "float":
                    s = "__gen_float(values->%s)" % name
                elif field.type == "offset":
                    s = "__gen_offset(values->%s, %d, %d)" % \
                        (name, field.start - dword_start, field.end - dword_start)
                elif field.type == 'ufixed':
                    s = "__gen_ufixed(values->%s, %d, %d, %d)" % \
                        (name, field.start - dword_start, field.end - dword_start, field.fractional_size)
                elif field.type == 'sfixed':
                    s = "__gen_sfixed(values->%s, %d, %d, %d)" % \
                        (name, field.start - dword_start, field.end - dword_start, field.fractional_size)
                elif field.type in self.parser.structs:
                    s = "__gen_uint(v%d_%d, %d, %d)" % \
                        (index, field_index, field.start - dword_start, field.end - dword_start)
                    field_index = field_index + 1
                else:
                    print("/* unhandled field %s, type %s */\n" % (name, field.type))
                    s = None

                if not s == None:
                    if field == dw.fields[-1]:
                        print("      %s;" % s)
                    else:
                        print("      %s |" % s)

            if dw.size == 32:
                if dw.address:
                    print("   dw[%d] = __gen_combine_address(data, &dw[%d], values->%s, %s);" % (index, index, dw.address.name, v))
                continue

            if dw.address:
                v_address = "v%d_address" % index
                print("   const uint64_t %s =\n      __gen_combine_address(data, &dw[%d], values->%s, %s);" %
                      (v_address, index, dw.address.name, v))
                v = v_address

            print("   dw[%d] = %s;" % (index, v))
            print("   dw[%d] = %s >> 32;" % (index + 1, v))

class Value:
    def __init__(self, attrs):
        self.name = safe_name(attrs["name"])
        self.value = int(attrs["value"])

class Parser:
    def __init__(self):
        self.parser = xml.parsers.expat.ParserCreate()
        self.parser.StartElementHandler = self.start_element
        self.parser.EndElementHandler = self.end_element

        self.instruction = None
        self.structs = {}
        self.registers = {}

    def start_element(self, name, attrs):
        if name == "genxml":
            self.platform = attrs["name"]
            self.gen = attrs["gen"].replace('.', '')
            print(pack_header % {'license': license, 'platform': self.platform})
        elif name in ("instruction", "struct", "register"):
            if name == "instruction":
                self.instruction = safe_name(attrs["name"])
                self.length_bias = int(attrs["bias"])
            elif name == "struct":
                self.struct = safe_name(attrs["name"])
                self.structs[attrs["name"]] = 1
            elif name == "register":
                self.register = safe_name(attrs["name"])
                self.reg_num = num_from_str(attrs["num"])
                self.registers[attrs["name"]] = 1
            if "length" in attrs:
                self.length = int(attrs["length"])
                size = self.length * 32
            else:
                self.length = None
                size = 0
            self.group = Group(self, None, 0, 1, size)

        elif name == "group":
            group = Group(self, self.group,
                          int(attrs["start"]), int(attrs["count"]), int(attrs["size"]))
            self.group.fields.append(group)
            self.group = group
        elif name == "field":
            self.group.fields.append(Field(self, attrs))
            self.values = []
        elif name == "enum":
            self.values = []
            self.enum = safe_name(attrs["name"])
            if "prefix" in attrs:
                self.prefix = safe_name(attrs["prefix"])
            else:
                self.prefix = None
        elif name == "value":
            self.values.append(Value(attrs))

    def end_element(self, name):
        if name == "instruction":
            self.emit_instruction()
            self.instruction = None
            self.group = None
        elif name == "struct":
            self.emit_struct()
            self.struct = None
            self.group = None
        elif name == "register":
            self.emit_register()
            self.register = None
            self.reg_num = None
            self.group = None
        elif name == "group":
            self.group = self.group.parent
        elif name == "field":
            self.group.fields[-1].values = self.values
        elif name == "enum":
            self.emit_enum()
            self.enum = None

    def gen_prefix(self, name):
        if name[0] == "_":
            return 'GEN%s%s' % (self.gen, name)
        else:
            return 'GEN%s_%s' % (self.gen, name)

    def emit_template_struct(self, name, group):
        print("struct %s {" % self.gen_prefix(name))
        group.emit_template_struct("")
        print("};\n")

    def emit_pack_function(self, name, group):
        name = self.gen_prefix(name)
        print("static inline void\n%s_pack(__gen_user_data *data, void * restrict dst,\n%sconst struct %s * restrict values)\n{" %
              (name, ' ' * (len(name) + 6), name))

        # Cast dst to make header C++ friendly
        print("   uint32_t * restrict dw = (uint32_t * restrict) dst;")

        group.emit_pack_function(0)

        print("}\n")

    def emit_instruction(self):
        name = self.instruction
        if not self.length == None:
            print('#define %-33s %6d' %
                  (self.gen_prefix(name + "_length"), self.length))
        print('#define %-33s %6d' %
              (self.gen_prefix(name + "_length_bias"), self.length_bias))

        default_fields = []
        for field in self.group.fields:
            if not type(field) is Field:
                continue
            if field.default == None:
                continue
            default_fields.append("   .%-35s = %6d" % (field.name, field.default))

        if default_fields:
            print('#define %-40s\\' % (self.gen_prefix(name + '_header')))
            print(", \\\n".join(default_fields))
            print('')

        self.emit_template_struct(self.instruction, self.group)

        self.emit_pack_function(self.instruction, self.group)

    def emit_register(self):
        name = self.register
        if not self.reg_num == None:
            print('#define %-33s 0x%04x' %
                  (self.gen_prefix(name + "_num"), self.reg_num))

        if not self.length == None:
            print('#define %-33s %6d' %
                  (self.gen_prefix(name + "_length"), self.length))

        self.emit_template_struct(self.register, self.group)
        self.emit_pack_function(self.register, self.group)

    def emit_struct(self):
        name = self.struct
        if not self.length == None:
            print('#define %-33s %6d' %
                  (self.gen_prefix(name + "_length"), self.length))

        self.emit_template_struct(self.struct, self.group)
        self.emit_pack_function(self.struct, self.group)

    def emit_enum(self):
        print('/* enum %s */' % self.gen_prefix(self.enum))
        for value in self.values:
            if self.prefix:
                name = self.prefix + "_" + value.name
            else:
                name = value.name
            print('#define %-36s %6d' % (name.upper(), value.value))
        print('')

    def parse(self, filename):
        file = open(filename, "rb")
        self.parser.ParseFile(file)
        file.close()

if len(sys.argv) < 2:
    print("No input xml file specified")
    sys.exit(1)

input_file = sys.argv[1]

p = Parser()
p.parse(input_file)
1
src/intel/isl/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
/isl_format_layout.c
123
src/intel/isl/Makefile.am
Normal file
@@ -0,0 +1,123 @@
# Copyright 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

SUBDIRS = .


ISL_GEN_LIBS = \
	libisl-gen7.la \
	libisl-gen75.la \
	libisl-gen8.la \
	libisl-gen9.la \
	$(NULL)

noinst_LTLIBRARIES = $(ISL_GEN_LIBS) libisl.la

EXTRA_DIST = tests

# The gallium includes are for the util/u_math.h include from main/macros.h
AM_CPPFLAGS = \
	$(INTEL_CFLAGS) \
	$(VALGRIND_CFLAGS) \
	$(DEFINES) \
	-I$(top_srcdir)/include \
	-I$(top_srcdir)/src \
	-I$(top_srcdir)/src/intel \
	-I$(top_srcdir)/src/mapi \
	-I$(top_srcdir)/src/mesa \
	-I$(top_srcdir)/src/mesa/drivers/dri/common \
	-I$(top_srcdir)/src/mesa/drivers/dri/i965 \
	-I$(top_srcdir)/src/gallium/auxiliary \
	-I$(top_srcdir)/src/gallium/include \
	-I$(top_builddir)/src \
	-I$(top_builddir)/src/intel

libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init

libisl_la_LIBADD = $(ISL_GEN_LIBS)

libisl_la_SOURCES = \
	isl.c \
	isl.h \
	isl_format.c \
	isl_format_layout.c \
	isl_gen4.c \
	isl_gen4.h \
	isl_gen6.c \
	isl_gen6.h \
	isl_storage_image.c \
	$(NULL)

libisl_gen7_la_SOURCES = \
	isl_gen7.c \
	isl_gen7.h \
	isl_surface_state.c \
	$(NULL)
libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70

libisl_gen75_la_SOURCES = \
	isl_surface_state.c \
	$(NULL)
libisl_gen75_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=75

libisl_gen8_la_SOURCES = \
	isl_gen8.c \
	isl_gen8.h \
	isl_surface_state.c \
	$(NULL)
libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=80

libisl_gen9_la_SOURCES = \
	isl_gen9.c \
	isl_gen9.h \
	isl_surface_state.c \
	$(NULL)
libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=90

BUILT_SOURCES = \
	isl_format_layout.c

isl_format_layout.c: isl_format_layout_gen.bash \
                     isl_format_layout.csv
	$(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \
	    <$(srcdir)/isl_format_layout.csv >$@

# ----------------------------------------------------------------------------
#  Tests
# ----------------------------------------------------------------------------

TESTS = tests/isl_surf_get_image_offset_test

check_PROGRAMS = $(TESTS)
|
||||
|
||||
# Link tests to lib965_compiler.la for brw_get_device_info().
|
||||
tests_ldadd = \
|
||||
-lm \
|
||||
libisl.la \
|
||||
$(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la
|
||||
|
||||
tests_isl_surf_get_image_offset_test_SOURCES = \
|
||||
tests/isl_surf_get_image_offset_test.c
|
||||
tests_isl_surf_get_image_offset_test_LDADD = $(tests_ldadd)
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
include $(top_srcdir)/install-lib-links.mk
|
||||
113
src/intel/isl/README
Normal file
@ -0,0 +1,113 @@
Intel Surface Layout

Introduction
============
isl is a small library that calculates the layout of Intel GPU surfaces,
queries those layouts, and queries the properties of surface formats.


Independence from User APIs
===========================
isl's API is independent of any user-facing graphics API, such as OpenGL and
Vulkan. This independence allows isl to be used as a shared component by
multiple Intel drivers.

Rather than mimic the user-facing APIs, the isl API attempts to reflect Intel
hardware: the actual memory layout of Intel GPU surfaces and how one programs
the GPU to use those surfaces. For example:

- The tokens of `enum isl_format` (such as `ISL_FORMAT_R8G8B8A8_UNORM`)
  match those of the hardware enum `SURFACE_FORMAT` rather than the OpenGL
  or Vulkan format tokens. And the values of `isl_format` and
  `SURFACE_FORMAT` are identical.

- The OpenGL and Vulkan APIs contain depth and stencil formats. However, the
  hardware enum `SURFACE_FORMAT` does not, and therefore neither does `enum
  isl_format`. Rather than define new pixel formats that have no hardware
  counterpart, isl records the intent to use a surface as a depth or stencil
  buffer with the usage flags `ISL_SURF_USAGE_DEPTH_BIT` and
  `ISL_SURF_USAGE_STENCIL_BIT` (see the sketch after this list).

- `struct isl_surf` distinguishes between the surface's logical dimension
  from the user API's perspective (`enum isl_surf_dim`, which may be 1D, 2D,
  or 3D) and the layout of those dimensions in memory (`enum isl_dim_layout`).
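To make the depth/stencil point concrete, here is roughly how a driver would describe a depth buffer to isl. The field names are inferred from how the gen4/gen6/gen7 helpers later in this series read struct isl_surf_init_info (isl.h itself is suppressed below), so treat the struct layout as illustrative rather than exact:

/* Sketch only: field names taken from the gen*_choose_* helpers in this
 * series; the exact definition of isl_surf_init_info lives in isl.h.
 */
struct isl_surf_init_info info = {
   .dim     = ISL_SURF_DIM_2D,
   .format  = ISL_FORMAT_R32_FLOAT,     /* no depth-specific format exists */
   .width   = 1920,
   .height  = 1080,
   .levels  = 1,
   .samples = 1,
   .usage   = ISL_SURF_USAGE_DEPTH_BIT, /* depth intent lives in usage */
};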

Surface Units
=============

Intro
-----
ISL takes care in its equations to correctly handle conversion among surface
units (such as pixels and compression blocks) and to carefully distinguish
between a surface's logical layout in the client API and its physical layout
in memory.

Symbol names often explicitly declare their unit with a suffix:

- px: logical pixels
- sa: physical surface samples
- el: physical surface elements
- sa_rows: rows of physical surface samples
- el_rows: rows of physical surface elements

Logical units are independent of hardware generation and are closely related
to the user-facing API (OpenGL and Vulkan). Physical units are dependent on
hardware generation and reflect the surface's layout in memory.

Definitions
-----------
- Logical Pixels (px):

  The surface's layout from the perspective of the client API (OpenGL and
  Vulkan) is in units of logical pixels. Logical pixels are independent of
  the surface's layout in memory.

  A surface's width and height, in units of logical pixels, is not affected
  by the surface's sample count. For example, consider a VkImage created with
  VkImageCreateInfo{width=w0, height=h0, samples=s0}. The surface's width and
  height at level 0 is, in units of logical pixels, w0 and h0 regardless of
  the value of s0.

  For example, the logical array length of a 3D surface is always 1, even on
  Gen9 where the surface's memory layout is that of an array surface
  (ISL_DIM_LAYOUT_GEN4_2D).

- Physical Surface Samples (sa):

  For a multisampled surface, this unit has the obvious meaning.
  A singlesampled surface, from ISL's perspective, is simply a multisampled
  surface whose sample count is 1.

  For example, consider a 2D single-level non-array surface with samples=4,
  width_px=64, and height_px=64 (note that the suffix 'px' indicates logical
  pixels). If the surface's multisample layout is ISL_MSAA_LAYOUT_INTERLEAVED,
  then the extent of level 0 is, in units of physical surface samples,
  width_sa=128, height_sa=128, depth_sa=1, array_length_sa=1. If
  ISL_MSAA_LAYOUT_ARRAY, then width_sa=64, height_sa=64, depth_sa=1,
  array_length_sa=4. (A sketch of this conversion follows these definitions.)

- Physical Surface Elements (el):

  This unit allows ISL to treat compressed and uncompressed formats
  identically in many calculations.

  If the surface's pixel format is compressed, such as ETC2, then a surface
  element is equivalent to a compression block. If uncompressed, then
  a surface element is equivalent to a surface sample. As a corollary, for
  a given surface a surface element is at least as large as a surface sample.
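A minimal sketch of the px-to-sa conversion described above, for the interleaved layout only. The helper name is invented, and only the 1x and 4x cases spelled out in the example are handled:

#include <stdint.h>

static void
px_to_sa_interleaved(uint32_t samples,
                     uint32_t width_px, uint32_t height_px,
                     uint32_t *width_sa, uint32_t *height_sa)
{
   /* Interleaved layouts fold the samples into the 2D extent:
    * 4x doubles both dimensions (64x64 px -> 128x128 sa above),
    * while 1x leaves the extent unchanged.
    */
   if (samples == 4) {
      *width_sa  = width_px * 2;
      *height_sa = height_px * 2;
   } else {
      *width_sa  = width_px;
      *height_sa = height_px;
   }
}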

Errata
------
ISL acquired the term 'surface element' from the Broadwell PRM [1], which
defines it as follows:

   An element is defined as a pixel in uncompressed surface formats, and as
   a compression block in compressed surface formats. For MSFMT_DEPTH_STENCIL
   type multisampled surfaces, an element is a sample.


References
==========
[1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >>
     RENDER_SURFACE_STATE Surface Vertical Alignment (p325)
1497
src/intel/isl/isl.c
Normal file
File diff suppressed because it is too large
1178
src/intel/isl/isl.h
Normal file
File diff suppressed because it is too large
108
src/intel/isl/isl_format.c
Normal file
@ -0,0 +1,108 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>

#include "isl.h"

bool
isl_format_has_uint_channel(enum isl_format fmt)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);

   return fmtl->channels.r.type == ISL_UINT ||
          fmtl->channels.g.type == ISL_UINT ||
          fmtl->channels.b.type == ISL_UINT ||
          fmtl->channels.a.type == ISL_UINT ||
          fmtl->channels.l.type == ISL_UINT ||
          fmtl->channels.i.type == ISL_UINT ||
          fmtl->channels.p.type == ISL_UINT;
}

bool
isl_format_has_sint_channel(enum isl_format fmt)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);

   return fmtl->channels.r.type == ISL_SINT ||
          fmtl->channels.g.type == ISL_SINT ||
          fmtl->channels.b.type == ISL_SINT ||
          fmtl->channels.a.type == ISL_SINT ||
          fmtl->channels.l.type == ISL_SINT ||
          fmtl->channels.i.type == ISL_SINT ||
          fmtl->channels.p.type == ISL_SINT;
}

enum isl_format
isl_format_rgb_to_rgba(enum isl_format rgb)
{
   assert(isl_format_is_rgb(rgb));

   switch (rgb) {
   case ISL_FORMAT_R32G32B32_FLOAT:    return ISL_FORMAT_R32G32B32A32_FLOAT;
   case ISL_FORMAT_R32G32B32_SINT:     return ISL_FORMAT_R32G32B32A32_SINT;
   case ISL_FORMAT_R32G32B32_UINT:     return ISL_FORMAT_R32G32B32A32_UINT;
   case ISL_FORMAT_R32G32B32_UNORM:    return ISL_FORMAT_R32G32B32A32_UNORM;
   case ISL_FORMAT_R32G32B32_SNORM:    return ISL_FORMAT_R32G32B32A32_SNORM;
   case ISL_FORMAT_R32G32B32_SSCALED:  return ISL_FORMAT_R32G32B32A32_SSCALED;
   case ISL_FORMAT_R32G32B32_USCALED:  return ISL_FORMAT_R32G32B32A32_USCALED;
   case ISL_FORMAT_R32G32B32_SFIXED:   return ISL_FORMAT_R32G32B32A32_SFIXED;
   case ISL_FORMAT_R8G8B8_UNORM:       return ISL_FORMAT_R8G8B8A8_UNORM;
   case ISL_FORMAT_R8G8B8_SNORM:       return ISL_FORMAT_R8G8B8A8_SNORM;
   case ISL_FORMAT_R8G8B8_SSCALED:     return ISL_FORMAT_R8G8B8A8_SSCALED;
   case ISL_FORMAT_R8G8B8_USCALED:     return ISL_FORMAT_R8G8B8A8_USCALED;
   case ISL_FORMAT_R16G16B16_FLOAT:    return ISL_FORMAT_R16G16B16A16_FLOAT;
   case ISL_FORMAT_R16G16B16_UNORM:    return ISL_FORMAT_R16G16B16A16_UNORM;
   case ISL_FORMAT_R16G16B16_SNORM:    return ISL_FORMAT_R16G16B16A16_SNORM;
   case ISL_FORMAT_R16G16B16_SSCALED:  return ISL_FORMAT_R16G16B16A16_SSCALED;
   case ISL_FORMAT_R16G16B16_USCALED:  return ISL_FORMAT_R16G16B16A16_USCALED;
   case ISL_FORMAT_R8G8B8_UNORM_SRGB:  return ISL_FORMAT_R8G8B8A8_UNORM_SRGB;
   case ISL_FORMAT_R16G16B16_UINT:     return ISL_FORMAT_R16G16B16A16_UINT;
   case ISL_FORMAT_R16G16B16_SINT:     return ISL_FORMAT_R16G16B16A16_SINT;
   case ISL_FORMAT_R8G8B8_UINT:        return ISL_FORMAT_R8G8B8A8_UINT;
   case ISL_FORMAT_R8G8B8_SINT:        return ISL_FORMAT_R8G8B8A8_SINT;
   default:
      return ISL_FORMAT_UNSUPPORTED;
   }
}

enum isl_format
isl_format_rgb_to_rgbx(enum isl_format rgb)
{
   assert(isl_format_is_rgb(rgb));

   switch (rgb) {
   case ISL_FORMAT_R32G32B32_FLOAT:
      return ISL_FORMAT_R32G32B32X32_FLOAT;
   case ISL_FORMAT_R16G16B16_UNORM:
      return ISL_FORMAT_R16G16B16X16_UNORM;
   case ISL_FORMAT_R16G16B16_FLOAT:
      return ISL_FORMAT_R16G16B16X16_FLOAT;
   case ISL_FORMAT_R8G8B8_UNORM:
      return ISL_FORMAT_R8G8B8X8_UNORM;
   case ISL_FORMAT_R8G8B8_UNORM_SRGB:
      return ISL_FORMAT_R8G8B8X8_UNORM_SRGB;
   default:
      return ISL_FORMAT_UNSUPPORTED;
   }
}
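One plausible use of the two widening helpers above: when the hardware cannot handle a 3-channel format directly, try the RGBA equivalent first and fall back to the RGBX one. The caller below is hypothetical; only the two isl_format_rgb_to_* functions come from this file.

/* Hypothetical caller; returns ISL_FORMAT_UNSUPPORTED if no wider
 * 4-channel equivalent exists for this RGB format.
 */
enum isl_format
widen_rgb_format(enum isl_format rgb)
{
   enum isl_format rgba = isl_format_rgb_to_rgba(rgb);
   if (rgba != ISL_FORMAT_UNSUPPORTED)
      return rgba;
   return isl_format_rgb_to_rgbx(rgb);
}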
287
src/intel/isl/isl_format_layout.csv
Normal file
@ -0,0 +1,287 @@
# Copyright 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

#
# @file
# @brief Layout of all hardware surface formats
#
# For the official list, see Broadwell PRM: Volume 2b: Command Reference:
# Enumerations: SURFACE_FORMAT.
#

# Columns:
#   name:  format name in PRM
#   bpb:   bits per block
#   bw:    block width, in pixels
#   bh:    block height, in pixels
#   bd:    block depth, in pixels
#   r:     red channel, data type and bitwidth
#   g:     green channel
#   b:     blue channel
#   a:     alpha channel
#   l:     luminance channel
#   i:     intensity channel
#   p:     palette channel
#   space: colorspace
#   txc:   texture compression
#
# Data Types:
#   x:  void
#   r:  raw
#   un: unorm
#   sn: snorm
#   uf: ufloat
#   sf: sfloat
#   ux: ufixed
#   sx: sfixed
#   ui: uint
#   si: sint
#   us: uscaled
#   ss: sscaled

# Table is aligned with the Vim commands below, using the Align plugin:
#   :AlignCtrl lr+ p8000000000000P1
#   /^# name/,$ Align,

# name , bpb, bw, bh, bd, r, g, b, a, l, i, p, space, txc
R32G32B32A32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, sf32, , , , linear,
R32G32B32A32_SINT , 128, 1, 1, 1, si32, si32, si32, si32, , , , linear,
R32G32B32A32_UINT , 128, 1, 1, 1, ui32, ui32, ui32, ui32, , , , linear,
R32G32B32A32_UNORM , 128, 1, 1, 1, un32, un32, un32, un32, , , , linear,
R32G32B32A32_SNORM , 128, 1, 1, 1, sn32, sn32, sn32, sn32, , , , linear,
R64G64_FLOAT , 128, 1, 1, 1, sf64, sf64, , , , , , linear,
R32G32B32X32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, x32, , , , linear,
R32G32B32A32_SSCALED , 128, 1, 1, 1, ss32, ss32, ss32, ss32, , , , linear,
R32G32B32A32_USCALED , 128, 1, 1, 1, us32, us32, us32, us32, , , , linear,
R32G32B32A32_SFIXED , 128, 1, 1, 1, sx32, sx32, sx32, sx32, , , , linear,
R64G64_PASSTHRU , 128, 1, 1, 1, r64, r64, , , , , , ,
R32G32B32_FLOAT , 96, 1, 1, 1, sf32, sf32, sf32, , , , , linear,
R32G32B32_SINT , 96, 1, 1, 1, si32, si32, si32, , , , , linear,
R32G32B32_UINT , 96, 1, 1, 1, ui32, ui32, ui32, , , , , linear,
R32G32B32_UNORM , 96, 1, 1, 1, un32, un32, un32, , , , , linear,
R32G32B32_SNORM , 96, 1, 1, 1, sn32, sn32, sn32, , , , , linear,
R32G32B32_SSCALED , 96, 1, 1, 1, ss32, ss32, ss32, , , , , linear,
R32G32B32_USCALED , 96, 1, 1, 1, us32, us32, us32, , , , , linear,
R32G32B32_SFIXED , 96, 1, 1, 1, sx32, sx32, sx32, , , , , linear,
R16G16B16A16_UNORM , 64, 1, 1, 1, un16, un16, un16, un16, , , , linear,
R16G16B16A16_SNORM , 64, 1, 1, 1, sn16, sn16, sn16, sn16, , , , linear,
R16G16B16A16_SINT , 64, 1, 1, 1, si16, si16, si16, si16, , , , linear,
R16G16B16A16_UINT , 64, 1, 1, 1, ui16, ui16, ui16, ui16, , , , linear,
R16G16B16A16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, sf16, , , , linear,
R32G32_FLOAT , 64, 1, 1, 1, sf32, sf32, , , , , , linear,
R32G32_SINT , 64, 1, 1, 1, si32, si32, , , , , , linear,
R32G32_UINT , 64, 1, 1, 1, ui32, ui32, , , , , , linear,
R32_FLOAT_X8X24_TYPELESS , 64, 1, 1, 1, sf32, x8, x24, , , , , linear,
X32_TYPELESS_G8X24_UINT , 64, 1, 1, 1, x32, ui8, x24, , , , , linear,
L32A32_FLOAT , 64, 1, 1, 1, , , , sf32, sf32, , , linear,
R32G32_UNORM , 64, 1, 1, 1, un32, un32, , , , , , linear,
R32G32_SNORM , 64, 1, 1, 1, sn32, sn32, , , , , , linear,
R64_FLOAT , 64, 1, 1, 1, sf64, , , , , , , linear,
R16G16B16X16_UNORM , 64, 1, 1, 1, un16, un16, un16, x16, , , , linear,
R16G16B16X16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, x16, , , , linear,
A32X32_FLOAT , 64, 1, 1, 1, , , , sf32, x32, , , alpha,
L32X32_FLOAT , 64, 1, 1, 1, , , , x32, sf32, , , linear,
I32X32_FLOAT , 64, 1, 1, 1, , , , x32, , sf32, , linear,
R16G16B16A16_SSCALED , 64, 1, 1, 1, ss16, ss16, ss16, ss16, , , , linear,
R16G16B16A16_USCALED , 64, 1, 1, 1, us16, us16, us16, us16, , , , linear,
R32G32_SSCALED , 64, 1, 1, 1, ss32, ss32, , , , , , linear,
R32G32_USCALED , 64, 1, 1, 1, us32, us32, , , , , , linear,
R32G32_SFIXED , 64, 1, 1, 1, sx32, sx32, , , , , , linear,
R64_PASSTHRU , 64, 1, 1, 1, r64, , , , , , , ,
B8G8R8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear,
B8G8R8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb,
R10G10B10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear,
R10G10B10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb,
R10G10B10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear,
R10G10B10_SNORM_A2_UNORM , 32, 1, 1, 1, sn10, sn10, sn10, un2, , , , linear,
R8G8B8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear,
R8G8B8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb,
R8G8B8A8_SNORM , 32, 1, 1, 1, sn8, sn8, sn8, sn8, , , , linear,
R8G8B8A8_SINT , 32, 1, 1, 1, si8, si8, si8, si8, , , , linear,
R8G8B8A8_UINT , 32, 1, 1, 1, ui8, ui8, ui8, ui8, , , , linear,
R16G16_UNORM , 32, 1, 1, 1, un16, un16, , , , , , linear,
R16G16_SNORM , 32, 1, 1, 1, sn16, sn16, , , , , , linear,
R16G16_SINT , 32, 1, 1, 1, si16, si16, , , , , , linear,
R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , , , linear,
R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear,
B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear,
B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb,
R11G11B10_FLOAT , 32, 1, 1, 1, uf11, uf11, uf10, , , , , linear,
R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear,
R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear,
R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear,
R24_UNORM_X8_TYPELESS , 32, 1, 1, 1, un24, x8, , , , , , linear,
X24_TYPELESS_G8_UINT , 32, 1, 1, 1, x24, ui8, , , , , , linear,
L32_UNORM , 32, 1, 1, 1, , , , , un32, , , linear,
A32_UNORM , 32, 1, 1, 1, , , , un32, , , , alpha,
L16A16_UNORM , 32, 1, 1, 1, , , , un16, un16, , , linear,
I24X8_UNORM , 32, 1, 1, 1, , , , x8, , un24, , linear,
L24X8_UNORM , 32, 1, 1, 1, , , , x8, un24, , , linear,
A24X8_UNORM , 32, 1, 1, 1, , , , un24, x8, , , alpha,
I32_FLOAT , 32, 1, 1, 1, , , , , , sf32, , linear,
L32_FLOAT , 32, 1, 1, 1, , , , , sf32, , , linear,
A32_FLOAT , 32, 1, 1, 1, , , , sf32, , , , alpha,
X8B8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear,
A8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, x8, un8, , , , linear,
B8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear,
B8G8R8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear,
B8G8R8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb,
R8G8B8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear,
R8G8B8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb,
R9G9B9E5_SHAREDEXP , 32, 1, 1, 1, ui9, ui9, ui9, , , , , linear,
B10G10R10X2_UNORM , 32, 1, 1, 1, un10, un10, un10, x2, , , , linear,
L16A16_FLOAT , 32, 1, 1, 1, , , , sf16, sf16, , , linear,
R32_UNORM , 32, 1, 1, 1, un32, , , , , , , linear,
R32_SNORM , 32, 1, 1, 1, sn32, , , , , , , linear,
R10G10B10X2_USCALED , 32, 1, 1, 1, us10, us10, us10, x2, , , , linear,
R8G8B8A8_SSCALED , 32, 1, 1, 1, ss8, ss8, ss8, ss8, , , , linear,
R8G8B8A8_USCALED , 32, 1, 1, 1, us8, us8, us8, us8, , , , linear,
R16G16_SSCALED , 32, 1, 1, 1, ss16, ss16, , , , , , linear,
R16G16_USCALED , 32, 1, 1, 1, us16, us16, , , , , , linear,
R32_SSCALED , 32, 1, 1, 1, ss32, , , , , , , linear,
R32_USCALED , 32, 1, 1, 1, us32, , , , , , , linear,
B5G6R5_UNORM , 16, 1, 1, 1, un5, un6, un5, , , , , linear,
B5G6R5_UNORM_SRGB , 16, 1, 1, 1, un5, un6, un5, , , , , srgb,
B5G5R5A1_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear,
B5G5R5A1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, un1, , , , srgb,
B4G4R4A4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear,
B4G4R4A4_UNORM_SRGB , 16, 1, 1, 1, un4, un4, un4, un4, , , , srgb,
R8G8_UNORM , 16, 1, 1, 1, un8, un8, , , , , , linear,
R8G8_SNORM , 16, 1, 1, 1, sn8, sn8, , , , , , linear,
R8G8_SINT , 16, 1, 1, 1, si8, si8, , , , , , linear,
R8G8_UINT , 16, 1, 1, 1, ui8, ui8, , , , , , linear,
R16_UNORM , 16, 1, 1, 1, un16, , , , , , , linear,
R16_SNORM , 16, 1, 1, 1, sn16, , , , , , , linear,
R16_SINT , 16, 1, 1, 1, si16, , , , , , , linear,
R16_UINT , 16, 1, 1, 1, ui16, , , , , , , linear,
R16_FLOAT , 16, 1, 1, 1, sf16, , , , , , , linear,
A8P8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear,
A8P8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear,
I16_UNORM , 16, 1, 1, 1, , , , , , un16, , linear,
L16_UNORM , 16, 1, 1, 1, , , , , un16, , , linear,
A16_UNORM , 16, 1, 1, 1, , , , un16, , , , alpha,
L8A8_UNORM , 16, 1, 1, 1, , , , un8, un8, , , linear,
I16_FLOAT , 16, 1, 1, 1, , , , , , sf16, , linear,
L16_FLOAT , 16, 1, 1, 1, , , , , sf16, , , linear,
A16_FLOAT , 16, 1, 1, 1, , , , sf16, , , , alpha,
L8A8_UNORM_SRGB , 16, 1, 1, 1, , , , un8, un8, , , srgb,
R5G5_SNORM_B6_UNORM , 16, 1, 1, 1, sn5, sn5, un6, , , , , linear,
B5G5R5X1_UNORM , 16, 1, 1, 1, un5, un5, un5, x1, , , , linear,
B5G5R5X1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, x1, , , , srgb,
R8G8_SSCALED , 16, 1, 1, 1, ss8, ss8, , , , , , linear,
R8G8_USCALED , 16, 1, 1, 1, us8, us8, , , , , , linear,
R16_SSCALED , 16, 1, 1, 1, ss16, , , , , , , linear,
R16_USCALED , 16, 1, 1, 1, us16, , , , , , , linear,
P8A8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear,
P8A8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear,
A1B5G5R5_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear,
A4B4G4R4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear,
L8A8_UINT , 16, 1, 1, 1, , , , ui8, ui8, , , linear,
L8A8_SINT , 16, 1, 1, 1, , , , si8, si8, , , linear,
R8_UNORM , 8, 1, 1, 1, un8, , , , , , , linear,
R8_SNORM , 8, 1, 1, 1, sn8, , , , , , , linear,
R8_SINT , 8, 1, 1, 1, si8, , , , , , , linear,
R8_UINT , 8, 1, 1, 1, ui8, , , , , , , linear,
A8_UNORM , 8, 1, 1, 1, , , , un8, , , , alpha,
I8_UNORM , 8, 1, 1, 1, , , , , , un8, , linear,
L8_UNORM , 8, 1, 1, 1, , , , , un8, , , linear,
P4A4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear,
A4P4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear,
R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , linear,
R8_USCALED , 8, 1, 1, 1, us8, , , , , , , linear,
P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, linear,
L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , srgb,
P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, linear,
P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear,
A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear,
Y8_UNORM , 0, 0, 0, 0, , , , , , , , yuv,
L8_UINT , 8, 1, 1, 1, , , , , ui8, , , linear,
L8_SINT , 8, 1, 1, 1, , , , , si8, , , linear,
I8_UINT , 8, 1, 1, 1, , , , , , ui8, , linear,
I8_SINT , 8, 1, 1, 1, , , , , , si8, , linear,
DXT1_RGB_SRGB , 64, 4, 4, 1, un4, un4, un4, , , , , srgb, dxt1
R1_UNORM , 1, 1, 1, 1, un1, , , , , , , linear,
YCRCB_NORMAL , 0, 0, 0, 0, , , , , , , , yuv,
YCRCB_SWAPUVY , 0, 0, 0, 0, , , , , , , , yuv,
P2_UNORM_PALETTE0 , 2, 1, 1, 1, , , , , , , un2, linear,
P2_UNORM_PALETTE1 , 2, 1, 1, 1, , , , , , , un2, linear,
BC1_UNORM , 64, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt1
BC2_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt3
BC3_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt5
BC4_UNORM , 64, 4, 4, 1, un8, , , , , , , linear, rgtc1
BC5_UNORM , 128, 4, 4, 1, un8, un8, , , , , , linear, rgtc2
BC1_UNORM_SRGB , 64, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt1
BC2_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt3
BC3_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt5
MONO8 , 1, 1, 1, 1, , , , , , , , ,
YCRCB_SWAPUV , 0, 0, 0, 0, , , , , , , , yuv,
YCRCB_SWAPY , 0, 0, 0, 0, , , , , , , , yuv,
DXT1_RGB , 64, 4, 4, 1, un4, un4, un4, , , , , linear, dxt1
FXT1 , 128, 8, 4, 1, un4, un4, un4, , , , , linear, fxt1
R8G8B8_UNORM , 24, 1, 1, 1, un8, un8, un8, , , , , linear,
R8G8B8_SNORM , 24, 1, 1, 1, sn8, sn8, sn8, , , , , linear,
R8G8B8_SSCALED , 24, 1, 1, 1, ss8, ss8, ss8, , , , , linear,
R8G8B8_USCALED , 24, 1, 1, 1, us8, us8, us8, , , , , linear,
R64G64B64A64_FLOAT , 256, 1, 1, 1, sf64, sf64, sf64, sf64, , , , linear,
R64G64B64_FLOAT , 192, 1, 1, 1, sf64, sf64, sf64, , , , , linear,
BC4_SNORM , 64, 4, 4, 1, sn8, , , , , , , linear, rgtc1
BC5_SNORM , 128, 4, 4, 1, sn8, sn8, , , , , , linear, rgtc2
R16G16B16_FLOAT , 48, 1, 1, 1, sf16, sf16, sf16, , , , , linear,
R16G16B16_UNORM , 48, 1, 1, 1, un16, un16, un16, , , , , linear,
R16G16B16_SNORM , 48, 1, 1, 1, sn16, sn16, sn16, , , , , linear,
R16G16B16_SSCALED , 48, 1, 1, 1, ss16, ss16, ss16, , , , , linear,
R16G16B16_USCALED , 48, 1, 1, 1, us16, us16, us16, , , , , linear,
BC6H_SF16 , 128, 4, 4, 1, sf16, sf16, sf16, , , , , linear, bptc
BC7_UNORM , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, bptc
BC7_UNORM_SRGB , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, bptc
BC6H_UF16 , 128, 4, 4, 1, uf16, uf16, uf16, , , , , linear, bptc
PLANAR_420_8 , 0, 0, 0, 0, , , , , , , , yuv,
R8G8B8_UNORM_SRGB , 24, 1, 1, 1, un8, un8, un8, , , , , srgb,
ETC1_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc1
ETC2_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc2
EAC_R11 , 64, 4, 4, 1, un11, , , , , , , linear, etc2
EAC_RG11 , 128, 4, 4, 1, un11, un11, , , , , , linear, etc2
EAC_SIGNED_R11 , 64, 4, 4, 1, sn11, , , , , , , linear, etc2
EAC_SIGNED_RG11 , 128, 4, 4, 1, sn11, sn11, , , , , , linear, etc2
ETC2_SRGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , srgb, etc2
R16G16B16_UINT , 48, 1, 1, 1, ui16, ui16, ui16, , , , , linear,
R16G16B16_SINT , 48, 1, 1, 1, si16, si16, si16, , , , , linear,
R32_SFIXED , 32, 1, 1, 1, sx32, , , , , , , linear,
R10G10B10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear,
R10G10B10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear,
R10G10B10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear,
R10G10B10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear,
B10G10R10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear,
B10G10R10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear,
B10G10R10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear,
B10G10R10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear,
B10G10R10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear,
R64G64B64A64_PASSTHRU , 256, 1, 1, 1, r64, r64, r64, r64, , , , ,
R64G64B64_PASSTHRU , 192, 1, 1, 1, r64, r64, r64, , , , , ,
ETC2_RGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , linear, etc2
ETC2_SRGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , srgb, etc2
ETC2_EAC_RGBA8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, etc2
ETC2_EAC_SRGB8_A8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, etc2
R8G8B8_UINT , 24, 1, 1, 1, ui8, ui8, ui8, , , , , linear,
R8G8B8_SINT , 24, 1, 1, 1, si8, si8, si8, , , , , linear,
RAW , 0, 0, 0, 0, , , , , , , , ,
128
src/intel/isl/isl_format_layout_gen.bash
Executable file
@ -0,0 +1,128 @@
#!/usr/bin/env bash
#
# Copyright 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

set -eu
set -o pipefail

cat <<'EOF'
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl.h"

const struct isl_format_layout
isl_format_layouts[] = {
EOF

sed -r '
# Delete comment lines and empty lines
/^[[:space:]]*#/d
/^[[:space:]]*$/d

# Delete spaces
s/[[:space:]]//g

# Translate formats
s/^([A-Za-z0-9_]+),*/ISL_FORMAT_\1,/

# Translate data type of channels
s/\<x([0-9]+),/ISL_VOID@\1,/g
s/\<r([0-9]+),/ISL_RAW@\1,/g
s/\<un([0-9]+),/ISL_UNORM@\1,/g
s/\<sn([0-9]+),/ISL_SNORM@\1,/g
s/\<uf([0-9]+),/ISL_UFLOAT@\1,/g
s/\<sf([0-9]+),/ISL_SFLOAT@\1,/g
s/\<ux([0-9]+),/ISL_UFIXED@\1,/g
s/\<sx([0-9]+),/ISL_SFIXED@\1,/g
s/\<ui([0-9]+),/ISL_UINT@\1,/g
s/\<si([0-9]+),/ISL_SINT@\1,/g
s/\<us([0-9]+),/ISL_USCALED@\1,/g
s/\<ss([0-9]+),/ISL_SSCALED@\1,/g

# Translate colorspaces
# Interpret alpha-only formats as having no colorspace.
s/\<(linear|srgb|yuv)\>/ISL_COLORSPACE_\1/
s/\<alpha\>//

# Translate texture compression
s/\<(dxt|fxt|rgtc|bptc|etc)([0-9]*)\>/ISL_TXC_\1\2/
' |
tr 'a-z' 'A-Z' | # Convert to uppercase
while IFS=, read -r format bpb bw bh bd \
                  red green blue alpha \
                  luminance intensity palette \
                  colorspace txc
do
  : ${colorspace:=ISL_COLORSPACE_NONE}
  : ${txc:=ISL_TXC_NONE}

  cat <<EOF
   [$format] = {
      $format,
      .bs = $((bpb/8)),
      .bw = $bw, .bh = $bh, .bd = $bd,
      .channels = {
          .r = { $red },
          .g = { $green },
          .b = { $blue },
          .a = { $alpha },
          .l = { $luminance },
          .i = { $intensity },
          .p = { $palette },
      },
      .colorspace = $colorspace,
      .txc = $txc,
   },

EOF
done |
sed -r '
# Collapse empty channels
s/\{ +\}/{}/

# Split non-empty channels into two members: base type and bit size
s/@/, /
'

# Terminate the table
printf '};\n'
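For reference, the pipeline above turns each CSV row into one designated-initializer entry. Reconstructed by hand from the heredoc (so the spacing is approximate), the R8G8B8A8_UNORM row comes out shaped like this:

   [ISL_FORMAT_R8G8B8A8_UNORM] = {
      ISL_FORMAT_R8G8B8A8_UNORM,
      .bs = 4,                     /* bpb / 8 = 32 / 8 */
      .bw = 1, .bh = 1, .bd = 1,
      .channels = {
          .r = { ISL_UNORM, 8 },
          .g = { ISL_UNORM, 8 },
          .b = { ISL_UNORM, 8 },
          .a = { ISL_UNORM, 8 },
          .l = {},
          .i = {},
          .p = {},
      },
      .colorspace = ISL_COLORSPACE_LINEAR,
      .txc = ISL_TXC_NONE,
   },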
74
src/intel/isl/isl_gen4.c
Normal file
@ -0,0 +1,74 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl_gen4.h"
#include "isl_priv.h"

bool
gen4_choose_msaa_layout(const struct isl_device *dev,
                        const struct isl_surf_init_info *info,
                        enum isl_tiling tiling,
                        enum isl_msaa_layout *msaa_layout)
{
   /* Gen4 and Gen5 do not support MSAA */
   assert(info->samples >= 1);

   *msaa_layout = ISL_MSAA_LAYOUT_NONE;
   return true;
}

void
gen4_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el)
{
   assert(info->samples == 1);
   assert(msaa_layout == ISL_MSAA_LAYOUT_NONE);
   assert(!isl_tiling_is_std_y(tiling));

   /* Note that neither the surface's horizontal nor vertical image alignment
    * is programmable on gen4 or gen5.
    *
    * From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4
    * Alignment Unit Size:
    *
    *    Note that the compressed formats are padded to a full compression
    *    cell.
    *
    *    +------------------------+--------+--------+
    *    | format                 | halign | valign |
    *    +------------------------+--------+--------+
    *    | YUV 4:2:2 formats      | 4      | 2      |
    *    | uncompressed formats   | 4      | 2      |
    *    +------------------------+--------+--------+
    */

   if (isl_format_is_compressed(info->format)) {
      *image_align_el = isl_extent3d(1, 1, 1);
      return;
   }

   *image_align_el = isl_extent3d(4, 2, 1);
}
47
src/intel/isl/isl_gen4.h
Normal file
@ -0,0 +1,47 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "isl_priv.h"

#ifdef __cplusplus
extern "C" {
#endif

bool
gen4_choose_msaa_layout(const struct isl_device *dev,
                        const struct isl_surf_init_info *info,
                        enum isl_tiling tiling,
                        enum isl_msaa_layout *msaa_layout);

void
gen4_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el);

#ifdef __cplusplus
}
#endif
160
src/intel/isl/isl_gen6.c
Normal file
@ -0,0 +1,160 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl_gen6.h"
#include "isl_priv.h"

bool
gen6_choose_msaa_layout(const struct isl_device *dev,
                        const struct isl_surf_init_info *info,
                        enum isl_tiling tiling,
                        enum isl_msaa_layout *msaa_layout)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

   assert(ISL_DEV_GEN(dev) == 6);
   assert(info->samples >= 1);

   if (info->samples == 1) {
      /* Single-sampled surfaces always succeed with the NONE layout. */
      *msaa_layout = ISL_MSAA_LAYOUT_NONE;
      return true;
   }

   /* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface
    * Format:
    *
    *    If Number of Multisamples is set to a value other than
    *    MULTISAMPLECOUNT_1, this field cannot be set to the following
    *    formats:
    *
    *       - any format with greater than 64 bits per element
    *       - any compressed texture format (BC*)
    *       - any YCRCB* format
    */
   if (fmtl->bs > 8)
      return false;
   if (isl_format_is_compressed(info->format))
      return false;
   if (isl_format_is_yuv(info->format))
      return false;

   /* From the Sandybridge PRM, Volume 4 Part 1 p85, SURFACE_STATE, Number of
    * Multisamples:
    *
    *    If this field is any value other than MULTISAMPLECOUNT_1 the
    *    following restrictions apply:
    *
    *       - the Surface Type must be SURFTYPE_2D
    *       - [...]
    */
   if (info->dim != ISL_SURF_DIM_2D)
      return false;

   /* More obvious restrictions */
   if (isl_surf_usage_is_display(info->usage))
      return false;
   if (tiling == ISL_TILING_LINEAR)
      return false;
   if (info->levels > 1)
      return false;

   *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
   return true;
}

void
gen6_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el)
{
   /* Note that the surface's horizontal image alignment is not programmable
    * on Sandybridge.
    *
    * From the Sandybridge PRM (2011-05), Volume 1, Part 1, Section 7.18.3.4
    * Alignment Unit Size:
    *
    *    Note that the compressed formats are padded to a full compression cell.
    *
    *    +------------------------+--------+--------+
    *    | format                 | halign | valign |
    *    +------------------------+--------+--------+
    *    | YUV 4:2:2 formats      | 4      | *      |
    *    | uncompressed formats   | 4      | *      |
    *    +------------------------+--------+--------+
    *
    *    * For these formats, the vertical alignment factor “j” is determined
    *      as follows:
    *       - j = 4 for any depth buffer
    *       - j = 2 for separate stencil buffer
    *       - j = 4 for any render target surface that is multisampled (4x)
    *       - j = 2 for all other render target surfaces
    *
    * From the Sandybridge PRM (2011-05), Volume 4, Part 1, Section 2.11.2
    * SURFACE_STATE, Surface Vertical Alignment:
    *
    *    - This field must be set to VALIGN_2 if the Surface Format is 96 bits
    *      per element (BPE).
    *
    *    - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
    *      (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
    *      (0x190)
    */

   if (isl_format_is_compressed(info->format)) {
      *image_align_el = isl_extent3d(1, 1, 1);
      return;
   }

   if (isl_format_is_yuv(info->format)) {
      *image_align_el = isl_extent3d(4, 2, 1);
      return;
   }

   if (info->samples > 1) {
      *image_align_el = isl_extent3d(4, 4, 1);
      return;
   }

   if (isl_surf_usage_is_depth_or_stencil(info->usage) &&
       !ISL_DEV_USE_SEPARATE_STENCIL(dev)) {
      /* interleaved depth/stencil buffer */
      *image_align_el = isl_extent3d(4, 4, 1);
      return;
   }

   if (isl_surf_usage_is_depth(info->usage)) {
      /* separate depth buffer */
      *image_align_el = isl_extent3d(4, 4, 1);
      return;
   }

   if (isl_surf_usage_is_stencil(info->usage)) {
      /* separate stencil buffer */
      *image_align_el = isl_extent3d(4, 2, 1);
      return;
   }

   *image_align_el = isl_extent3d(4, 2, 1);
}
47
src/intel/isl/isl_gen6.h
Normal file
@ -0,0 +1,47 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "isl_priv.h"

#ifdef __cplusplus
extern "C" {
#endif

bool
gen6_choose_msaa_layout(const struct isl_device *dev,
                        const struct isl_surf_init_info *info,
                        enum isl_tiling tiling,
                        enum isl_msaa_layout *msaa_layout);

void
gen6_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el);

#ifdef __cplusplus
}
#endif
395
src/intel/isl/isl_gen7.c
Normal file
395
src/intel/isl/isl_gen7.c
Normal file
|
|
@ -0,0 +1,395 @@
|
|||
/*
|
||||
* Copyright 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "isl_gen7.h"
|
||||
#include "isl_priv.h"
|
||||
|
||||
bool
|
||||
gen7_choose_msaa_layout(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *info,
|
||||
enum isl_tiling tiling,
|
||||
enum isl_msaa_layout *msaa_layout)
|
||||
{
|
||||
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
|
||||
|
||||
bool require_array = false;
|
||||
bool require_interleaved = false;
|
||||
|
||||
assert(ISL_DEV_GEN(dev) == 7);
|
||||
assert(info->samples >= 1);
|
||||
|
||||
if (info->samples == 1) {
|
||||
*msaa_layout = ISL_MSAA_LAYOUT_NONE;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* From the Ivybridge PRM, Volume 4 Part 1 p63, SURFACE_STATE, Surface
|
||||
* Format:
|
||||
*
|
||||
* If Number of Multisamples is set to a value other than
|
||||
* MULTISAMPLECOUNT_1, this field cannot be set to the following
|
||||
* formats: any format with greater than 64 bits per element, any
|
||||
* compressed texture format (BC*), and any YCRCB* format.
|
||||
*/
|
||||
if (fmtl->bs > 8)
|
||||
return false;
|
||||
if (isl_format_is_compressed(info->format))
|
||||
return false;
|
||||
if (isl_format_is_yuv(info->format))
|
||||
return false;
|
||||
|
||||
/* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of
|
||||
* Multisamples:
|
||||
*
|
||||
* - If this field is any value other than MULTISAMPLECOUNT_1, the
|
||||
* Surface Type must be SURFTYPE_2D.
|
||||
*
|
||||
* - If this field is any value other than MULTISAMPLECOUNT_1, Surface
|
||||
* Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero
|
||||
*/
|
||||
if (info->dim != ISL_SURF_DIM_2D)
|
||||
return false;
|
||||
if (info->levels > 1)
|
||||
return false;
|
||||
|
||||
   /* The Ivybridge PRM insists twice that signed integer formats cannot be
    * multisampled.
    *
    * From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of
    * Multisamples:
    *
    *    - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when
    *      all RT channels are not written.
    *
    * And errata from the Ivybridge PRM, Volume 4 Part 1 p77,
    * RENDER_SURFACE_STATE, MCS Enable:
    *
    *    This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs
    *    when all RT channels are not written.
    *
    * Note that the above SINT restrictions apply only to *MSRTs* (that is,
    * *multisampled* render targets). The restrictions seem to permit an MCS
    * if the render target is singlesampled.
    */
   if (isl_format_has_sint_channel(info->format))
      return false;

   /* More obvious restrictions */
   if (isl_surf_usage_is_display(info->usage))
      return false;
   if (tiling == ISL_TILING_LINEAR)
      return false;

   /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
    * Surface Storage Format:
    *
    *    +---------------------+----------------------------------------------------------------+
    *    | MSFMT_MSS           | Multisampled surface was/is rendered as a render target        |
    *    | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer |
    *    +---------------------+----------------------------------------------------------------+
    *
    * In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and
    * MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED.
    */
   if (isl_surf_usage_is_depth_or_stencil(info->usage))
      require_interleaved = true;

   /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
    * Surface Storage Format:
    *
    *    If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width
    *    is >= 8192 (meaning the actual surface width is >= 8193 pixels), this
    *    field must be set to MSFMT_MSS.
    */
   if (info->samples == 8 && info->width == 8192)
      require_array = true;

   /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
    * Surface Storage Format:
    *
    *    If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8,
    *    ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number
    *    of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is
    *    > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL.
    */
   if ((info->samples == 8 && info->height > 4194304u) ||
       (info->samples == 4 && info->height > 8388608u))
      require_interleaved = true;

   /* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
    * Surface Storage Format:
    *
    *    This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is
    *    one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or
    *    R24_UNORM_X8_TYPELESS.
    */
   if (info->format == ISL_FORMAT_I24X8_UNORM ||
       info->format == ISL_FORMAT_L24X8_UNORM ||
       info->format == ISL_FORMAT_A24X8_UNORM ||
       info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS)
      require_interleaved = true;

   if (require_array && require_interleaved)
      return false;

   if (require_interleaved) {
      *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
      return true;
   }

   /* Default to the array layout because it permits multisample
    * compression.
    */
   *msaa_layout = ISL_MSAA_LAYOUT_ARRAY;
   return true;
}
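
/* A minimal usage sketch, assuming a single-level 2D color target (the field
 * values below are assumptions for illustration, not part of this change):
 *
 *    enum isl_msaa_layout layout;
 *    struct isl_surf_init_info info = {
 *       .dim = ISL_SURF_DIM_2D,
 *       .format = ISL_FORMAT_R8G8B8A8_UNORM,
 *       .levels = 1,
 *       .samples = 4,
 *       .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
 *    };
 *    if (gen7_choose_msaa_layout(dev, &info, ISL_TILING_Y0, &layout))
 *       assert(layout == ISL_MSAA_LAYOUT_ARRAY); // array layout permits MCS
 */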

static bool
gen7_format_needs_valign2(const struct isl_device *dev,
                          enum isl_format format)
{
   /* This workaround applies only to gen7 */
   if (ISL_DEV_GEN(dev) > 7)
      return false;

   /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,
    * RENDER_SURFACE_STATE Surface Vertical Alignment:
    *
    *    - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
    *      (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
    *      (0x190)
    *
    *    - VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
    */
   return isl_format_is_yuv(format) ||
          format == ISL_FORMAT_R32G32B32_FLOAT;
}

/**
 * @brief Filter out tiling flags that are incompatible with the surface.
 *
 * The resultant outgoing @a flags is a subset of the incoming @a flags. The
 * outgoing flags may be empty (0x0) if the incoming flags were too
 * restrictive.
 *
 * For example, if the surface will be used for a display
 * (ISL_SURF_USAGE_DISPLAY_BIT), then this function filters out all tiling
 * flags except ISL_TILING_X_BIT and ISL_TILING_LINEAR_BIT.
 */
void
gen7_filter_tiling(const struct isl_device *dev,
                   const struct isl_surf_init_info *restrict info,
                   isl_tiling_flags_t *flags)
{
   /* IVB+ requires separate stencil */
   assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));

   /* Clear flags unsupported on this hardware */
   if (ISL_DEV_GEN(dev) < 9) {
      *flags &= ~ISL_TILING_Yf_BIT;
      *flags &= ~ISL_TILING_Ys_BIT;
   }

   /* And... clear the Yf and Ys bits anyway because Anvil doesn't support
    * them yet.
    */
   *flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */
   *flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */

   if (isl_surf_usage_is_depth(info->usage)) {
      /* Depth requires Y. */
      *flags &= ISL_TILING_ANY_Y_MASK;
   }

   /* Separate stencil requires W tiling, and W tiling requires separate
    * stencil.
    */
   if (isl_surf_usage_is_stencil(info->usage)) {
      *flags &= ISL_TILING_W_BIT;
   } else {
      *flags &= ~ISL_TILING_W_BIT;
   }

   if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT |
                      ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT |
                      ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) {
      assert(info->usage & ISL_SURF_USAGE_DISPLAY_BIT);
      isl_finishme("%s:%s: handle rotated display surfaces",
                   __FILE__, __func__);
   }

   if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT |
                      ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) {
      assert(info->usage & ISL_SURF_USAGE_DISPLAY_BIT);
      isl_finishme("%s:%s: handle flipped display surfaces",
                   __FILE__, __func__);
   }

   if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) {
      /* Before Skylake, the display engine does not accept Y */
      /* FINISHME[SKL]: Y tiling for display surfaces */
      *flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT);
   }

   if (info->samples > 1) {
      /* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled
       * Surface:
       *
       *    For multisample render targets, this field must be 1 (true). MSRTs
       *    can only be tiled.
       *
       * Multisample surfaces never require X tiling, and Y tiling generally
       * performs better than X. So choose Y. (Unless it's stencil, then it
       * must be W).
       */
      *flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT);
   }

   /* workaround */
   if (ISL_DEV_GEN(dev) == 7 &&
       gen7_format_needs_valign2(dev, info->format) &&
       (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&
       info->samples == 1) {
      /* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1,
       * SURFACE_STATE Surface Vertical Alignment:
       *
       *    This field must be set to VALIGN_4 for all tiled Y Render Target
       *    surfaces.
       */
      *flags &= ~ISL_TILING_Y0_BIT;
   }
}
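
/* A usage sketch, assuming `stencil_info` describes a single-sampled
 * separate-stencil surface and that ISL_TILING_ANY_MASK names the full set
 * of tiling bits (both are assumptions for illustration):
 *
 *    isl_tiling_flags_t flags = ISL_TILING_ANY_MASK;
 *    gen7_filter_tiling(dev, &stencil_info, &flags);
 *    // Only W tiling survives the stencil restrictions above:
 *    assert(flags == ISL_TILING_W_BIT);
 */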

/**
 * Choose horizontal subimage alignment, in units of surface elements.
 */
static uint32_t
gen7_choose_halign_el(const struct isl_device *dev,
                      const struct isl_surf_init_info *restrict info)
{
   if (isl_format_is_compressed(info->format))
      return 1;

   /* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,
    * RENDER_SURFACE_STATE Surface Horizontal Alignment:
    *
    *    - This field is intended to be set to HALIGN_8 only if the surface
    *      was rendered as a depth buffer with Z16 format or a stencil buffer,
    *      since these surfaces support only alignment of 8. Use of HALIGN_8
    *      for other surfaces is supported, but uses more memory.
    */
   if (isl_surf_info_is_z16(info) ||
       isl_surf_usage_is_stencil(info->usage))
      return 8;

   return 4;
}

/**
 * Choose vertical subimage alignment, in units of surface elements.
 */
static uint32_t
gen7_choose_valign_el(const struct isl_device *dev,
                      const struct isl_surf_init_info *restrict info,
                      enum isl_tiling tiling)
{
   bool require_valign2 = false;
   bool require_valign4 = false;

   if (isl_format_is_compressed(info->format))
      return 1;

   if (gen7_format_needs_valign2(dev, info->format))
      require_valign2 = true;

   /* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1,
    * RENDER_SURFACE_STATE Surface Vertical Alignment:
    *
    *    - This field is intended to be set to VALIGN_4 if the surface was
    *      rendered as a depth buffer, for a multisampled (4x) render target,
    *      or for a multisampled (8x) render target, since these surfaces
    *      support only alignment of 4. Use of VALIGN_4 for other surfaces is
    *      supported, but uses more memory. This field must be set to
    *      VALIGN_4 for all tiled Y Render Target surfaces.
    */
   if (isl_surf_usage_is_depth(info->usage) ||
       info->samples > 1 ||
       tiling == ISL_TILING_Y0) {
      require_valign4 = true;
   }

   if (isl_surf_usage_is_stencil(info->usage)) {
      /* The Ivybridge PRM states that the stencil buffer's vertical alignment
       * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment
       * Unit Size]. However, valign=8 is outside the set of valid values of
       * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2
       * (0x0) and VALIGN_4 (0x1).
       *
       * The PRM is generally confused about the width, height, and alignment
       * of the stencil buffer; and this confusion appears elsewhere. For
       * example, the following PRM text effectively converts the stencil
       * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM,
       * Volume 1, Part 1, Section 6.18.4.2 Base Address and LOD Calculation]:
       *
       *    For separate stencil buffer, the width must be multiplied by 2 and
       *    height divided by 2 as follows:
       *
       *       w_L = 2*i*ceil(W_L/i)
       *       h_L = 1/2*j*ceil(H_L/j)
       *
       * The root of the confusion is that, in W tiling, each pair of rows is
       * interleaved into one.
       *
       * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API
       * is more polished.
       */
      require_valign4 = true;
   }

   assert(!require_valign2 || !require_valign4);

   if (require_valign4)
      return 4;

   /* Prefer VALIGN_2 because it conserves memory. */
   return 2;
}
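
/* Worked example of the PRM conversion quoted above, assuming a 100x100
 * separate stencil buffer with i = 4 and j = 4:
 *
 *    w_L = 2*4*ceil(100/4)   = 200
 *    h_L = 1/2*4*ceil(100/4) = 50
 *
 * The padded area is preserved (200*50 == 100*100); the doubled width and
 * halved height are exactly the pair-of-rows interleave that W tiling
 * performs.
 */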

void
gen7_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el)
{
   /* IVB+ does not support combined depth/stencil. */
   assert(!isl_surf_usage_is_depth_and_stencil(info->usage));

   *image_align_el = (struct isl_extent3d) {
      .w = gen7_choose_halign_el(dev, info),
      .h = gen7_choose_valign_el(dev, info, tiling),
      .d = 1,
   };
}
52
src/intel/isl/isl_gen7.h
Normal file
@@ -0,0 +1,52 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "isl_priv.h"

#ifdef __cplusplus
extern "C" {
#endif

void
gen7_filter_tiling(const struct isl_device *dev,
                   const struct isl_surf_init_info *restrict info,
                   isl_tiling_flags_t *flags);

bool
gen7_choose_msaa_layout(const struct isl_device *dev,
                        const struct isl_surf_init_info *info,
                        enum isl_tiling tiling,
                        enum isl_msaa_layout *msaa_layout);

void
gen7_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el);

#ifdef __cplusplus
}
#endif
229
src/intel/isl/isl_gen8.c
Normal file
@@ -0,0 +1,229 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl_gen8.h"
#include "isl_priv.h"

bool
gen8_choose_msaa_layout(const struct isl_device *dev,
                        const struct isl_surf_init_info *info,
                        enum isl_tiling tiling,
                        enum isl_msaa_layout *msaa_layout)
{
   bool require_array = false;
   bool require_interleaved = false;

   assert(info->samples >= 1);

   if (info->samples == 1) {
      *msaa_layout = ISL_MSAA_LAYOUT_NONE;
      return true;
   }

   /* From the Broadwell PRM >> Volume2d: Command Structures >>
    * RENDER_SURFACE_STATE Tile Mode:
    *
    *    - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *      must be YMAJOR.
    *
    * As usual, though, stencil is special.
    */
   if (!isl_tiling_is_any_y(tiling) && !isl_surf_usage_is_stencil(info->usage))
      return false;

   /* From the Broadwell PRM >> Volume2d: Command Structures >>
    * RENDER_SURFACE_STATE Multisampled Surface Storage Format:
    *
    *    All multisampled render target surfaces must have this field set to
    *    MSFMT_MSS
    */
   if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)
      require_array = true;

   /* From the Broadwell PRM >> Volume2d: Command Structures >>
    * RENDER_SURFACE_STATE Number of Multisamples:
    *
    *    - If this field is any value other than MULTISAMPLECOUNT_1, the
    *      Surface Type must be SURFTYPE_2D. This field must be set to
    *      MULTISAMPLECOUNT_1 unless the surface is a Sampling Engine surface
    *      or Render Target surface.
    *
    *    - If this field is any value other than MULTISAMPLECOUNT_1, Surface
    *      Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero.
    */
   if (info->dim != ISL_SURF_DIM_2D)
      return false;
   if (info->levels > 1)
      return false;

   /* More obvious restrictions */
   if (isl_surf_usage_is_display(info->usage))
      return false;
   if (isl_format_is_compressed(info->format))
      return false;
   if (isl_format_is_yuv(info->format))
      return false;

   if (isl_surf_usage_is_depth_or_stencil(info->usage))
      require_interleaved = true;

   if (require_array && require_interleaved)
      return false;

   if (require_interleaved) {
      *msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
      return true;
   }

   *msaa_layout = ISL_MSAA_LAYOUT_ARRAY;
   return true;
}

/**
 * Choose horizontal subimage alignment, in units of surface elements.
 */
static uint32_t
gen8_choose_halign_el(const struct isl_device *dev,
                      const struct isl_surf_init_info *restrict info)
{
   if (isl_format_is_compressed(info->format))
      return 1;

   /* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
    * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
    *
    *    - This field is intended to be set to HALIGN_8 only if the surface
    *      was rendered as a depth buffer with Z16 format or a stencil buffer.
    *      In this case it must be set to HALIGN_8 since these surfaces
    *      support only alignment of 8. [...]
    */
   if (isl_surf_info_is_z16(info))
      return 8;
   if (isl_surf_usage_is_stencil(info->usage))
      return 8;

   /* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
    * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
    *
    *    [...] For Z32 formats it must be set to HALIGN_4.
    */
   if (isl_surf_usage_is_depth(info->usage))
      return 4;

   if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
      /* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
       * RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
       *
       *    - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E,
       *      HALIGN 16 must be used.
       *
       * This case handles color surfaces that may own an auxiliary MCS, CCS_D,
       * or CCS_E. Depth buffers, including those that own an auxiliary HiZ
       * surface, are handled above and do not require HALIGN_16.
       */
      assert(!isl_surf_usage_is_depth(info->usage));
      return 16;
   }

   /* XXX(chadv): I believe the hardware requires each image to be
    * cache-aligned. If that's true, then defaulting to halign=4 is wrong for
    * many formats. Depending on the format's block size, we may need to
    * increase halign to 8.
    */
   return 4;
}
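
/* A sketch of the decision order above, assuming the listed surface kinds
 * (illustration only):
 *
 *    compressed format                                -> halign 1
 *    Z16 depth buffer or any stencil buffer           -> halign 8
 *    other (Z32) depth buffer                         -> halign 4
 *    color buffer that may own an aux MCS/CCS surface -> halign 16
 *    color buffer with ISL_SURF_USAGE_DISABLE_AUX_BIT -> halign 4
 */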

/**
 * Choose vertical subimage alignment, in units of surface elements.
 */
static uint32_t
gen8_choose_valign_el(const struct isl_device *dev,
                      const struct isl_surf_init_info *restrict info)
{
   /* From the Broadwell PRM > Volume 2d: Command Reference: Structures
    * > RENDER_SURFACE_STATE Surface Vertical Alignment (p325):
    *
    *    - For Sampling Engine and Render Target Surfaces: This field
    *      specifies the vertical alignment requirement in elements for the
    *      surface. [...] An element is defined as a pixel in uncompressed
    *      surface formats, and as a compression block in compressed surface
    *      formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
    *      element is a sample.
    *
    *    - This field is intended to be set to VALIGN_4 if the surface was
    *      rendered as a depth buffer, for a multisampled (4x) render target,
    *      or for a multisampled (8x) render target, since these surfaces
    *      support only alignment of 4. Use of VALIGN_4 for other surfaces is
    *      supported, but increases memory usage.
    *
    *    - This field is intended to be set to VALIGN_8 only if the surface
    *      was rendered as a stencil buffer, since stencil buffer surfaces
    *      support only alignment of 8. If set to VALIGN_8, Surface Format
    *      must be R8_UINT.
    */

   if (isl_format_is_compressed(info->format))
      return 1;

   if (isl_surf_usage_is_stencil(info->usage))
      return 8;

   return 4;
}

void
gen8_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el)
{
   assert(!isl_tiling_is_std_y(tiling));

   /* The below text from the Broadwell PRM provides some insight into the
    * hardware's requirements for LOD alignment. From the Broadwell PRM >>
    * Volume 5: Memory Views >> Surface Layout >> 2D Surfaces:
    *
    *    These [2D surfaces] must adhere to the following memory organization
    *    rules:
    *
    *       - For non-compressed texture formats, each mipmap must start on an
    *         even row within the monolithic rectangular area. For
    *         1-texel-high mipmaps, this may require a row of padding below
    *         the previous mipmap. This restriction does not apply to any
    *         compressed texture formats; each subsequent (lower-res)
    *         compressed mipmap is positioned directly below the previous
    *         mipmap.
    *
    *       - Vertical alignment restrictions vary with memory tiling type:
    *         1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled
    *         mipmaps are not required to start at the left edge of a tile
    *         row.)
    */

   *image_align_el = (struct isl_extent3d) {
      .w = gen8_choose_halign_el(dev, info),
      .h = gen8_choose_valign_el(dev, info),
      .d = 1,
   };
}
47
src/intel/isl/isl_gen8.h
Normal file
@@ -0,0 +1,47 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "isl_priv.h"

#ifdef __cplusplus
extern "C" {
#endif

bool
gen8_choose_msaa_layout(const struct isl_device *dev,
                        const struct isl_surf_init_info *info,
                        enum isl_tiling tiling,
                        enum isl_msaa_layout *msaa_layout);

void
gen8_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el);

#ifdef __cplusplus
}
#endif
185
src/intel/isl/isl_gen9.c
Normal file
@@ -0,0 +1,185 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl_gen8.h"
#include "isl_gen9.h"
#include "isl_priv.h"

/**
 * Calculate the surface's subimage alignment, in units of surface samples,
 * for the standard tiling formats Yf and Ys.
 */
static void
gen9_calc_std_image_alignment_sa(const struct isl_device *dev,
                                 const struct isl_surf_init_info *restrict info,
                                 enum isl_tiling tiling,
                                 enum isl_msaa_layout msaa_layout,
                                 struct isl_extent3d *align_sa)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);

   assert(isl_tiling_is_std_y(tiling));

   const uint32_t bs = fmtl->bs;
   const uint32_t is_Ys = tiling == ISL_TILING_Ys;

   switch (info->dim) {
   case ISL_SURF_DIM_1D:
      /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface
       * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements.
       */
      *align_sa = (struct isl_extent3d) {
         .w = 1 << (12 - (ffs(bs) - 1) + (4 * is_Ys)),
         .h = 1,
         .d = 1,
      };
      return;
   case ISL_SURF_DIM_2D:
      /* See the Skylake BSpec > Memory Views > Common Surface Formats >
       * Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment
       * Requirements.
       */
      *align_sa = (struct isl_extent3d) {
         .w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys)),
         .h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys)),
         .d = 1,
      };

      if (is_Ys) {
         /* FINISHME(chadv): I don't trust this code. Untested. */
         isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__);

         switch (msaa_layout) {
         case ISL_MSAA_LAYOUT_NONE:
         case ISL_MSAA_LAYOUT_INTERLEAVED:
            break;
         case ISL_MSAA_LAYOUT_ARRAY:
            align_sa->w >>= (ffs(info->samples) - 0) / 2;
            align_sa->h >>= (ffs(info->samples) - 1) / 2;
            break;
         }
      }
      return;

   case ISL_SURF_DIM_3D:
      /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface
       * Layout and Tiling > 3D Surfaces > 3D Alignment Requirements.
       */
      *align_sa = (struct isl_extent3d) {
         .w = 1 << (4 - ((ffs(bs) + 1) / 3) + (4 * is_Ys)),
         .h = 1 << (4 - ((ffs(bs) - 1) / 3) + (2 * is_Ys)),
         .d = 1 << (4 - ((ffs(bs) - 0) / 3) + (2 * is_Ys)),
      };
      return;
   }

   unreachable("bad isl_surf_dim");
}
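
/* Worked example of the 2D formula above, assuming TileYf and a 32-bpp
 * format (bs = 4, so ffs(bs) = 3 and is_Ys = 0):
 *
 *    align_sa.w = 1 << (6 - ((3 - 1) / 2)) = 1 << 5 = 32
 *    align_sa.h = 1 << (6 - ((3 - 0) / 2)) = 1 << 5 = 32
 *
 * A 32x32 alignment at 4 bytes per sample spans 32 * 32 * 4 = 4096 bytes,
 * which matches the footprint of one 4 KB Yf tile.
 */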

void
gen9_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el)
{
   /* This BSpec text provides some insight into the hardware's alignment
    * requirements [Skylake BSpec > Memory Views > Common Surface Formats >
    * Surface Layout and Tiling > 2D Surfaces]:
    *
    *    An LOD must be aligned to a cache-line except for some special cases
    *    related to Planar YUV surfaces. In general, the cache-alignment
    *    restriction implies there is a minimum height for an LOD of 4 texels.
    *    So, LODs which are smaller than 4 high are padded.
    *
    * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Vertical Alignment:
    *
    *    - For Sampling Engine and Render Target Surfaces: This field
    *      specifies the vertical alignment requirement in elements for the
    *      surface. [...] An element is defined as a pixel in uncompressed
    *      surface formats, and as a compression block in compressed surface
    *      formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
    *      element is a sample.
    *
    *    - This field is used for 2D, CUBE, and 3D surface alignment when Tiled
    *      Resource Mode is TRMODE_NONE (Tiled Resource Mode is disabled).
    *      This field is ignored for 1D surfaces and also when Tiled Resource
    *      Mode is not TRMODE_NONE (e.g. Tiled Resource Mode is enabled).
    *
    *      See the appropriate Alignment table in the "Surface Layout and
    *      Tiling" section under Common Surface Formats for the table of
    *      alignment values for Tiled Resources.
    *
    *    - For uncompressed surfaces, the units of "j" are rows of pixels on
    *      the physical surface. For compressed texture formats, the units of
    *      "j" are in compression blocks, thus each increment in "j" is equal
    *      to h pixels, where h is the height of the compression block in
    *      pixels.
    *
    *    - Valid Values: VALIGN_4, VALIGN_8, VALIGN_16
    *
    * From the Skylake BSpec, RENDER_SURFACE_STATE Surface Horizontal
    * Alignment:
    *
    *    - For uncompressed surfaces, the units of "i" are pixels on the
    *      physical surface. For compressed texture formats, the units of "i"
    *      are in compression blocks, thus each increment in "i" is equal to
    *      w pixels, where w is the width of the compression block in pixels.
    *
    *    - Valid Values: HALIGN_4, HALIGN_8, HALIGN_16
    */

   if (isl_tiling_is_std_y(tiling)) {
      struct isl_extent3d image_align_sa;
      gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout,
                                       &image_align_sa);

      *image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa);
      return;
   }

   if (info->dim == ISL_SURF_DIM_1D) {
      /* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface
       * Layout and Tiling > 1D Surfaces > 1D Alignment Requirements.
       */
      *image_align_el = isl_extent3d(64, 1, 1);
      return;
   }

   if (isl_format_is_compressed(info->format)) {
      /* On Gen9, the meaning of RENDER_SURFACE_STATE's
       * SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for
       * compressed formats. They now indicate a multiple of the compression
       * block. For example, if the compression mode is ETC2 then HALIGN_4
       * indicates a horizontal alignment of 16 pixels.
       *
       * To avoid wasting memory, choose the smallest alignment possible:
       * HALIGN_4 and VALIGN_4.
       */
      *image_align_el = isl_extent3d(4, 4, 1);
      return;
   }

   gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout,
                                  image_align_el);
}
41
src/intel/isl/isl_gen9.h
Normal file
@@ -0,0 +1,41 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "isl_priv.h"

#ifdef __cplusplus
extern "C" {
#endif

void
gen9_choose_image_alignment_el(const struct isl_device *dev,
                               const struct isl_surf_init_info *restrict info,
                               enum isl_tiling tiling,
                               enum isl_msaa_layout msaa_layout,
                               struct isl_extent3d *image_align_el);

#ifdef __cplusplus
}
#endif
170
src/intel/isl/isl_priv.h
Normal file
@@ -0,0 +1,170 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include <assert.h>

#include "brw_device_info.h"
#include "util/macros.h"

#include "isl.h"

#define isl_finishme(format, ...) \
   __isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__)

void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char *file, int line, const char *fmt, ...);

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

static inline uint32_t
ffs(uint32_t n)
{
   return __builtin_ffs(n);
}

static inline bool
isl_is_pow2(uintmax_t n)
{
   return !(n & (n - 1));
}

/**
 * Alignment must be a power of 2.
 */
static inline bool
isl_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(isl_is_pow2(a));
   return (n & (a - 1)) == 0;
}

/**
 * Alignment must be a power of 2.
 */
static inline uintmax_t
isl_align(uintmax_t n, uintmax_t a)
{
   assert(a != 0 && isl_is_pow2(a));
   return (n + a - 1) & ~(a - 1);
}

static inline uintmax_t
isl_align_npot(uintmax_t n, uintmax_t a)
{
   assert(a > 0);
   return ((n + a - 1) / a) * a;
}

/**
 * Alignment must be a power of 2.
 */
static inline uintmax_t
isl_align_div(uintmax_t n, uintmax_t a)
{
   return isl_align(n, a) / a;
}

static inline uintmax_t
isl_align_div_npot(uintmax_t n, uintmax_t a)
{
   return isl_align_npot(n, a) / a;
}

/**
 * Log base 2, rounding towards zero.
 */
static inline uint32_t
isl_log2u(uint32_t n)
{
   assert(n != 0);
   return 31 - __builtin_clz(n);
}

static inline uint32_t
isl_minify(uint32_t n, uint32_t levels)
{
   if (unlikely(n == 0))
      return 0;
   else
      return MAX(n >> levels, 1);
}
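
/* Worked examples of the helpers above (illustration only):
 *
 *    isl_align(13, 8)      = 16   (round 13 up to a multiple of 8)
 *    isl_align_div(13, 8)  = 2    (16 / 8)
 *    isl_align_npot(10, 6) = 12   (non-power-of-two alignment)
 *    isl_log2u(32)         = 5
 *    isl_minify(100, 2)    = 25   (mip level 2 of a 100-wide surface)
 */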

static inline struct isl_extent3d
isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);

   assert(extent_sa.w % fmtl->bw == 0);
   assert(extent_sa.h % fmtl->bh == 0);
   assert(extent_sa.d % fmtl->bd == 0);

   return (struct isl_extent3d) {
      .w = extent_sa.w / fmtl->bw,
      .h = extent_sa.h / fmtl->bh,
      .d = extent_sa.d / fmtl->bd,
   };
}
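
/* Example: assuming a format whose compression block is 4x4x1 (bw = bh = 4,
 * bd = 1, as with the BC1 family), a surface-sample extent of 64x64x1
 * converts to an element extent of 16x16x1, since each element is one
 * compression block.
 */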

static inline struct isl_extent3d
isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);

   return (struct isl_extent3d) {
      .w = extent_el.w * fmtl->bw,
      .h = extent_el.h * fmtl->bh,
      .d = extent_el.d * fmtl->bd,
   };
}

void
isl_gen7_surf_fill_state_s(const struct isl_device *dev, void *state,
                           const struct isl_surf_fill_state_info *restrict info);

void
isl_gen75_surf_fill_state_s(const struct isl_device *dev, void *state,
                            const struct isl_surf_fill_state_info *restrict info);

void
isl_gen8_surf_fill_state_s(const struct isl_device *dev, void *state,
                           const struct isl_surf_fill_state_info *restrict info);

void
isl_gen9_surf_fill_state_s(const struct isl_device *dev, void *state,
                           const struct isl_surf_fill_state_info *restrict info);

void
isl_gen7_buffer_fill_state_s(void *state,
                             const struct isl_buffer_fill_state_info *restrict info);

void
isl_gen75_buffer_fill_state_s(void *state,
                              const struct isl_buffer_fill_state_info *restrict info);

void
isl_gen8_buffer_fill_state_s(void *state,
                             const struct isl_buffer_fill_state_info *restrict info);

void
isl_gen9_buffer_fill_state_s(void *state,
                             const struct isl_buffer_fill_state_info *restrict info);
293
src/intel/isl/isl_storage_image.c
Normal file
@@ -0,0 +1,293 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl_priv.h"
#include "brw_compiler.h"

bool
isl_is_storage_image_format(enum isl_format format)
{
   /* XXX: Maybe we should put this in the CSV? */

   switch (format) {
   case ISL_FORMAT_R32G32B32A32_UINT:
   case ISL_FORMAT_R32G32B32A32_SINT:
   case ISL_FORMAT_R32G32B32A32_FLOAT:
   case ISL_FORMAT_R32_UINT:
   case ISL_FORMAT_R32_SINT:
   case ISL_FORMAT_R32_FLOAT:
   case ISL_FORMAT_R16G16B16A16_UINT:
   case ISL_FORMAT_R16G16B16A16_SINT:
   case ISL_FORMAT_R16G16B16A16_FLOAT:
   case ISL_FORMAT_R32G32_UINT:
   case ISL_FORMAT_R32G32_SINT:
   case ISL_FORMAT_R32G32_FLOAT:
   case ISL_FORMAT_R8G8B8A8_UINT:
   case ISL_FORMAT_R8G8B8A8_SINT:
   case ISL_FORMAT_R16G16_UINT:
   case ISL_FORMAT_R16G16_SINT:
   case ISL_FORMAT_R16G16_FLOAT:
   case ISL_FORMAT_R8G8_UINT:
   case ISL_FORMAT_R8G8_SINT:
   case ISL_FORMAT_R16_UINT:
   case ISL_FORMAT_R16_FLOAT:
   case ISL_FORMAT_R16_SINT:
   case ISL_FORMAT_R8_UINT:
   case ISL_FORMAT_R8_SINT:
   case ISL_FORMAT_R10G10B10A2_UINT:
   case ISL_FORMAT_R10G10B10A2_UNORM:
   case ISL_FORMAT_R11G11B10_FLOAT:
   case ISL_FORMAT_R16G16B16A16_UNORM:
   case ISL_FORMAT_R16G16B16A16_SNORM:
   case ISL_FORMAT_R8G8B8A8_UNORM:
   case ISL_FORMAT_R8G8B8A8_SNORM:
   case ISL_FORMAT_R16G16_UNORM:
   case ISL_FORMAT_R16G16_SNORM:
   case ISL_FORMAT_R8G8_UNORM:
   case ISL_FORMAT_R8G8_SNORM:
   case ISL_FORMAT_R16_UNORM:
   case ISL_FORMAT_R16_SNORM:
   case ISL_FORMAT_R8_UNORM:
   case ISL_FORMAT_R8_SNORM:
      return true;
   default:
      return false;
   }
}

enum isl_format
isl_lower_storage_image_format(const struct isl_device *dev,
                               enum isl_format format)
{
   switch (format) {
   /* These are never lowered. Up to BDW we'll have to fall back to untyped
    * surface access for 128bpp formats.
    */
   case ISL_FORMAT_R32G32B32A32_UINT:
   case ISL_FORMAT_R32G32B32A32_SINT:
   case ISL_FORMAT_R32G32B32A32_FLOAT:
   case ISL_FORMAT_R32_UINT:
   case ISL_FORMAT_R32_SINT:
   case ISL_FORMAT_R32_FLOAT:
      return format;

   /* From HSW to BDW the only 64bpp format supported for typed access is
    * RGBA_UINT16. IVB falls back to untyped.
    */
   case ISL_FORMAT_R16G16B16A16_UINT:
   case ISL_FORMAT_R16G16B16A16_SINT:
   case ISL_FORMAT_R16G16B16A16_FLOAT:
   case ISL_FORMAT_R32G32_UINT:
   case ISL_FORMAT_R32G32_SINT:
   case ISL_FORMAT_R32G32_FLOAT:
      return (ISL_DEV_GEN(dev) >= 9 ? format :
              ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
              ISL_FORMAT_R16G16B16A16_UINT :
              ISL_FORMAT_R32G32_UINT);

   /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component
    * are supported. IVB doesn't support formats with more than one component
    * for typed access. For 8 and 16 bpp formats IVB relies on the
    * undocumented behavior that typed reads from R_UINT8 and R_UINT16
    * surfaces actually do a 32-bit misaligned read. The alternative would be
    * to use two surface state entries with different formats for each image,
    * one for reading (using R_UINT32) and another one for writing (using
    * R_UINT8 or R_UINT16), but that would complicate the shaders we generate
    * even more.
    */
   case ISL_FORMAT_R8G8B8A8_UINT:
   case ISL_FORMAT_R8G8B8A8_SINT:
      return (ISL_DEV_GEN(dev) >= 9 ? format :
              ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
              ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT);

   case ISL_FORMAT_R16G16_UINT:
   case ISL_FORMAT_R16G16_SINT:
   case ISL_FORMAT_R16G16_FLOAT:
      return (ISL_DEV_GEN(dev) >= 9 ? format :
              ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
              ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT);

   case ISL_FORMAT_R8G8_UINT:
   case ISL_FORMAT_R8G8_SINT:
      return (ISL_DEV_GEN(dev) >= 9 ? format :
              ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
              ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT);

   case ISL_FORMAT_R16_UINT:
   case ISL_FORMAT_R16_FLOAT:
   case ISL_FORMAT_R16_SINT:
      return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT);

   case ISL_FORMAT_R8_UINT:
   case ISL_FORMAT_R8_SINT:
      return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT);

   /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported
    * by the hardware.
    */
   case ISL_FORMAT_R10G10B10A2_UINT:
   case ISL_FORMAT_R10G10B10A2_UNORM:
   case ISL_FORMAT_R11G11B10_FLOAT:
      return ISL_FORMAT_R32_UINT;

   /* No normalized fixed-point formats are supported by the hardware. */
   case ISL_FORMAT_R16G16B16A16_UNORM:
   case ISL_FORMAT_R16G16B16A16_SNORM:
      return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
              ISL_FORMAT_R16G16B16A16_UINT :
              ISL_FORMAT_R32G32_UINT);

   case ISL_FORMAT_R8G8B8A8_UNORM:
   case ISL_FORMAT_R8G8B8A8_SNORM:
      return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
              ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT);

   case ISL_FORMAT_R16G16_UNORM:
   case ISL_FORMAT_R16G16_SNORM:
      return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
              ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT);

   case ISL_FORMAT_R8G8_UNORM:
   case ISL_FORMAT_R8G8_SNORM:
      return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
              ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT);

   case ISL_FORMAT_R16_UNORM:
   case ISL_FORMAT_R16_SNORM:
      return ISL_FORMAT_R16_UINT;

   case ISL_FORMAT_R8_UNORM:
   case ISL_FORMAT_R8_SNORM:
      return ISL_FORMAT_R8_UINT;

   default:
      assert(!"Unknown image format");
      return ISL_FORMAT_UNSUPPORTED;
   }
}
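
/* Illustration of the lowering above for ISL_FORMAT_R8G8B8A8_UNORM: on
 * Haswell and later it lowers to ISL_FORMAT_R8G8B8A8_UINT (same bit layout,
 * UINT typed access), while on Ivybridge it lowers all the way to
 * ISL_FORMAT_R32_UINT and the shader (un)packs the 32-bit value itself.
 */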

static const struct brw_image_param image_param_defaults = {
   /* Set the swizzling shifts to all-ones to effectively disable
    * swizzling -- See emit_address_calculation() in
    * brw_fs_surface_builder.cpp for a more detailed explanation of
    * these parameters.
    */
   .swizzling = { 0xff, 0xff },
};

void
isl_surf_fill_image_param(const struct isl_device *dev,
                          struct brw_image_param *param,
                          const struct isl_surf *surf,
                          const struct isl_view *view)
{
   *param = image_param_defaults;

   param->size[0] = isl_minify(surf->logical_level0_px.w, view->base_level);
   param->size[1] = isl_minify(surf->logical_level0_px.h, view->base_level);
   if (surf->dim == ISL_SURF_DIM_3D) {
      param->size[2] = isl_minify(surf->logical_level0_px.d, view->base_level);
   } else {
      param->size[2] = surf->logical_level0_px.array_len -
                       view->base_array_layer;
   }

   isl_surf_get_image_offset_el(surf, view->base_level, view->base_array_layer,
                                0, &param->offset[0], &param->offset[1]);

   const int cpp = isl_format_get_layout(surf->format)->bs;
   param->stride[0] = cpp;
   param->stride[1] = surf->row_pitch / cpp;

   const struct isl_extent3d image_align_sa =
      isl_surf_get_image_alignment_sa(surf);
   if (ISL_DEV_GEN(dev) < 9 && surf->dim == ISL_SURF_DIM_3D) {
      param->stride[2] = isl_align_npot(param->size[0], image_align_sa.w);
      param->stride[3] = isl_align_npot(param->size[1], image_align_sa.h);
   } else {
      param->stride[2] = 0;
      param->stride[3] = isl_surf_get_array_pitch_el_rows(surf);
   }

   switch (surf->tiling) {
   case ISL_TILING_LINEAR:
      /* image_param_defaults is good enough */
      break;

   case ISL_TILING_X:
      /* An X tile is a rectangular block of 512x8 bytes. */
      param->tiling[0] = isl_log2u(512 / cpp);
      param->tiling[1] = isl_log2u(8);

      if (dev->has_bit6_swizzling) {
         /* Right shifts required to swizzle bits 9 and 10 of the memory
          * address with bit 6.
          */
         param->swizzling[0] = 3;
         param->swizzling[1] = 4;
      }
      break;

   case ISL_TILING_Y0:
      /* The layout of a Y-tiled surface in memory isn't really fundamentally
       * different to the layout of an X-tiled surface, we simply pretend that
       * the surface is broken up into a number of smaller 16Bx32 tiles, each
       * one arranged in X-major order just like is the case for X-tiling.
       */
      param->tiling[0] = isl_log2u(16 / cpp);
      param->tiling[1] = isl_log2u(32);

      if (dev->has_bit6_swizzling) {
         /* Right shift required to swizzle bit 9 of the memory address with
          * bit 6.
          */
         param->swizzling[0] = 3;
         param->swizzling[1] = 0xff;
      }
      break;

   default:
      assert(!"Unhandled storage image tiling");
   }

   /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
    * address calculation algorithm (emit_address_calculation() in
    * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
    * modulus equal to the LOD.
    */
   param->tiling[2] = (ISL_DEV_GEN(dev) < 9 && surf->dim == ISL_SURF_DIM_3D ?
                       view->base_level : 0);
}
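
/* Worked example of the X-tiling parameters above, assuming a 32-bpp format
 * (cpp = 4). An X tile is 512 bytes wide and 8 rows high, so:
 *
 *    param->tiling[0] = isl_log2u(512 / 4) = isl_log2u(128) = 7
 *    param->tiling[1] = isl_log2u(8) = 3
 *
 * i.e. the tile covers 2^7 = 128 pixels in x and 2^3 = 8 rows in y.
 */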

void
isl_buffer_fill_image_param(const struct isl_device *dev,
                            struct brw_image_param *param,
                            enum isl_format format,
                            uint64_t size)
{
   *param = image_param_defaults;

   param->stride[0] = isl_format_layouts[format].bs;
   param->size[0] = size / param->stride[0];
}
480
src/intel/isl/isl_surface_state.c
Normal file
@@ -0,0 +1,480 @@
/*
 * Copyright 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdint.h>

#define __gen_address_type uint64_t
#define __gen_user_data void

static inline uint64_t
__gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta)
{
   return addr + delta;
}

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#include "isl_priv.h"

#define __PASTE2(x, y) x ## y
#define __PASTE(x, y) __PASTE2(x, y)
#define isl_genX(x) __PASTE(isl_, genX(x))

#if GEN_GEN >= 8
static const uint8_t isl_to_gen_halign[] = {
   [4] = HALIGN4,
   [8] = HALIGN8,
   [16] = HALIGN16,
};

static const uint8_t isl_to_gen_valign[] = {
   [4] = VALIGN4,
   [8] = VALIGN8,
   [16] = VALIGN16,
};
#else
static const uint8_t isl_to_gen_halign[] = {
   [4] = HALIGN_4,
   [8] = HALIGN_8,
};

static const uint8_t isl_to_gen_valign[] = {
   [2] = VALIGN_2,
   [4] = VALIGN_4,
};
#endif

#if GEN_GEN >= 8
static const uint8_t isl_to_gen_tiling[] = {
   [ISL_TILING_LINEAR] = LINEAR,
   [ISL_TILING_X] = XMAJOR,
   [ISL_TILING_Y0] = YMAJOR,
   [ISL_TILING_Yf] = YMAJOR,
   [ISL_TILING_Ys] = YMAJOR,
   [ISL_TILING_W] = WMAJOR,
};
#endif

#if GEN_GEN >= 8
static const uint32_t isl_to_gen_multisample_layout[] = {
   [ISL_MSAA_LAYOUT_NONE] = MSS,
   [ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL,
   [ISL_MSAA_LAYOUT_ARRAY] = MSS,
};
#else
static const uint32_t isl_to_gen_multisample_layout[] = {
   [ISL_MSAA_LAYOUT_NONE] = MSFMT_MSS,
   [ISL_MSAA_LAYOUT_INTERLEAVED] = MSFMT_DEPTH_STENCIL,
   [ISL_MSAA_LAYOUT_ARRAY] = MSFMT_MSS,
};
#endif

static uint8_t
get_surftype(enum isl_surf_dim dim, isl_surf_usage_flags_t usage)
{
   switch (dim) {
   default:
      unreachable("bad isl_surf_dim");
   case ISL_SURF_DIM_1D:
      assert(!(usage & ISL_SURF_USAGE_CUBE_BIT));
      return SURFTYPE_1D;
   case ISL_SURF_DIM_2D:
      if (usage & ISL_SURF_USAGE_STORAGE_BIT) {
         /* Storage images are always plain 2-D, not cube */
         return SURFTYPE_2D;
      } else if (usage & ISL_SURF_USAGE_CUBE_BIT) {
         return SURFTYPE_CUBE;
      } else {
         return SURFTYPE_2D;
      }
   case ISL_SURF_DIM_3D:
      assert(!(usage & ISL_SURF_USAGE_CUBE_BIT));
      return SURFTYPE_3D;
   }
}

/**
 * Get the values to pack into RENDER_SURFACE_STATE.SurfaceHorizontalAlignment
 * and SurfaceVerticalAlignment.
 */
static void
get_halign_valign(const struct isl_surf *surf,
                  uint32_t *halign, uint32_t *valign)
{
   if (GEN_GEN >= 9) {
      if (isl_tiling_is_std_y(surf->tiling) ||
          surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
         /* The hardware ignores the alignment values. Anyway, the surface's
          * true alignment is likely outside the enum range of HALIGN* and
          * VALIGN*.
          */
         *halign = 0;
         *valign = 0;
      } else {
         /* In Skylake, RENDER_SURFACE_STATE.SurfaceVerticalAlignment is in
          * units of surface elements (not pixels nor samples). For compressed
          * formats, a "surface element" is defined as a compression block. For
          * example, if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat
          * is an ETC2 format (ETC2 has a block height of 4), then the vertical
          * alignment is 4 compression blocks or, equivalently, 16 pixels.
          */
         struct isl_extent3d image_align_el
            = isl_surf_get_image_alignment_el(surf);

         *halign = isl_to_gen_halign[image_align_el.width];
         *valign = isl_to_gen_valign[image_align_el.height];
      }
   } else {
      /* Pre-Skylake, RENDER_SURFACE_STATE.SurfaceVerticalAlignment is in
       * units of surface samples. For example, if SurfaceVerticalAlignment
       * is VALIGN_4 and the surface is singlesampled, then for any surface
       * format (compressed or not) the vertical alignment is
       * 4 pixels.
       */
      struct isl_extent3d image_align_sa
         = isl_surf_get_image_alignment_sa(surf);

      *halign = isl_to_gen_halign[image_align_sa.width];
      *valign = isl_to_gen_valign[image_align_sa.height];
   }
}

#if GEN_GEN >= 8
static uint32_t
get_qpitch(const struct isl_surf *surf)
{
   switch (surf->dim) {
   default:
      assert(!"Bad isl_surf_dim");
   case ISL_SURF_DIM_1D:
      if (GEN_GEN >= 9) {
         /* QPitch is usually expressed as rows of surface elements (where
          * a surface element is a compression block or a single surface
          * sample). Skylake 1D is an outlier.
          *
          * From the Skylake BSpec >> Memory Views >> Common Surface
          * Formats >> Surface Layout and Tiling >> 1D Surfaces:
          *
          *    Surface QPitch specifies the distance in pixels between array
          *    slices.
          */
         return isl_surf_get_array_pitch_el(surf);
      } else {
         return isl_surf_get_array_pitch_el_rows(surf);
      }
   case ISL_SURF_DIM_2D:
   case ISL_SURF_DIM_3D:
      if (GEN_GEN >= 9) {
         return isl_surf_get_array_pitch_el_rows(surf);
      } else {
         /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch:
          *
          *    "This field must be set to an integer multiple of the Surface
          *    Vertical Alignment. For compressed textures (BC*, FXT1,
          *    ETC*, and EAC* Surface Formats), this field is in units of
          *    rows in the uncompressed surface, and must be set to an
          *    integer multiple of the vertical alignment parameter "j"
          *    defined in the Common Surface Formats section."
          */
         return isl_surf_get_array_pitch_sa_rows(surf);
      }
   }
}
#endif /* GEN_GEN >= 8 */
|
||||
|
||||
void
|
||||
isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
|
||||
const struct isl_surf_fill_state_info *restrict info)
|
||||
{
|
||||
uint32_t halign, valign;
|
||||
get_halign_valign(info->surf, &halign, &valign);
|
||||
|
||||
struct GENX(RENDER_SURFACE_STATE) s = {
|
||||
.SurfaceType = get_surftype(info->surf->dim, info->view->usage),
|
||||
.SurfaceArray = info->surf->phys_level0_sa.array_len > 1,
|
||||
.SurfaceVerticalAlignment = valign,
|
||||
.SurfaceHorizontalAlignment = halign,
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
.TileMode = isl_to_gen_tiling[info->surf->tiling],
|
||||
#else
|
||||
.TiledSurface = info->surf->tiling != ISL_TILING_LINEAR,
|
||||
.TileWalk = info->surf->tiling == ISL_TILING_X ? TILEWALK_XMAJOR :
|
||||
TILEWALK_YMAJOR,
|
||||
#endif
|
||||
|
||||
.VerticalLineStride = 0,
|
||||
.VerticalLineStrideOffset = 0,
|
||||
|
||||
#if (GEN_GEN == 7)
|
||||
.SurfaceArraySpacing = info->surf->array_pitch_span ==
|
||||
ISL_ARRAY_PITCH_SPAN_COMPACT,
|
||||
#endif
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
.SamplerL2BypassModeDisable = true,
|
||||
#endif
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
.RenderCacheReadWriteMode = WriteOnlyCache,
|
||||
#else
|
||||
.RenderCacheReadWriteMode = 0,
|
||||
#endif
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
.CubeFaceEnablePositiveZ = 1,
|
||||
.CubeFaceEnableNegativeZ = 1,
|
||||
.CubeFaceEnablePositiveY = 1,
|
||||
.CubeFaceEnableNegativeY = 1,
|
||||
.CubeFaceEnablePositiveX = 1,
|
||||
.CubeFaceEnableNegativeX = 1,
|
||||
#else
|
||||
.CubeFaceEnables = 0x3f,
|
||||
#endif
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
.SurfaceQPitch = get_qpitch(info->surf) >> 2,
|
||||
#endif
|
||||
|
||||
.Width = info->surf->logical_level0_px.width - 1,
|
||||
.Height = info->surf->logical_level0_px.height - 1,
|
||||
.Depth = 0, /* TEMPLATE */
|
||||
|
||||
.RenderTargetViewExtent = 0, /* TEMPLATE */
|
||||
.MinimumArrayElement = 0, /* TEMPLATE */
|
||||
|
||||
.MultisampledSurfaceStorageFormat =
|
||||
isl_to_gen_multisample_layout[info->surf->msaa_layout],
|
||||
.NumberofMultisamples = ffs(info->surf->samples) - 1,
|
||||
.MultisamplePositionPaletteIndex = 0, /* UNUSED */
|
||||
|
||||
.XOffset = 0,
|
||||
.YOffset = 0,
|
||||
|
||||
.ResourceMinLOD = 0.0,
|
||||
|
||||
.MIPCountLOD = 0, /* TEMPLATE */
|
||||
.SurfaceMinLOD = 0, /* TEMPLATE */
|
||||
|
||||
#if (GEN_GEN >= 8 || GEN_IS_HASWELL)
|
||||
.ShaderChannelSelectRed = info->view->channel_select[0],
|
||||
.ShaderChannelSelectGreen = info->view->channel_select[1],
|
||||
.ShaderChannelSelectBlue = info->view->channel_select[2],
|
||||
.ShaderChannelSelectAlpha = info->view->channel_select[3],
|
||||
#endif
|
||||
|
||||
.SurfaceBaseAddress = info->address,
|
||||
.MOCS = info->mocs,
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
.AuxiliarySurfaceMode = AUX_NONE,
|
||||
#else
|
||||
.MCSEnable = false,
|
||||
#endif
|
||||
};
|
||||
|
||||
   if (info->surf->tiling == ISL_TILING_W) {
      /* From the Broadwell PRM documentation for this field:
       *
       *    "If the surface is a stencil buffer (and thus has Tile Mode set
       *    to TILEMODE_WMAJOR), the pitch must be set to 2x the value
       *    computed based on width, as the stencil buffer is stored with
       *    two rows interleaved."
       */
      s.SurfacePitch = info->surf->row_pitch * 2 - 1;
   } else {
      s.SurfacePitch = info->surf->row_pitch - 1;
   }

   if (info->view->usage & ISL_SURF_USAGE_STORAGE_BIT) {
      s.SurfaceFormat = isl_lower_storage_image_format(dev, info->view->format);
   } else {
      s.SurfaceFormat = info->view->format;
   }

   switch (s.SurfaceType) {
   case SURFTYPE_1D:
   case SURFTYPE_2D:
      s.MinimumArrayElement = info->view->base_array_layer;

      /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth:
       *
       *    For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced
       *    by one for each increase from zero of Minimum Array Element. For
       *    example, if Minimum Array Element is set to 1024 on a 2D surface,
       *    the range of this field is reduced to [0,1023].
       *
       * In other words, 'Depth' is the number of array layers.
       */
      s.Depth = info->view->array_len - 1;

      /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent:
       *
       *    For Render Target and Typed Dataport 1D and 2D Surfaces:
       *    This field must be set to the same value as the Depth field.
       */
      s.RenderTargetViewExtent = s.Depth;
      break;
   case SURFTYPE_CUBE:
      s.MinimumArrayElement = info->view->base_array_layer;
      /* Same as SURFTYPE_2D, but divided by 6 */
      s.Depth = info->view->array_len / 6 - 1;
      s.RenderTargetViewExtent = s.Depth;
      break;
   case SURFTYPE_3D:
      s.MinimumArrayElement = info->view->base_array_layer;

      /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth:
       *
       *    If the volume texture is MIP-mapped, this field specifies the
       *    depth of the base MIP level.
       */
      s.Depth = info->surf->logical_level0_px.depth - 1;

      /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent:
       *
       *    For Render Target and Typed Dataport 3D Surfaces: This field
       *    indicates the extent of the accessible 'R' coordinates minus 1 on
       *    the LOD currently being rendered to.
       */
      s.RenderTargetViewExtent = isl_minify(info->surf->logical_level0_px.depth,
                                            info->view->base_level) - 1;
      break;
   default:
      unreachable("bad SurfaceType");
   }

   if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
      /* For render target surfaces, the hardware interprets the field
       * MIPCount/LOD as LOD. The Broadwell PRM says:
       *
       *    MIPCountLOD defines the LOD that will be rendered into.
       *    SurfaceMinLOD is ignored.
       */
      s.MIPCountLOD = info->view->base_level;
      s.SurfaceMinLOD = 0;
   } else {
      /* For non-render-target surfaces, the hardware interprets the field
       * MIPCount/LOD as MIPCount. The range of levels accessible by the
       * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD].
       */
      s.SurfaceMinLOD = info->view->base_level;
      s.MIPCountLOD = MAX(info->view->levels, 1) - 1;
   }

#if GEN_GEN >= 8
   /* From the CHV PRM, Volume 2d, page 321 (RENDER_SURFACE_STATE dword 0
    * bit 9 "Sampler L2 Bypass Mode Disable" Programming Notes):
    *
    *    This bit must be set for the following surface types: BC2_UNORM
    *    BC3_UNORM BC5_UNORM BC5_SNORM BC7_UNORM
    */
   if (GEN_GEN >= 9 || dev->info->is_cherryview) {
      switch (info->view->format) {
      case ISL_FORMAT_BC2_UNORM:
      case ISL_FORMAT_BC3_UNORM:
      case ISL_FORMAT_BC5_UNORM:
      case ISL_FORMAT_BC5_SNORM:
      case ISL_FORMAT_BC7_UNORM:
         s.SamplerL2BypassModeDisable = true;
         break;
      default:
         break;
      }
   }
#endif

   if (GEN_GEN <= 8) {
      /* Prior to Sky Lake, we only have one bit for the clear color which
       * gives us 0 or 1 in whatever the surface's format happens to be.
       */
      if (isl_format_has_int_channel(info->view->format)) {
         for (unsigned i = 0; i < 4; i++) {
            assert(info->clear_color.u32[i] == 0 ||
                   info->clear_color.u32[i] == 1);
         }
      } else {
         for (unsigned i = 0; i < 4; i++) {
            assert(info->clear_color.f32[i] == 0.0f ||
                   info->clear_color.f32[i] == 1.0f);
         }
      }
      s.RedClearColor = info->clear_color.u32[0] != 0;
      s.GreenClearColor = info->clear_color.u32[1] != 0;
      s.BlueClearColor = info->clear_color.u32[2] != 0;
      s.AlphaClearColor = info->clear_color.u32[3] != 0;
   } else {
      s.RedClearColor = info->clear_color.u32[0];
      s.GreenClearColor = info->clear_color.u32[1];
      s.BlueClearColor = info->clear_color.u32[2];
      s.AlphaClearColor = info->clear_color.u32[3];
   }

   GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s);
}

void
isl_genX(buffer_fill_state_s)(void *state,
                              const struct isl_buffer_fill_state_info *restrict info)
{
   uint32_t num_elements = info->size / info->stride;
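
   /* For SURFTYPE_BUFFER, the (num_elements - 1) value is split across
    * three fields below: Width holds bits 6:0, Height bits 20:7, and
    * Depth bits 26:21, for a 27-bit element count in total.
    */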
   struct GENX(RENDER_SURFACE_STATE) surface_state = {
      .SurfaceType = SURFTYPE_BUFFER,
      .SurfaceArray = false,
      .SurfaceFormat = info->format,
      .SurfaceVerticalAlignment = isl_to_gen_valign[4],
      .SurfaceHorizontalAlignment = isl_to_gen_halign[4],
      .Height = ((num_elements - 1) >> 7) & 0x3fff,
      .Width = (num_elements - 1) & 0x7f,
      .Depth = ((num_elements - 1) >> 21) & 0x3f,
      .SurfacePitch = info->stride - 1,
      .NumberofMultisamples = MULTISAMPLECOUNT_1,

#if (GEN_GEN >= 8)
      .TileMode = LINEAR,
#else
      .TiledSurface = false,
#endif

#if (GEN_GEN >= 8)
      .SamplerL2BypassModeDisable = true,
      .RenderCacheReadWriteMode = WriteOnlyCache,
#else
      .RenderCacheReadWriteMode = 0,
#endif

      .MOCS = info->mocs,

#if (GEN_GEN >= 8 || GEN_IS_HASWELL)
      .ShaderChannelSelectRed = SCS_RED,
      .ShaderChannelSelectGreen = SCS_GREEN,
      .ShaderChannelSelectBlue = SCS_BLUE,
      .ShaderChannelSelectAlpha = SCS_ALPHA,
#endif
      .SurfaceBaseAddress = info->address,
   };

   GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state);
}

1
src/intel/isl/tests/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
/isl_surf_get_image_offset_test

278
src/intel/isl/tests/isl_surf_get_image_offset_test.c
Normal file
@@ -0,0 +1,278 @@
/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#include "brw_device_info.h"
#include "isl.h"
#include "isl_priv.h"

#define BDW_GT2_DEVID 0x161a

// An assert that works regardless of NDEBUG.
#define t_assert(cond) \
   do { \
      if (!(cond)) { \
         fprintf(stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
         abort(); \
      } \
   } while (0)

static void
t_assert_extent4d(const struct isl_extent4d *e, uint32_t width,
                  uint32_t height, uint32_t depth, uint32_t array_len)
{
   t_assert(e->width == width);
   t_assert(e->height == height);
   t_assert(e->depth == depth);
   t_assert(e->array_len == array_len);
}

static void
t_assert_image_alignment_el(const struct isl_surf *surf,
                            uint32_t w, uint32_t h, uint32_t d)
{
   struct isl_extent3d align_el;

   align_el = isl_surf_get_image_alignment_el(surf);
   t_assert(align_el.w == w);
   t_assert(align_el.h == h);
   t_assert(align_el.d == d);
}

static void
t_assert_image_alignment_sa(const struct isl_surf *surf,
                            uint32_t w, uint32_t h, uint32_t d)
{
   struct isl_extent3d align_sa;

   align_sa = isl_surf_get_image_alignment_sa(surf);
   t_assert(align_sa.w == w);
   t_assert(align_sa.h == h);
   t_assert(align_sa.d == d);
}

static void
t_assert_offset_el(const struct isl_surf *surf,
                   uint32_t level,
                   uint32_t logical_array_layer,
                   uint32_t logical_z_offset_px,
                   uint32_t expected_x_offset_el,
                   uint32_t expected_y_offset_el)
{
   uint32_t x, y;
   isl_surf_get_image_offset_el(surf, level, logical_array_layer,
                                logical_z_offset_px, &x, &y);

   t_assert(x == expected_x_offset_el);
   t_assert(y == expected_y_offset_el);
}

static void
t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width,
                        uint32_t height, uint32_t depth, uint32_t array_len)
{
   t_assert_extent4d(&surf->phys_level0_sa, width, height, depth, array_len);
}

static void
t_assert_gen4_3d_layer(const struct isl_surf *surf,
                       uint32_t level,
                       uint32_t aligned_width,
                       uint32_t aligned_height,
                       uint32_t depth,
                       uint32_t horiz_layers,
                       uint32_t vert_layers,
                       uint32_t *base_y)
{
   for (uint32_t z = 0; z < depth; ++z) {
      t_assert_offset_el(surf, level, 0, z,
                         aligned_width * (z % horiz_layers),
                         *base_y + aligned_height * (z / horiz_layers));
   }

   *base_y += aligned_height * vert_layers;
}

static void
test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void)
{
   bool ok;

   struct isl_device dev;
   isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID),
                   /*bit6_swizzle*/ false);

   struct isl_surf surf;
   ok = isl_surf_init(&dev, &surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = ISL_FORMAT_R8G8B8A8_UNORM,
                      .width = 512,
                      .height = 512,
                      .depth = 1,
                      .levels = 10,
                      .array_len = 1,
                      .samples = 1,
                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                               ISL_SURF_USAGE_DISABLE_AUX_BIT,
                      .tiling_flags = ISL_TILING_Y0_BIT);
   t_assert(ok);

   t_assert_image_alignment_el(&surf, 4, 4, 1);
   t_assert_image_alignment_sa(&surf, 4, 4, 1);
   t_assert_phys_level0_sa(&surf, 512, 512, 1, 1);
   t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 772);
   t_assert(isl_surf_get_array_pitch_el_rows(&surf) ==
            isl_surf_get_array_pitch_sa_rows(&surf));

   /* Row pitch should be the minimum possible. */
   t_assert(surf.row_pitch == 2048);

   t_assert_offset_el(&surf, 0, 0, 0, 0, 0);     // +0, +0
   t_assert_offset_el(&surf, 1, 0, 0, 0, 512);   // +0, +512
   t_assert_offset_el(&surf, 2, 0, 0, 256, 512); // +256, +0
   t_assert_offset_el(&surf, 3, 0, 0, 256, 640); // +0, +128
   t_assert_offset_el(&surf, 4, 0, 0, 256, 704); // +0, +64
   t_assert_offset_el(&surf, 5, 0, 0, 256, 736); // +0, +32
   t_assert_offset_el(&surf, 6, 0, 0, 256, 752); // +0, +16
   t_assert_offset_el(&surf, 7, 0, 0, 256, 760); // +0, +8
   t_assert_offset_el(&surf, 8, 0, 0, 256, 764); // +0, +4
   t_assert_offset_el(&surf, 9, 0, 0, 256, 768); // +0, +4
}

static void
test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void)
{
   bool ok;

   struct isl_device dev;
   isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID),
                   /*bit6_swizzle*/ false);

   struct isl_surf surf;
   ok = isl_surf_init(&dev, &surf,
                      .dim = ISL_SURF_DIM_2D,
                      .format = ISL_FORMAT_R8G8B8A8_UNORM,
                      .width = 1024,
                      .height = 1024,
                      .depth = 1,
                      .levels = 11,
                      .array_len = 6,
                      .samples = 1,
                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                               ISL_SURF_USAGE_DISABLE_AUX_BIT,
                      .tiling_flags = ISL_TILING_Y0_BIT);
   t_assert(ok);

   t_assert_image_alignment_el(&surf, 4, 4, 1);
   t_assert_image_alignment_sa(&surf, 4, 4, 1);

   t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 1540);
   t_assert(isl_surf_get_array_pitch_el_rows(&surf) ==
            isl_surf_get_array_pitch_sa_rows(&surf));

   /* Row pitch should be the minimum possible. */
   t_assert(surf.row_pitch == 4096);

   for (uint32_t a = 0; a < 6; ++a) {
      uint32_t b = a * isl_surf_get_array_pitch_sa_rows(&surf);

      t_assert_offset_el(&surf, 0, a, 0, 0, b + 0);       // +0, +0
      t_assert_offset_el(&surf, 1, a, 0, 0, b + 1024);    // +0, +1024
      t_assert_offset_el(&surf, 2, a, 0, 512, b + 1024);  // +512, +0
      t_assert_offset_el(&surf, 3, a, 0, 512, b + 1280);  // +0, +256
      t_assert_offset_el(&surf, 4, a, 0, 512, b + 1408);  // +0, +128
      t_assert_offset_el(&surf, 5, a, 0, 512, b + 1472);  // +0, +64
      t_assert_offset_el(&surf, 6, a, 0, 512, b + 1504);  // +0, +32
      t_assert_offset_el(&surf, 7, a, 0, 512, b + 1520);  // +0, +16
      t_assert_offset_el(&surf, 8, a, 0, 512, b + 1528);  // +0, +8
      t_assert_offset_el(&surf, 9, a, 0, 512, b + 1532);  // +0, +4
      t_assert_offset_el(&surf, 10, a, 0, 512, b + 1536); // +0, +4
   }

   /* The layout below assumes a specific array pitch. It will need updating
    * if isl's array pitch calculations ever change.
    */
   t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 1540);

   /* skip the remaining array layers */
}

static void
test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(void)
{
   bool ok;

   struct isl_device dev;
   isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID),
                   /*bit6_swizzle*/ false);

   struct isl_surf surf;
   ok = isl_surf_init(&dev, &surf,
                      .dim = ISL_SURF_DIM_3D,
                      .format = ISL_FORMAT_R8G8B8A8_UNORM,
                      .width = 256,
                      .height = 256,
                      .depth = 256,
                      .levels = 9,
                      .array_len = 1,
                      .samples = 1,
                      .usage = ISL_SURF_USAGE_TEXTURE_BIT |
                               ISL_SURF_USAGE_DISABLE_AUX_BIT,
                      .tiling_flags = ISL_TILING_Y0_BIT);
   t_assert(ok);

   t_assert_image_alignment_el(&surf, 4, 4, 1);
   t_assert_image_alignment_sa(&surf, 4, 4, 1);
   t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 74916);
   t_assert(isl_surf_get_array_pitch_sa_rows(&surf) ==
            isl_surf_get_array_pitch_el_rows(&surf));

   uint32_t base_y = 0;

   t_assert_gen4_3d_layer(&surf, 0, 256, 256, 256,   1, 256, &base_y);
   t_assert_gen4_3d_layer(&surf, 1, 128, 128, 128,   2,  64, &base_y);
   t_assert_gen4_3d_layer(&surf, 2,  64,  64,  64,   4,  16, &base_y);
   t_assert_gen4_3d_layer(&surf, 3,  32,  32,  32,   8,   4, &base_y);
   t_assert_gen4_3d_layer(&surf, 4,  16,  16,  16,  16,   1, &base_y);
   t_assert_gen4_3d_layer(&surf, 5,   8,   8,   8,  32,   1, &base_y);
   t_assert_gen4_3d_layer(&surf, 6,   4,   4,   4,  64,   1, &base_y);
   t_assert_gen4_3d_layer(&surf, 7,   4,   4,   2, 128,   1, &base_y);
   t_assert_gen4_3d_layer(&surf, 8,   4,   4,   1, 256,   1, &base_y);
}

int main(void)
{
   /* FINISHME: Add tests for npot sizes */
   /* FINISHME: Add tests for 1D surfaces */

   test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0();
   test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0();
   test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0();
}

8
src/intel/vulkan/.gitignore
vendored
Normal file
@@ -0,0 +1,8 @@
# Generated source files
/*_spirv_autogen.h
/anv_entrypoints.c
/anv_entrypoints.h
/wayland-drm-protocol.c
/wayland-drm-client-protocol.h
/dev_icd.json
/gen*_pack.h

212
src/intel/vulkan/Makefile.am
Normal file
@@ -0,0 +1,212 @@
# Copyright © 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

SUBDIRS = . tests

vulkan_includedir = $(includedir)/vulkan

vulkan_include_HEADERS = \
	$(top_srcdir)/include/vulkan/vk_platform.h \
	$(top_srcdir)/include/vulkan/vulkan.h \
	$(top_srcdir)/include/vulkan/vulkan_intel.h

# Used when generating entrypoints to filter out unwanted extensions
VULKAN_ENTRYPOINT_CPPFLAGS = \
	-I$(top_srcdir)/include/vulkan \
	-DVK_USE_PLATFORM_XCB_KHR \
	-DVK_USE_PLATFORM_WAYLAND_KHR

lib_LTLIBRARIES = libvulkan_intel.la

check_LTLIBRARIES = libvulkan-test.la

PER_GEN_LIBS = \
	libanv-gen7.la \
	libanv-gen75.la \
	libanv-gen8.la \
	libanv-gen9.la

noinst_LTLIBRARIES = $(PER_GEN_LIBS)

# The gallium includes are for the util/u_math.h include from main/macros.h

AM_CPPFLAGS = \
	$(INTEL_CFLAGS) \
	$(VALGRIND_CFLAGS) \
	$(DEFINES) \
	-I$(top_srcdir)/include \
	-I$(top_srcdir)/src \
	-I$(top_srcdir)/src/compiler \
	-I$(top_srcdir)/src/mapi \
	-I$(top_srcdir)/src/mesa \
	-I$(top_srcdir)/src/mesa/drivers/dri/common \
	-I$(top_srcdir)/src/mesa/drivers/dri/i965 \
	-I$(top_srcdir)/src/gallium/auxiliary \
	-I$(top_srcdir)/src/gallium/include \
	-I$(top_srcdir)/src/intel/ \
	-I$(top_builddir)/src \
	-I$(top_builddir)/src/compiler \
	-I$(top_builddir)/src/compiler/nir \
	-I$(top_builddir)/src/intel

libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init

VULKAN_SOURCES = \
	anv_allocator.c \
	anv_cmd_buffer.c \
	anv_batch_chain.c \
	anv_descriptor_set.c \
	anv_device.c \
	anv_dump.c \
	anv_entrypoints.c \
	anv_entrypoints.h \
	anv_formats.c \
	anv_image.c \
	anv_intel.c \
	anv_meta.c \
	anv_meta_blit.c \
	anv_meta_blit2d.c \
	anv_meta_clear.c \
	anv_meta_copy.c \
	anv_meta_resolve.c \
	anv_nir_apply_dynamic_offsets.c \
	anv_nir_apply_pipeline_layout.c \
	anv_nir_lower_push_constants.c \
	anv_pass.c \
	anv_pipeline.c \
	anv_pipeline_cache.c \
	anv_private.h \
	anv_query.c \
	anv_util.c \
	anv_wsi.c \
	anv_wsi_x11.c

BUILT_SOURCES = \
	anv_entrypoints.h \
	anv_entrypoints.c

libanv_gen7_la_SOURCES = \
	genX_cmd_buffer.c \
	genX_pipeline.c \
	gen7_cmd_buffer.c \
	gen7_pipeline.c \
	genX_state.c
libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=70

libanv_gen75_la_SOURCES = \
	genX_cmd_buffer.c \
	genX_pipeline.c \
	gen7_cmd_buffer.c \
	gen7_pipeline.c \
	genX_state.c
libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=75

libanv_gen8_la_SOURCES = \
	genX_cmd_buffer.c \
	genX_pipeline.c \
	gen8_cmd_buffer.c \
	gen8_pipeline.c \
	genX_state.c
libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=80

libanv_gen9_la_SOURCES = \
	genX_cmd_buffer.c \
	genX_pipeline.c \
	gen8_cmd_buffer.c \
	gen8_pipeline.c \
	genX_state.c
libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=90

if HAVE_EGL_PLATFORM_WAYLAND
BUILT_SOURCES += \
	wayland-drm-protocol.c \
	wayland-drm-client-protocol.h

%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml
	$(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@

%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml
	$(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@

AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm
VULKAN_SOURCES += \
	wayland-drm-protocol.c \
	anv_wsi_wayland.c
libvulkan_intel_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM
endif

libvulkan_intel_la_SOURCES = \
	$(VULKAN_SOURCES) \
	anv_gem.c

anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
	$(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@

anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
	$(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@

CLEANFILES = $(BUILT_SOURCES)

libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) \
	-lxcb -lxcb-dri3 -lxcb-present -lxcb-sync -lxshmfence \
	$(top_builddir)/src/intel/isl/libisl.la \
	$(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
	$(top_builddir)/src/mesa/libmesa.la \
	$(top_builddir)/src/mesa/drivers/dri/common/libdri_test_stubs.la \
	-lpthread -ldl -lstdc++ \
	$(PER_GEN_LIBS)

libvulkan_intel_la_LDFLAGS = \
	-module -avoid-version -shared -shrext .so


# Generate icd files. It would be nice to just be able to add these to
# AC_CONFIG_FILES, but @libdir@ typically expands to '${exec_prefix}/lib64',
# which we can't put in the icd file. When running sed from the Makefile we
# can use ${libdir}, which expands completely and we avoid putting Makefile
# variables in the icd file.

icdconfdir = $(sysconfdir)/vulkan/icd.d
icdconf_DATA = intel_icd.json
noinst_DATA = dev_icd.json

%.json : %.json.in
	$(AM_V_GEN) $(SED) \
		-e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
		-e "s#@install_libdir@#${libdir}#" < $< > $@


# Libvulkan with dummy gem. Used for unit tests.

libvulkan_test_la_SOURCES = \
	$(VULKAN_SOURCES) \
	anv_gem_stubs.c

libvulkan_test_la_CFLAGS = \
	-I$(top_srcdir)/src/intel/vulkan \
	$(libvulkan_intel_la_CFLAGS)

libvulkan_test_la_LIBADD = $(libvulkan_intel_la_LIBADD)

include $(top_srcdir)/install-lib-links.mk

install-data-local:
	$(INSTALL_DATA) -D $(srcdir)/intel_icd.json $(VULKAN_ICD_INSTALL_DIR)/intel_icd.json

880
src/intel/vulkan/anv_allocator.c
Normal file
@@ -0,0 +1,880 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define _DEFAULT_SOURCE

#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <values.h>
#include <assert.h>
#include <linux/futex.h>
#include <linux/memfd.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/syscall.h>

#include "anv_private.h"

#ifdef HAVE_VALGRIND
#define VG_NOACCESS_READ(__ptr) ({                       \
   VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \
   __typeof(*(__ptr)) __val = *(__ptr);                  \
   VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\
   __val;                                                \
})
#define VG_NOACCESS_WRITE(__ptr, __val) ({                 \
   VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \
   *(__ptr) = (__val);                                     \
   VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));  \
})
#else
#define VG_NOACCESS_READ(__ptr) (*(__ptr))
#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val))
#endif

/* Design goals:
 *
 *  - Lock free (except when resizing underlying bos)
 *
 *  - Constant time allocation with typically only one atomic
 *
 *  - Multiple allocation sizes without fragmentation
 *
 *  - Can grow while keeping addresses and offsets of contents stable
 *
 *  - All allocations within one bo so we can point one of the
 *    STATE_BASE_ADDRESS pointers at it.
 *
 * The overall design is a two-level allocator: the top level is a fixed-size,
 * big block (8k) allocator, which operates out of a bo. Allocation is done by
 * either pulling a block from the free list or growing the used range of the
 * bo. Growing the range may run out of space in the bo, which we then need to
 * grow. Growing the bo is tricky in a multi-threaded, lockless environment:
 * we need to keep all pointers and contents in the old map valid. GEM bos in
 * general can't grow, but we use a trick: we create a memfd and use ftruncate
 * to grow it as necessary. We mmap the new size and then create a gem bo for
 * it using the new gem userptr ioctl. Without heavy-handed locking around
 * our allocation fast-path, there isn't really a way to munmap the old mmap,
 * so we just keep it around until garbage collection time. While the block
 * allocator is lockless for normal operations, we block other threads trying
 * to allocate while we're growing the map. It shouldn't happen often, and
 * growing is fast anyway.
 *
 * At the next level we can use various sub-allocators. The state pool is a
 * pool of smaller, fixed size objects, which operates much like the block
 * pool. It uses a free list for freeing objects, but when it runs out of
 * space it just allocates a new block from the block pool. This allocator is
 * intended for longer lived state objects such as SURFACE_STATE and most
 * other persistent state objects in the API. We may need to track more info
 * with these objects and a pointer back to the CPU object (eg VkImage). In
 * those cases we just allocate a slightly bigger object and put the extra
 * state after the GPU state object.
 *
 * The state stream allocator works similarly to how the i965 DRI driver
 * streams all its state. Even with Vulkan, we need to emit transient state
 * (whether surface state base or dynamic state base), and for that we can
 * just get a block and fill it up. These cases are local to a command buffer
 * and the sub-allocator need not be thread safe. The streaming allocator gets
 * a new block when it runs out of space and chains them together so they can
 * be easily freed.
 */
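
/* A minimal, self-contained sketch of the memfd grow trick described
 * above. The helper name grow_map() is illustrative only; the driver's
 * actual implementation is anv_block_pool_grow() below, which also
 * handles re-centering and GEM userptr creation.
 */
#if 0
static void *
grow_map(int memfd, size_t new_size)
{
   /* Growing the file is cheap; the kernel does not back the new range
    * with pages until it is touched.
    */
   if (ftruncate(memfd, new_size) == -1)
      return MAP_FAILED;

   /* Map the new, larger range. Any previous mapping is deliberately
    * left in place so that old pointers stay valid; it is cleaned up
    * when the pool is destroyed.
    */
   return mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED,
               memfd, 0);
}
#endif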

/* Allocations are always at least 64 byte aligned, so 1 is an invalid value.
 * We use it to indicate the free list is empty.
 */
#define EMPTY 1

struct anv_mmap_cleanup {
   void *map;
   size_t size;
   uint32_t gem_handle;
};

#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0})

static inline long
sys_futex(void *addr1, int op, int val1,
          struct timespec *timeout, void *addr2, int val3)
{
   return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
}

static inline int
futex_wake(uint32_t *addr, int count)
{
   return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0);
}

static inline int
futex_wait(uint32_t *addr, int32_t value)
{
   return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0);
}

static inline int
memfd_create(const char *name, unsigned int flags)
{
   return syscall(SYS_memfd_create, name, flags);
}

static inline uint32_t
ilog2_round_up(uint32_t value)
{
   assert(value != 0);
   return 32 - __builtin_clz(value - 1);
}

static inline uint32_t
round_to_power_of_two(uint32_t value)
{
   return 1 << ilog2_round_up(value);
}

static bool
anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
{
   union anv_free_list current, new, old;

   current.u64 = list->u64;
   while (current.offset != EMPTY) {
      /* We have to add a memory barrier here so that the list head (and
       * offset) gets read before we read the map pointer. This way we
       * know that the map pointer is valid for the given offset at the
       * point where we read it.
       */
      __sync_synchronize();

      int32_t *next_ptr = *map + current.offset;
      new.offset = VG_NOACCESS_READ(next_ptr);
      new.count = current.count + 1;
      old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
      if (old.u64 == current.u64) {
         *offset = current.offset;
         return true;
      }
      current = old;
   }

   return false;
}

static void
anv_free_list_push(union anv_free_list *list, void *map, int32_t offset)
{
   union anv_free_list current, old, new;
   int32_t *next_ptr = map + offset;

   old = *list;
   do {
      current = old;
      VG_NOACCESS_WRITE(next_ptr, current.offset);
      new.offset = offset;
      new.count = current.count + 1;
      old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
   } while (old.u64 != current.u64);
}

/* All pointers in the ptr_free_list are assumed to be page-aligned. This
 * means that the bottom 12 bits should all be zero.
 */
#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff)
#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~(uintptr_t)0xfff))
#define PFL_PACK(ptr, count) ({                                          \
   (void *)(((uintptr_t)(ptr) & ~(uintptr_t)0xfff) | ((count) & 0xfff)); \
})
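
/* Illustrative use of the tagged-pointer helpers above (values are
 * hypothetical): packing the same page-aligned pointer with a bumped
 * count yields a different head value, so a compare-and-swap against a
 * stale head fails even if the same page was pushed back in between;
 * this is the ABA problem the low-bit counter exists to solve.
 *
 *    void *head = PFL_PACK(page, 5);
 *    assert(PFL_PTR(head) == page);
 *    assert(PFL_COUNT(head) == 5);
 *    assert(PFL_PACK(page, 6) != head);
 */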

static bool
anv_ptr_free_list_pop(void **list, void **elem)
{
   void *current = *list;
   while (PFL_PTR(current) != NULL) {
      void **next_ptr = PFL_PTR(current);
      void *new_ptr = VG_NOACCESS_READ(next_ptr);
      unsigned new_count = PFL_COUNT(current) + 1;
      void *new = PFL_PACK(new_ptr, new_count);
      void *old = __sync_val_compare_and_swap(list, current, new);
      if (old == current) {
         *elem = PFL_PTR(current);
         return true;
      }
      current = old;
   }

   return false;
}

static void
anv_ptr_free_list_push(void **list, void *elem)
{
   void *old, *current;
   void **next_ptr = elem;

   /* The pointer-based free list requires that the pointer be
    * page-aligned. This is because we use the bottom 12 bits of the
    * pointer to store a counter to solve the ABA concurrency problem.
    */
   assert(((uintptr_t)elem & 0xfff) == 0);

   old = *list;
   do {
      current = old;
      VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current));
      unsigned new_count = PFL_COUNT(current) + 1;
      void *new = PFL_PACK(elem, new_count);
      old = __sync_val_compare_and_swap(list, current, new);
   } while (old != current);
}

static uint32_t
anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state);

void
anv_block_pool_init(struct anv_block_pool *pool,
                    struct anv_device *device, uint32_t block_size)
{
   assert(util_is_power_of_two(block_size));

   pool->device = device;
   pool->bo.gem_handle = 0;
   pool->bo.offset = 0;
   pool->bo.size = 0;
   pool->bo.is_winsys_bo = false;
   pool->block_size = block_size;
   pool->free_list = ANV_FREE_LIST_EMPTY;
   pool->back_free_list = ANV_FREE_LIST_EMPTY;

   pool->fd = memfd_create("block pool", MFD_CLOEXEC);
   if (pool->fd == -1)
      return;

   /* Just make it 2GB up-front. The Linux kernel won't actually back it
    * with pages until we either map and fault on one of them or we use
    * userptr and send a chunk of it off to the GPU.
    */
   if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1)
      return;

   anv_vector_init(&pool->mmap_cleanups,
                   round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128);

   pool->state.next = 0;
   pool->state.end = 0;
   pool->back_state.next = 0;
   pool->back_state.end = 0;

   /* Immediately grow the pool so we'll have a backing bo. */
   pool->state.end = anv_block_pool_grow(pool, &pool->state);
}

void
anv_block_pool_finish(struct anv_block_pool *pool)
{
   struct anv_mmap_cleanup *cleanup;

   anv_vector_foreach(cleanup, &pool->mmap_cleanups) {
      if (cleanup->map)
         munmap(cleanup->map, cleanup->size);
      if (cleanup->gem_handle)
         anv_gem_close(pool->device, cleanup->gem_handle);
   }

   anv_vector_finish(&pool->mmap_cleanups);

   close(pool->fd);
}

#define PAGE_SIZE 4096

/** Grows and re-centers the block pool.
 *
 * We grow the block pool in one or both directions in such a way that the
 * following conditions are met:
 *
 *  1) The size of the entire pool is always a power of two.
 *
 *  2) The pool only grows on both ends. Neither end can get
 *     shortened.
 *
 *  3) At the end of the allocation, we have about twice as much space
 *     allocated for each end as we have used. This way the pool doesn't
 *     grow too far in one direction or the other.
 *
 *  4) If the _alloc_back() has never been called, then the back portion of
 *     the pool retains a size of zero. (This makes it easier for users of
 *     the block pool that only want a one-sided pool.)
 *
 *  5) We have enough space allocated for at least one more block in
 *     whichever side `state` points to.
 *
 *  6) The center of the pool is always aligned to both the block_size of
 *     the pool and a 4K CPU page.
 */
static uint32_t
anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
{
   size_t size;
   void *map;
   uint32_t gem_handle;
   struct anv_mmap_cleanup *cleanup;

   pthread_mutex_lock(&pool->device->mutex);

   assert(state == &pool->state || state == &pool->back_state);

   /* Gather a little usage information on the pool. Since we may have
    * threads waiting in queue to get some storage while we resize, it's
    * actually possible that total_used will be larger than old_size. In
    * particular, block_pool_alloc() increments state->next prior to
    * calling block_pool_grow, so this ensures that we get enough space for
    * whichever side tries to grow the pool.
    *
    * We align to a page size because it makes it easier to do our
    * calculations later in such a way that we stay page-aligned.
    */
   uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE);
   uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE);
   uint32_t total_used = front_used + back_used;

   assert(state == &pool->state || back_used > 0);

   size_t old_size = pool->bo.size;

   if (old_size != 0 &&
       back_used * 2 <= pool->center_bo_offset &&
       front_used * 2 <= (old_size - pool->center_bo_offset)) {
      /* If we're in this case then this isn't the first allocation and we
       * already have enough space on both sides to hold double what we
       * have allocated. There's nothing for us to do.
       */
      goto done;
   }

   if (old_size == 0) {
      /* This is the first allocation */
      size = MAX2(32 * pool->block_size, PAGE_SIZE);
   } else {
      size = old_size * 2;
   }

   /* We can't have a block pool bigger than 1GB because we use signed
    * 32-bit offsets in the free list and we don't want overflow. We
    * should never need a block pool bigger than 1GB anyway.
    */
   assert(size <= (1u << 31));

   /* We compute a new center_bo_offset such that, when we double the size
    * of the pool, we maintain the ratio of how much is used by each side.
    * This way things should remain more-or-less balanced.
    */
   uint32_t center_bo_offset;
   if (back_used == 0) {
      /* If we're in this case then we have never called alloc_back(). In
       * this case, we want to keep the offset at 0 to make things as simple
       * as possible for users that don't care about back allocations.
       */
      center_bo_offset = 0;
   } else {
      /* Try to "center" the allocation based on how much is currently in
       * use on each side of the center line.
       */
      center_bo_offset = ((uint64_t)size * back_used) / total_used;

      /* Align down to a multiple of both the block size and page size */
      uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE);
      assert(util_is_power_of_two(granularity));
      center_bo_offset &= ~(granularity - 1);

      assert(center_bo_offset >= back_used);

      /* Make sure we don't shrink the back end of the pool */
      if (center_bo_offset < pool->back_state.end)
         center_bo_offset = pool->back_state.end;

      /* Make sure that we don't shrink the front end of the pool */
      if (size - center_bo_offset < pool->state.end)
         center_bo_offset = size - pool->state.end;
   }

   assert(center_bo_offset % pool->block_size == 0);
   assert(center_bo_offset % PAGE_SIZE == 0);

   /* Assert that we only ever grow the pool */
   assert(center_bo_offset >= pool->back_state.end);
   assert(size - center_bo_offset >= pool->state.end);

   cleanup = anv_vector_add(&pool->mmap_cleanups);
   if (!cleanup)
      goto fail;
   *cleanup = ANV_MMAP_CLEANUP_INIT;

   /* Just leak the old map until we destroy the pool. We can't munmap it
    * without races or imposing locking on the block allocate fast path. On
    * the whole, the leaked maps add up to less than the size of the
    * current map. MAP_POPULATE seems like the right thing to do, but we
    * should try to get some numbers.
    */
   map = mmap(NULL, size, PROT_READ | PROT_WRITE,
              MAP_SHARED | MAP_POPULATE, pool->fd,
              BLOCK_POOL_MEMFD_CENTER - center_bo_offset);
   cleanup->map = map;
   cleanup->size = size;

   if (map == MAP_FAILED)
      goto fail;

   gem_handle = anv_gem_userptr(pool->device, map, size);
   if (gem_handle == 0)
      goto fail;
   cleanup->gem_handle = gem_handle;

#if 0
   /* Regular objects are created I915_CACHING_CACHED on LLC platforms and
    * I915_CACHING_NONE on non-LLC platforms. However, userptr objects are
    * always created as I915_CACHING_CACHED, which on non-LLC means
    * snooped. That can be useful but comes with a bit of overhead. Since
    * we're explicitly clflushing and don't want the overhead, we need to
    * turn it off.
    */
   if (!pool->device->info.has_llc) {
      anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE);
      anv_gem_set_domain(pool->device, gem_handle,
                         I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
   }
#endif

   /* Now that we've successfully allocated everything, we can write the
    * new values back into pool.
    */
   pool->map = map + center_bo_offset;
   pool->center_bo_offset = center_bo_offset;
   pool->bo.gem_handle = gem_handle;
   pool->bo.size = size;
   pool->bo.map = map;
   pool->bo.index = 0;

done:
   pthread_mutex_unlock(&pool->device->mutex);

   /* Return the appropriate new size. This function never actually
    * updates state->next. Instead, we let the caller do that because it
    * needs to do so in order to maintain its concurrency model.
    */
   if (state == &pool->state) {
      return pool->bo.size - pool->center_bo_offset;
   } else {
      assert(pool->center_bo_offset > 0);
      return pool->center_bo_offset;
   }

fail:
   pthread_mutex_unlock(&pool->device->mutex);

   return 0;
}
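
/* Worked example of the re-centering above (numbers are illustrative):
 * with old_size = 64 KiB, back_used = 16 KiB and front_used = 48 KiB,
 * neither side has double its used space left, so the pool doubles to
 * size = 128 KiB and center_bo_offset becomes 128 KiB * 16 / 64 = 32 KiB
 * (already block- and page-aligned here), leaving each side with at
 * least twice its currently used space.
 */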

static uint32_t
anv_block_pool_alloc_new(struct anv_block_pool *pool,
                         struct anv_block_state *pool_state)
{
   struct anv_block_state state, old, new;

   while (1) {
      state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size);
      if (state.next < state.end) {
         assert(pool->map);
         return state.next;
      } else if (state.next == state.end) {
         /* We allocated the first block outside the pool, so we have to grow
          * it. pool_state->next acts as a mutex: threads who try to allocate
          * now will get block indexes above the current limit and hit
          * futex_wait below.
          */
         new.next = state.next + pool->block_size;
         new.end = anv_block_pool_grow(pool, pool_state);
         assert(new.end >= new.next && new.end % pool->block_size == 0);
         old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
         if (old.next != state.next)
            futex_wake(&pool_state->end, INT_MAX);
         return state.next;
      } else {
         futex_wait(&pool_state->end, state.end);
         continue;
      }
   }
}

int32_t
anv_block_pool_alloc(struct anv_block_pool *pool)
{
   int32_t offset;

   /* Try free list first. */
   if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) {
      assert(offset >= 0);
      assert(pool->map);
      return offset;
   }

   return anv_block_pool_alloc_new(pool, &pool->state);
}

/* Allocates a block out of the back of the block pool.
 *
 * This will allocate a block earlier than the "start" of the block pool.
 * The offsets returned from this function will be negative but will still
 * be correct relative to the block pool's map pointer.
 *
 * If you ever use anv_block_pool_alloc_back, then you will have to do
 * gymnastics with the block pool's BO when doing relocations.
 */
int32_t
anv_block_pool_alloc_back(struct anv_block_pool *pool)
{
   int32_t offset;

   /* Try free list first. */
   if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) {
      assert(offset < 0);
      assert(pool->map);
      return offset;
   }

   offset = anv_block_pool_alloc_new(pool, &pool->back_state);

   /* The offset we get out of anv_block_pool_alloc_new() is actually the
    * number of bytes downwards from the middle to the end of the block.
    * We need to turn it into a (negative) offset from the middle to the
    * start of the block.
    */
   assert(offset >= 0);
   return -(offset + pool->block_size);
}
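
/* Example of the offset math above (illustrative): with a 4096-byte
 * block size, the first back allocation gets state.next == 0 from
 * anv_block_pool_alloc_new() and is returned as -(0 + 4096) = -4096,
 * i.e. the block immediately before the pool's center.
 */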

void
anv_block_pool_free(struct anv_block_pool *pool, int32_t offset)
{
   if (offset < 0) {
      anv_free_list_push(&pool->back_free_list, pool->map, offset);
   } else {
      anv_free_list_push(&pool->free_list, pool->map, offset);
   }
}

static void
anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool,
                               size_t state_size)
{
   /* At least a cache line and must divide the block size. */
   assert(state_size >= 64 && util_is_power_of_two(state_size));

   pool->state_size = state_size;
   pool->free_list = ANV_FREE_LIST_EMPTY;
   pool->block.next = 0;
   pool->block.end = 0;
}

static uint32_t
anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool,
                                struct anv_block_pool *block_pool)
{
   int32_t offset;
   struct anv_block_state block, old, new;

   /* Try free list first. */
   if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) {
      assert(offset >= 0);
      return offset;
   }

   /* If the free list was empty (or somebody raced us and took the items),
    * allocate a new item from the end of the block.
    */
 restart:
   block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size);

   if (block.next < block.end) {
      return block.next;
   } else if (block.next == block.end) {
      offset = anv_block_pool_alloc(block_pool);
      new.next = offset + pool->state_size;
      new.end = offset + block_pool->block_size;
      old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
      if (old.next != block.next)
         futex_wake(&pool->block.end, INT_MAX);
      return offset;
   } else {
      futex_wait(&pool->block.end, block.end);
      goto restart;
   }
}

static void
anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool,
                               struct anv_block_pool *block_pool,
                               uint32_t offset)
{
   anv_free_list_push(&pool->free_list, block_pool->map, offset);
}

void
anv_state_pool_init(struct anv_state_pool *pool,
                    struct anv_block_pool *block_pool)
{
   pool->block_pool = block_pool;
   for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) {
      size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i);
      anv_fixed_size_state_pool_init(&pool->buckets[i], size);
   }
   VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));
}

void
anv_state_pool_finish(struct anv_state_pool *pool)
{
   VG(VALGRIND_DESTROY_MEMPOOL(pool));
}

struct anv_state
anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align)
{
   unsigned size_log2 = ilog2_round_up(size < align ? align : size);
   assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2);
   if (size_log2 < ANV_MIN_STATE_SIZE_LOG2)
      size_log2 = ANV_MIN_STATE_SIZE_LOG2;
   unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2;

   struct anv_state state;
   state.alloc_size = 1 << size_log2;
   state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket],
                                                  pool->block_pool);
   state.map = pool->block_pool->map + state.offset;
   VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size));
   return state;
}
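
/* Bucket selection example (illustrative): a 200-byte allocation with
 * 64-byte alignment rounds up to 1 << ilog2_round_up(200) = 256 bytes
 * and lands in bucket 8 - ANV_MIN_STATE_SIZE_LOG2.
 */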

void
anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state)
{
   assert(util_is_power_of_two(state.alloc_size));
   unsigned size_log2 = ilog2_round_up(state.alloc_size);
   assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 &&
          size_log2 <= ANV_MAX_STATE_SIZE_LOG2);
   unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2;

   VG(VALGRIND_MEMPOOL_FREE(pool, state.map));
   anv_fixed_size_state_pool_free(&pool->buckets[bucket],
                                  pool->block_pool, state.offset);
}

#define NULL_BLOCK 1
struct anv_state_stream_block {
   /* The next block */
   struct anv_state_stream_block *next;

   /* The offset into the block pool at which this block starts */
   uint32_t offset;

#ifdef HAVE_VALGRIND
   /* A pointer to the first user-allocated thing in this block. This is
    * what valgrind sees as the start of the block.
    */
   void *_vg_ptr;
#endif
};

/* The state stream allocator is a one-shot, single threaded allocator for
 * variable sized blocks. We use it for allocating dynamic state.
 */
void
anv_state_stream_init(struct anv_state_stream *stream,
                      struct anv_block_pool *block_pool)
{
   stream->block_pool = block_pool;
   stream->block = NULL;

   /* Ensure that next + whatever > end. This way the first call to
    * state_stream_alloc fetches a new block.
    */
   stream->next = 1;
   stream->end = 0;

   VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false));
}

void
anv_state_stream_finish(struct anv_state_stream *stream)
{
   VG(const uint32_t block_size = stream->block_pool->block_size);

   struct anv_state_stream_block *next = stream->block;
   while (next != NULL) {
      VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next)));
      struct anv_state_stream_block sb = VG_NOACCESS_READ(next);
      VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr));
      VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size));
      anv_block_pool_free(stream->block_pool, sb.offset);
      next = sb.next;
   }

   VG(VALGRIND_DESTROY_MEMPOOL(stream));
}

struct anv_state
anv_state_stream_alloc(struct anv_state_stream *stream,
                       uint32_t size, uint32_t alignment)
{
   struct anv_state_stream_block *sb = stream->block;

   struct anv_state state;

   state.offset = align_u32(stream->next, alignment);
   if (state.offset + size > stream->end) {
      uint32_t block = anv_block_pool_alloc(stream->block_pool);
      sb = stream->block_pool->map + block;

      VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb)));
      sb->next = stream->block;
      sb->offset = block;
      VG(sb->_vg_ptr = NULL);
      VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size));

      stream->block = sb;
      stream->start = block;
      stream->next = block + sizeof(*sb);
      stream->end = block + stream->block_pool->block_size;

      state.offset = align_u32(stream->next, alignment);
      assert(state.offset + size <= stream->end);
   }

   assert(state.offset > stream->start);
   state.map = (void *)sb + (state.offset - stream->start);
   state.alloc_size = size;

#ifdef HAVE_VALGRIND
   void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr);
   if (vg_ptr == NULL) {
      vg_ptr = state.map;
      VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr);
      VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size);
   } else {
      void *state_end = state.map + state.alloc_size;
      /* This only updates the mempool. The newly allocated chunk is still
       * marked as NOACCESS.
       */
      VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr);
      /* Mark the newly allocated chunk as undefined */
      VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size);
   }
#endif

   stream->next = state.offset + size;

   return state;
}
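
/* Typical usage (illustrative; names follow the driver's conventions):
 * transient state is carved out of a stream owned by a command buffer and
 * everything is released at once when the stream is finished:
 *
 *    struct anv_state s =
 *       anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, 64, 64);
 *    ... fill s.map ...
 *    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
 */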

struct bo_pool_bo_link {
   struct bo_pool_bo_link *next;
   struct anv_bo bo;
};

void
anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device)
{
   pool->device = device;
   memset(pool->free_list, 0, sizeof(pool->free_list));

   VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));
}

void
anv_bo_pool_finish(struct anv_bo_pool *pool)
{
   for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
      struct bo_pool_bo_link *link = PFL_PTR(pool->free_list[i]);
      while (link != NULL) {
         struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link);

         anv_gem_munmap(link_copy.bo.map, link_copy.bo.size);
         anv_gem_close(pool->device, link_copy.bo.gem_handle);
         link = link_copy.next;
      }
   }

   VG(VALGRIND_DESTROY_MEMPOOL(pool));
}

VkResult
anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size)
{
   VkResult result;

   const unsigned size_log2 = size < 4096 ? 12 : ilog2_round_up(size);
   const unsigned pow2_size = 1 << size_log2;
   const unsigned bucket = size_log2 - 12;
   assert(bucket < ARRAY_SIZE(pool->free_list));

   void *next_free_void;
   if (anv_ptr_free_list_pop(&pool->free_list[bucket], &next_free_void)) {
      struct bo_pool_bo_link *next_free = next_free_void;
      *bo = VG_NOACCESS_READ(&next_free->bo);
      assert(bo->map == next_free);
      assert(size <= bo->size);

      VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size));

      return VK_SUCCESS;
   }

   struct anv_bo new_bo;

   result = anv_bo_init_new(&new_bo, pool->device, pow2_size);
   if (result != VK_SUCCESS)
      return result;

   assert(new_bo.size == pow2_size);

   new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0);
   if (new_bo.map == NULL) {
      anv_gem_close(pool->device, new_bo.gem_handle);
      return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
   }

   *bo = new_bo;

   VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size));

   return VK_SUCCESS;
}

void
anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo_in)
{
   /* Make a copy in case the anv_bo happens to be stored in the BO */
   struct anv_bo bo = *bo_in;
   struct bo_pool_bo_link *link = bo.map;
   link->bo = bo;

   assert(util_is_power_of_two(bo.size));
   const unsigned size_log2 = ilog2_round_up(bo.size);
   const unsigned bucket = size_log2 - 12;
   assert(bucket < ARRAY_SIZE(pool->free_list));

   VG(VALGRIND_MEMPOOL_FREE(pool, bo.map));
   anv_ptr_free_list_push(&pool->free_list[bucket], link);
}
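
/* A worked example of the bucketing above: sizes round up to a power of two
 * and hash to a free list by their log2, with 4 KiB (1 << 12) as bucket 0.
 * This helper is an illustration only and assumes the same ilog2_round_up
 * semantics used in anv_bo_pool_alloc().
 */
#if 0
static unsigned
example_bo_pool_bucket(uint32_t size)
{
   /* e.g. size = 5000 -> size_log2 = 13 -> pow2_size = 8192 -> bucket 1;
    *      size =  100 -> size_log2 = 12 -> pow2_size = 4096 -> bucket 0.
    */
   const unsigned size_log2 = size < 4096 ? 12 : ilog2_round_up(size);
   return size_log2 - 12;
}
#endif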
1138
src/intel/vulkan/anv_batch_chain.c
Normal file
File diff suppressed because it is too large
1227
src/intel/vulkan/anv_cmd_buffer.c
Normal file
File diff suppressed because it is too large
655
src/intel/vulkan/anv_descriptor_set.c
Normal file
@@ -0,0 +1,655 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

/*
 * Descriptor set layouts.
 */

VkResult anv_CreateDescriptorSetLayout(
    VkDevice                                    _device,
    const VkDescriptorSetLayoutCreateInfo*      pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDescriptorSetLayout*                      pSetLayout)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_descriptor_set_layout *set_layout;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);

   uint32_t max_binding = 0;
   uint32_t immutable_sampler_count = 0;
   for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
      max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
      if (pCreateInfo->pBindings[j].pImmutableSamplers)
         immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
   }

   size_t size = sizeof(struct anv_descriptor_set_layout) +
                 (max_binding + 1) * sizeof(set_layout->binding[0]) +
                 immutable_sampler_count * sizeof(struct anv_sampler *);

   set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!set_layout)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   /* We just allocate all the samplers at the end of the struct */
   struct anv_sampler **samplers =
      (struct anv_sampler **)&set_layout->binding[max_binding + 1];

   set_layout->binding_count = max_binding + 1;
   set_layout->shader_stages = 0;
   set_layout->size = 0;

   for (uint32_t b = 0; b <= max_binding; b++) {
      /* Initialize all binding_layout entries to -1 */
      memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b]));

      set_layout->binding[b].immutable_samplers = NULL;
   }

   /* Initialize all samplers to 0 */
   memset(samplers, 0, immutable_sampler_count * sizeof(*samplers));

   uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, };
   uint32_t surface_count[MESA_SHADER_STAGES] = { 0, };
   uint32_t image_count[MESA_SHADER_STAGES] = { 0, };
   uint32_t buffer_count = 0;
   uint32_t dynamic_offset_count = 0;

   for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
      const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
      uint32_t b = binding->binding;

      assert(binding->descriptorCount > 0);
      set_layout->binding[b].array_size = binding->descriptorCount;
      set_layout->binding[b].descriptor_index = set_layout->size;
      set_layout->size += binding->descriptorCount;

      switch (binding->descriptorType) {
      case VK_DESCRIPTOR_TYPE_SAMPLER:
      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
         anv_foreach_stage(s, binding->stageFlags) {
            set_layout->binding[b].stage[s].sampler_index = sampler_count[s];
            sampler_count[s] += binding->descriptorCount;
         }
         break;
      default:
         break;
      }

      switch (binding->descriptorType) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
         set_layout->binding[b].buffer_index = buffer_count;
         buffer_count += binding->descriptorCount;
         /* fall through */

      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
         anv_foreach_stage(s, binding->stageFlags) {
            set_layout->binding[b].stage[s].surface_index = surface_count[s];
            surface_count[s] += binding->descriptorCount;
         }
         break;
      default:
         break;
      }

      switch (binding->descriptorType) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
         set_layout->binding[b].dynamic_offset_index = dynamic_offset_count;
         dynamic_offset_count += binding->descriptorCount;
         break;
      default:
         break;
      }

      switch (binding->descriptorType) {
      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         anv_foreach_stage(s, binding->stageFlags) {
            set_layout->binding[b].stage[s].image_index = image_count[s];
            image_count[s] += binding->descriptorCount;
         }
         break;
      default:
         break;
      }

      if (binding->pImmutableSamplers) {
         set_layout->binding[b].immutable_samplers = samplers;
         samplers += binding->descriptorCount;

         for (uint32_t i = 0; i < binding->descriptorCount; i++)
            set_layout->binding[b].immutable_samplers[i] =
               anv_sampler_from_handle(binding->pImmutableSamplers[i]);
      } else {
         set_layout->binding[b].immutable_samplers = NULL;
      }

      set_layout->shader_stages |= binding->stageFlags;
   }

   set_layout->buffer_count = buffer_count;
   set_layout->dynamic_offset_count = dynamic_offset_count;

   *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout);

   return VK_SUCCESS;
}

void anv_DestroyDescriptorSetLayout(
    VkDevice                                    _device,
    VkDescriptorSetLayout                       _set_layout,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout);

   anv_free2(&device->alloc, pAllocator, set_layout);
}

/*
 * Pipeline layouts.  These have nothing to do with the pipeline.  They are
 * just multiple descriptor set layouts pasted together.
 */

VkResult anv_CreatePipelineLayout(
    VkDevice                                    _device,
    const VkPipelineLayoutCreateInfo*           pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineLayout*                           pPipelineLayout)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_layout *layout;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);

   layout = anv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (layout == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   layout->num_sets = pCreateInfo->setLayoutCount;

   unsigned dynamic_offset_count = 0;

   memset(layout->stage, 0, sizeof(layout->stage));
   for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
      ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout,
                      pCreateInfo->pSetLayouts[set]);
      layout->set[set].layout = set_layout;

      layout->set[set].dynamic_offset_start = dynamic_offset_count;
      for (uint32_t b = 0; b < set_layout->binding_count; b++) {
         if (set_layout->binding[b].dynamic_offset_index < 0)
            continue;

         dynamic_offset_count += set_layout->binding[b].array_size;
         for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) {
            if (set_layout->binding[b].stage[s].surface_index >= 0)
               layout->stage[s].has_dynamic_offsets = true;
         }
      }
   }

   *pPipelineLayout = anv_pipeline_layout_to_handle(layout);

   return VK_SUCCESS;
}

void anv_DestroyPipelineLayout(
    VkDevice                                    _device,
    VkPipelineLayout                            _pipelineLayout,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout);

   anv_free2(&device->alloc, pAllocator, pipeline_layout);
}

/*
 * Descriptor pools.
 *
 * These are implemented using a big pool of memory and a free-list for the
 * host memory allocations and a state_stream and a free list for the buffer
 * view surface state.  The spec allows us to fail to allocate due to
 * fragmentation in all cases but two: 1) after pool reset, allocating up
 * until the pool size with no freeing must succeed and 2) allocating and
 * freeing only descriptor sets with the same layout.  Case 1) is easy
 * enough, and the free lists let us recycle blocks for case 2).
 */

#define EMPTY 1

VkResult anv_CreateDescriptorPool(
    VkDevice                                    _device,
    const VkDescriptorPoolCreateInfo*           pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDescriptorPool*                           pDescriptorPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_descriptor_pool *pool;

   uint32_t descriptor_count = 0;
   uint32_t buffer_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
      switch (pCreateInfo->pPoolSizes[i].type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
         buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount;
         /* fall through */
      default:
         descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
         break;
      }
   }

   const size_t size =
      sizeof(*pool) +
      pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
      descriptor_count * sizeof(struct anv_descriptor) +
      buffer_count * sizeof(struct anv_buffer_view);

   pool = anv_alloc2(&device->alloc, pAllocator, size, 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pool)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pool->size = size;
   pool->next = 0;
   pool->free_list = EMPTY;

   anv_state_stream_init(&pool->surface_state_stream,
                         &device->surface_state_block_pool);
   pool->surface_state_free_list = NULL;

   *pDescriptorPool = anv_descriptor_pool_to_handle(pool);

   return VK_SUCCESS;
}

void anv_DestroyDescriptorPool(
    VkDevice                                    _device,
    VkDescriptorPool                            _pool,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_descriptor_pool, pool, _pool);

   anv_state_stream_finish(&pool->surface_state_stream);
   anv_free2(&device->alloc, pAllocator, pool);
}

VkResult anv_ResetDescriptorPool(
    VkDevice                                    _device,
    VkDescriptorPool                            descriptorPool,
    VkDescriptorPoolResetFlags                  flags)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool);

   pool->next = 0;
   pool->free_list = EMPTY;
   anv_state_stream_finish(&pool->surface_state_stream);
   anv_state_stream_init(&pool->surface_state_stream,
                         &device->surface_state_block_pool);
   pool->surface_state_free_list = NULL;

   return VK_SUCCESS;
}

struct pool_free_list_entry {
   uint32_t next;
   uint32_t size;
};

static size_t
layout_size(const struct anv_descriptor_set_layout *layout)
{
   return
      sizeof(struct anv_descriptor_set) +
      layout->size * sizeof(struct anv_descriptor) +
      layout->buffer_count * sizeof(struct anv_buffer_view);
}

struct surface_state_free_list_entry {
   void *next;
   uint32_t offset;
};

VkResult
anv_descriptor_set_create(struct anv_device *device,
                          struct anv_descriptor_pool *pool,
                          const struct anv_descriptor_set_layout *layout,
                          struct anv_descriptor_set **out_set)
{
   struct anv_descriptor_set *set;
   const size_t size = layout_size(layout);

   set = NULL;
   if (size <= pool->size - pool->next) {
      set = (struct anv_descriptor_set *) (pool->data + pool->next);
      pool->next += size;
   } else {
      struct pool_free_list_entry *entry;
      uint32_t *link = &pool->free_list;
      for (uint32_t f = pool->free_list; f != EMPTY; f = entry->next) {
         entry = (struct pool_free_list_entry *) (pool->data + f);
         if (size <= entry->size) {
            *link = entry->next;
            set = (struct anv_descriptor_set *) entry;
            break;
         }
         link = &entry->next;
      }
   }

   if (set == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   set->size = size;
   set->layout = layout;
   set->buffer_views =
      (struct anv_buffer_view *) &set->descriptors[layout->size];
   set->buffer_count = layout->buffer_count;

   /* Go through and fill out immutable samplers if we have any */
   struct anv_descriptor *desc = set->descriptors;
   for (uint32_t b = 0; b < layout->binding_count; b++) {
      if (layout->binding[b].immutable_samplers) {
         for (uint32_t i = 0; i < layout->binding[b].array_size; i++) {
            /* The type will get changed to COMBINED_IMAGE_SAMPLER in
             * UpdateDescriptorSets if needed.  However, if the descriptor
             * set has an immutable sampler, UpdateDescriptorSets may never
             * touch it, so we need to make sure it's 100% valid now.
             */
            desc[i] = (struct anv_descriptor) {
               .type = VK_DESCRIPTOR_TYPE_SAMPLER,
               .sampler = layout->binding[b].immutable_samplers[i],
            };
         }
      }
      desc += layout->binding[b].array_size;
   }

   /* Allocate surface state for the buffer views. */
   for (uint32_t b = 0; b < layout->buffer_count; b++) {
      struct surface_state_free_list_entry *entry =
         pool->surface_state_free_list;
      struct anv_state state;

      if (entry) {
         state.map = entry;
         state.offset = entry->offset;
         state.alloc_size = 64;
         pool->surface_state_free_list = entry->next;
      } else {
         state = anv_state_stream_alloc(&pool->surface_state_stream, 64, 64);
      }

      set->buffer_views[b].surface_state = state;
   }

   *out_set = set;

   return VK_SUCCESS;
}

void
anv_descriptor_set_destroy(struct anv_device *device,
                           struct anv_descriptor_pool *pool,
                           struct anv_descriptor_set *set)
{
   /* Put the buffer view surface state back on the free list. */
   for (uint32_t b = 0; b < set->buffer_count; b++) {
      struct surface_state_free_list_entry *entry =
         set->buffer_views[b].surface_state.map;
      entry->next = pool->surface_state_free_list;
      pool->surface_state_free_list = entry;
   }

   /* Put the descriptor set allocation back on the free list. */
   const uint32_t index = (char *) set - pool->data;
   if (index + set->size == pool->next) {
      pool->next = index;
   } else {
      struct pool_free_list_entry *entry = (struct pool_free_list_entry *) set;
      entry->next = pool->free_list;
      entry->size = set->size;
      pool->free_list = (char *) entry - pool->data;
   }
}
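
/* A sketch of the recycling path above (illustrative, layout assumed): a set
 * freed from the middle of the pool is overwritten in place with a
 * pool_free_list_entry whose 'next' holds the pool-relative offset of the
 * previous list head, so the free list lives entirely inside pool->data and
 * needs no extra allocations.
 */
#if 0
   /* pool->data: [set A][set B][set C]<- pool->next                   */
   /* free(B):  B's bytes become { next = old free_list, size = B's }  */
   /* create(layout with layout_size() <= B's size): pops and reuses B */
#endif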

VkResult anv_AllocateDescriptorSets(
    VkDevice                                    _device,
    const VkDescriptorSetAllocateInfo*          pAllocateInfo,
    VkDescriptorSet*                            pDescriptorSets)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_descriptor_pool, pool, pAllocateInfo->descriptorPool);

   VkResult result = VK_SUCCESS;
   struct anv_descriptor_set *set;
   uint32_t i;

   for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
      ANV_FROM_HANDLE(anv_descriptor_set_layout, layout,
                      pAllocateInfo->pSetLayouts[i]);

      result = anv_descriptor_set_create(device, pool, layout, &set);
      if (result != VK_SUCCESS)
         break;

      pDescriptorSets[i] = anv_descriptor_set_to_handle(set);
   }

   if (result != VK_SUCCESS)
      anv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool,
                             i, pDescriptorSets);

   return result;
}

VkResult anv_FreeDescriptorSets(
    VkDevice                                    _device,
    VkDescriptorPool                            descriptorPool,
    uint32_t                                    count,
    const VkDescriptorSet*                      pDescriptorSets)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool);

   for (uint32_t i = 0; i < count; i++) {
      ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);

      anv_descriptor_set_destroy(device, pool, set);
   }

   return VK_SUCCESS;
}

void anv_UpdateDescriptorSets(
    VkDevice                                    _device,
    uint32_t                                    descriptorWriteCount,
    const VkWriteDescriptorSet*                 pDescriptorWrites,
    uint32_t                                    descriptorCopyCount,
    const VkCopyDescriptorSet*                  pDescriptorCopies)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   for (uint32_t i = 0; i < descriptorWriteCount; i++) {
      const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
      ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet);
      const struct anv_descriptor_set_binding_layout *bind_layout =
         &set->layout->binding[write->dstBinding];
      struct anv_descriptor *desc =
         &set->descriptors[bind_layout->descriptor_index];
      desc += write->dstArrayElement;

      switch (write->descriptorType) {
      case VK_DESCRIPTOR_TYPE_SAMPLER:
         for (uint32_t j = 0; j < write->descriptorCount; j++) {
            ANV_FROM_HANDLE(anv_sampler, sampler,
                            write->pImageInfo[j].sampler);

            desc[j] = (struct anv_descriptor) {
               .type = VK_DESCRIPTOR_TYPE_SAMPLER,
               .sampler = sampler,
            };
         }
         break;

      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
         for (uint32_t j = 0; j < write->descriptorCount; j++) {
            ANV_FROM_HANDLE(anv_image_view, iview,
                            write->pImageInfo[j].imageView);
            ANV_FROM_HANDLE(anv_sampler, sampler,
                            write->pImageInfo[j].sampler);

            desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
            desc[j].image_view = iview;

            /* If this descriptor has an immutable sampler, we don't want
             * to stomp on it.
             */
            if (sampler)
               desc[j].sampler = sampler;
         }
         break;

      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
         for (uint32_t j = 0; j < write->descriptorCount; j++) {
            ANV_FROM_HANDLE(anv_image_view, iview,
                            write->pImageInfo[j].imageView);

            desc[j] = (struct anv_descriptor) {
               .type = write->descriptorType,
               .image_view = iview,
            };
         }
         break;

      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         for (uint32_t j = 0; j < write->descriptorCount; j++) {
            ANV_FROM_HANDLE(anv_buffer_view, bview,
                            write->pTexelBufferView[j]);

            desc[j] = (struct anv_descriptor) {
               .type = write->descriptorType,
               .buffer_view = bview,
            };
         }
         break;

      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
         anv_finishme("input attachments not implemented");
         break;

      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
         for (uint32_t j = 0; j < write->descriptorCount; j++) {
            assert(write->pBufferInfo[j].buffer);
            ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer);
            assert(buffer);

            struct anv_buffer_view *view =
               &set->buffer_views[bind_layout->buffer_index];
            view += write->dstArrayElement + j;

            const struct anv_format *format =
               anv_format_for_descriptor_type(write->descriptorType);

            view->format = format->isl_format;
            view->bo = buffer->bo;
            view->offset = buffer->offset + write->pBufferInfo[j].offset;

            /* For buffers with dynamic offsets, we use the full possible
             * range in the surface state and do the actual range-checking
             * in the shader.
             */
            if (bind_layout->dynamic_offset_index >= 0 ||
                write->pBufferInfo[j].range == VK_WHOLE_SIZE)
               view->range = buffer->size - write->pBufferInfo[j].offset;
            else
               view->range = write->pBufferInfo[j].range;

            anv_fill_buffer_surface_state(device, view->surface_state,
                                          view->format,
                                          view->offset, view->range, 1);

            desc[j] = (struct anv_descriptor) {
               .type = write->descriptorType,
               .buffer_view = view,
            };
         }
         break;

      default:
         break;
      }
   }

   for (uint32_t i = 0; i < descriptorCopyCount; i++) {
      const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
      ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet);
      ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet);

      const struct anv_descriptor_set_binding_layout *src_layout =
         &src->layout->binding[copy->srcBinding];
      struct anv_descriptor *src_desc =
         &src->descriptors[src_layout->descriptor_index];
      src_desc += copy->srcArrayElement;

      const struct anv_descriptor_set_binding_layout *dst_layout =
         &dst->layout->binding[copy->dstBinding];
      struct anv_descriptor *dst_desc =
         &dst->descriptors[dst_layout->descriptor_index];
      dst_desc += copy->dstArrayElement;

      for (uint32_t j = 0; j < copy->descriptorCount; j++)
         dst_desc[j] = src_desc[j];
   }
}
1793
src/intel/vulkan/anv_device.c
Normal file
1793
src/intel/vulkan/anv_device.c
Normal file
File diff suppressed because it is too large
Load diff
209
src/intel/vulkan/anv_dump.c
Normal file
209
src/intel/vulkan/anv_dump.c
Normal file
|
|
@ -0,0 +1,209 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

/* This file contains utility functions to help with debugging.  They can be
 * called from GDB or similar to help inspect images and buffers.
 */

void
anv_dump_image_to_ppm(struct anv_device *device,
                      struct anv_image *image, unsigned miplevel,
                      unsigned array_layer, const char *filename)
{
   VkDevice vk_device = anv_device_to_handle(device);
   VkResult result;

   VkExtent2D extent = { image->extent.width, image->extent.height };
   for (unsigned i = 0; i < miplevel; i++) {
      extent.width = MAX2(1, extent.width / 2);
      extent.height = MAX2(1, extent.height / 2);
   }

   VkImage copy_image;
   result = anv_CreateImage(vk_device,
      &(VkImageCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = VK_FORMAT_R8G8B8A8_UNORM,
         .extent = (VkExtent3D) { extent.width, extent.height, 1 },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = 1,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .flags = 0,
      }, NULL, &copy_image);
   assert(result == VK_SUCCESS);

   VkMemoryRequirements reqs;
   anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs);

   VkDeviceMemory memory;
   result = anv_AllocateMemory(vk_device,
      &(VkMemoryAllocateInfo) {
         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
         .allocationSize = reqs.size,
         .memoryTypeIndex = 0,
      }, NULL, &memory);
   assert(result == VK_SUCCESS);

   result = anv_BindImageMemory(vk_device, copy_image, memory, 0);
   assert(result == VK_SUCCESS);

   VkCommandPool commandPool;
   result = anv_CreateCommandPool(vk_device,
      &(VkCommandPoolCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
         .queueFamilyIndex = 0,
         .flags = 0,
      }, NULL, &commandPool);
   assert(result == VK_SUCCESS);

   VkCommandBuffer cmd;
   result = anv_AllocateCommandBuffers(vk_device,
      &(VkCommandBufferAllocateInfo) {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .commandPool = commandPool,
         .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
         .commandBufferCount = 1,
      }, &cmd);
   assert(result == VK_SUCCESS);

   result = anv_BeginCommandBuffer(cmd,
      &(VkCommandBufferBeginInfo) {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
         .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
      });
   assert(result == VK_SUCCESS);

   anv_CmdBlitImage(cmd,
      anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL,
      copy_image, VK_IMAGE_LAYOUT_GENERAL, 1,
      &(VkImageBlit) {
         .srcSubresource = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .mipLevel = miplevel,
            .baseArrayLayer = array_layer,
            .layerCount = 1,
         },
         .srcOffsets = {
            { 0, 0, 0 },
            { extent.width, extent.height, 1 },
         },
         .dstSubresource = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
         },
         .dstOffsets = {
            { 0, 0, 0 },
            { extent.width, extent.height, 1 },
         },
      }, VK_FILTER_NEAREST);

   ANV_CALL(CmdPipelineBarrier)(cmd,
      VK_PIPELINE_STAGE_TRANSFER_BIT,
      VK_PIPELINE_STAGE_TRANSFER_BIT,
      true, 0, NULL, 0, NULL, 1,
      &(VkImageMemoryBarrier) {
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .srcAccessMask = VK_ACCESS_HOST_READ_BIT,
         .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
         .newLayout = VK_IMAGE_LAYOUT_GENERAL,
         .srcQueueFamilyIndex = 0,
         .dstQueueFamilyIndex = 0,
         .image = copy_image,
         .subresourceRange = (VkImageSubresourceRange) {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
         },
      });

   result = anv_EndCommandBuffer(cmd);
   assert(result == VK_SUCCESS);

   VkFence fence;
   result = anv_CreateFence(vk_device,
      &(VkFenceCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
         .flags = 0,
      }, NULL, &fence);
   assert(result == VK_SUCCESS);

   result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1,
      &(VkSubmitInfo) {
         .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .commandBufferCount = 1,
         .pCommandBuffers = &cmd,
      }, fence);
   assert(result == VK_SUCCESS);

   result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX);
   assert(result == VK_SUCCESS);

   anv_DestroyFence(vk_device, fence, NULL);
   anv_DestroyCommandPool(vk_device, commandPool, NULL);

   uint8_t *map;
   result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map);
   assert(result == VK_SUCCESS);

   VkSubresourceLayout layout;
   anv_GetImageSubresourceLayout(vk_device, copy_image,
      &(VkImageSubresource) {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .mipLevel = 0,
         .arrayLayer = 0,
      }, &layout);

   map += layout.offset;

   /* Now we can finally write the PPM file */
   FILE *file = fopen(filename, "wb");
   assert(file);

   fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height);
   for (unsigned y = 0; y < extent.height; y++) {
      uint8_t row[extent.width * 3];
      for (unsigned x = 0; x < extent.width; x++) {
         row[x * 3 + 0] = map[x * 4 + 0];
         row[x * 3 + 1] = map[x * 4 + 1];
         row[x * 3 + 2] = map[x * 4 + 2];
      }
      fwrite(row, 3, extent.width, file);

      map += layout.rowPitch;
   }
   fclose(file);

   anv_UnmapMemory(vk_device, memory);
   anv_DestroyImage(vk_device, copy_image, NULL);
   anv_FreeMemory(vk_device, memory, NULL);
}
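
/* A sketch of the intended use from a debugger, per the file comment above.
 * It assumes a live anv_device and anv_image in scope; the output path is
 * illustrative only:
 *
 *   (gdb) call anv_dump_image_to_ppm(device, image, 0, 0, "/tmp/frame.ppm")
 */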
323
src/intel/vulkan/anv_entrypoints_gen.py
Normal file
@@ -0,0 +1,323 @@
# coding=utf-8
#
# Copyright © 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#

import fileinput, re, sys

# Each function typedef in the vulkan.h header is all on one line and matches
# this regexp. We hope that won't change.

p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);')

entrypoints = []

# We generate a static hash table for entry point lookup
# (vkGetProcAddress). We use a linear congruential generator for our hash
# function and a power-of-two size table. The prime numbers are determined
# experimentally.

none = 0xffff
hash_size = 256
u32_mask = 2**32 - 1
hash_mask = hash_size - 1

prime_factor = 5024183
prime_step = 19

def hash(name):
    h = 0
    for c in name:
        h = (h * prime_factor + ord(c)) & u32_mask

    return h

opt_header = False
opt_code = False

if (sys.argv[1] == "header"):
    opt_header = True
    sys.argv.pop()
elif (sys.argv[1] == "code"):
    opt_code = True
    sys.argv.pop()

# Parse the entry points in the header

i = 0
for line in fileinput.input():
    m = p.match(line)
    if (m):
        if m.group(2) == 'VoidFunction':
            continue
        fullname = "vk" + m.group(2)
        h = hash(fullname)
        entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
        i = i + 1

# For outputting entrypoints.h we generate an anv_EntryPoint() prototype
# per entry point.

if opt_header:
    print "/* This file generated from vk_gen.py, don't edit directly. */\n"

    print "struct anv_dispatch_table {"
    print "   union {"
    print "      void *entrypoints[%d];" % len(entrypoints)
    print "      struct {"

    for type, name, args, num, h in entrypoints:
        print "         %s (*%s)%s;" % (type, name, args)
    print "      };\n"
    print "   };\n"
    print "};\n"

    print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n"

    for type, name, args, num, h in entrypoints:
        print "%s anv_%s%s;" % (type, name, args)
        print "%s gen7_%s%s;" % (type, name, args)
        print "%s gen75_%s%s;" % (type, name, args)
        print "%s gen8_%s%s;" % (type, name, args)
        print "%s gen9_%s%s;" % (type, name, args)
        print "%s anv_validate_%s%s;" % (type, name, args)
    exit()



print """/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* DO NOT EDIT! This is a generated file. */

#include "anv_private.h"

struct anv_entrypoint {
   uint32_t name;
   uint32_t hash;
};

/* We use a big string constant to avoid lots of relocations from the entry
 * point table to lots of little strings. The entries in the entry point table
 * store the index into this big string.
 */

static const char strings[] ="""

offsets = []
i = 0
for type, name, args, num, h in entrypoints:
    print "   \"vk%s\\0\"" % name
    offsets.append(i)
    i += 2 + len(name) + 1
print """   ;

/* Weak aliases for all potential validate functions. These will resolve to
 * NULL if they're not defined, which lets the resolve_entrypoint() function
 * either pick a validate wrapper if available or just plug in the actual
 * entry point.
 */
"""

# Now generate the table of all entry points and their validation functions

print "\nstatic const struct anv_entrypoint entrypoints[] = {"
for type, name, args, num, h in entrypoints:
    print "   { %5d, 0x%08x }," % (offsets[num], h)
print "};\n"

for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]:
    for type, name, args, num, h in entrypoints:
        print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
    print "\nconst struct anv_dispatch_table %s_layer = {" % layer
    for type, name, args, num, h in entrypoints:
        print "   .%s = %s_%s," % (name, layer, name)
    print "};\n"

print """
#ifdef DEBUG
static bool enable_validate = true;
#else
static bool enable_validate = false;
#endif

/* We can't use symbols that need resolving (like, oh, getenv) in the resolve
 * function. This means that we have to determine whether or not to use the
 * validation layer sometime before that. The constructor function attribute
 * asks the dynamic linker to invoke determine_validate() at dlopen() time,
 * which works.
 */
static void __attribute__ ((constructor))
determine_validate(void)
{
   const char *s = getenv("ANV_VALIDATE");

   if (s)
      enable_validate = atoi(s);
}

static const struct brw_device_info *dispatch_devinfo;

void
anv_set_dispatch_devinfo(const struct brw_device_info *devinfo)
{
   dispatch_devinfo = devinfo;
}

void * __attribute__ ((noinline))
anv_resolve_entrypoint(uint32_t index)
{
   if (enable_validate && validate_layer.entrypoints[index])
      return validate_layer.entrypoints[index];

   if (dispatch_devinfo == NULL) {
      return anv_layer.entrypoints[index];
   }

   switch (dispatch_devinfo->gen) {
   case 9:
      if (gen9_layer.entrypoints[index])
         return gen9_layer.entrypoints[index];
      /* fall through */
   case 8:
      if (gen8_layer.entrypoints[index])
         return gen8_layer.entrypoints[index];
      /* fall through */
   case 7:
      if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index])
         return gen75_layer.entrypoints[index];

      if (gen7_layer.entrypoints[index])
         return gen7_layer.entrypoints[index];
      /* fall through */
   case 0:
      return anv_layer.entrypoints[index];
   default:
      unreachable("unsupported gen\\n");
   }
}
"""

# Now output ifuncs and their resolve helpers for all entry points. The
# resolve helper calls resolve_entrypoint() with the entry point index, which
# lets the resolver look it up in the table.

for type, name, args, num, h in entrypoints:
    print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num)
    print "%s vk%s%s\n   __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name)


# Now generate the hash table used for entry point look up.  This is a
# uint16_t table of entry point indices. We use 0xffff to indicate an entry
# in the hash table is empty.

map = [none for f in xrange(hash_size)]
collisions = [0 for f in xrange(10)]
for type, name, args, num, h in entrypoints:
    level = 0
    while map[h & hash_mask] != none:
        h = h + prime_step
        level = level + 1
    if level > 9:
        collisions[9] += 1
    else:
        collisions[level] += 1
    map[h & hash_mask] = num

print "/* Hash table stats:"
print " * size %d entries" % hash_size
print " * collisions entries"
for i in xrange(10):
    if (i == 9):
        plus = "+"
    else:
        plus = " "

    print " *   %2d%s    %4d" % (i, plus, collisions[i])
print " */\n"

print "#define none 0x%04x\n" % none

print "static const uint16_t map[] = {"
for i in xrange(0, hash_size, 8):
    print "   ",
    for j in xrange(i, i + 8):
        if map[j] & 0xffff == 0xffff:
            print "  none,",
        else:
            print "0x%04x," % (map[j] & 0xffff),
    print

print "};"

# Finally we generate the hash table lookup function.  The hash function and
# linear probing algorithm matches the hash table generated above.

print """
void *
anv_lookup_entrypoint(const char *name)
{
   static const uint32_t prime_factor = %d;
   static const uint32_t prime_step = %d;
   const struct anv_entrypoint *e;
   uint32_t hash, h, i;
   const char *p;

   hash = 0;
   for (p = name; *p; p++)
      hash = hash * prime_factor + *p;

   h = hash;
   do {
      i = map[h & %d];
      if (i == none)
         return NULL;
      e = &entrypoints[i];
      h += prime_step;
   } while (e->hash != hash);

   if (strcmp(name, strings + e->name) != 0)
      return NULL;

   return anv_resolve_entrypoint(i);
}
""" % (prime_factor, prime_step, hash_mask)
601
src/intel/vulkan/anv_formats.c
Normal file
@@ -0,0 +1,601 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"
#include "brw_surface_formats.h"

#define RGBA { 0, 1, 2, 3 }
#define BGRA { 2, 1, 0, 3 }

#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \
   [__vk_fmt] = { \
      .vk_format = __vk_fmt, \
      .name = #__vk_fmt, \
      .isl_format = __hw_fmt, \
      .isl_layout = &isl_format_layouts[__hw_fmt], \
      .swizzle = __swizzle, \
      __VA_ARGS__ \
   }

#define fmt(__vk_fmt, __hw_fmt, ...) \
   swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__)

/* HINT: For array formats, the ISL name should match the VK name.  For
 * packed formats, they should have the channels in reverse order from each
 * other.  The reason for this is that, for packed formats, the ISL (and
 * bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB.
 */
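
/* A worked example of the naming rule above, assuming the usual packed-pixel
 * reading of both names: VK_FORMAT_A2B10G10R10_UNORM_PACK32 (MSB -> LSB) and
 * ISL_FORMAT_R10G10B10A2_UNORM (LSB -> MSB) describe the same 32-bit word.
 * The helper below is illustrative only, not part of this commit.
 */
#if 0
static uint32_t
example_pack_a2b10g10r10(uint32_t a, uint32_t b, uint32_t g, uint32_t r)
{
   /* A in bits [31:30], B in [29:20], G in [19:10], R in [9:0] */
   return (a << 30) | (b << 20) | (g << 10) | r;
}
#endif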
static const struct anv_format anv_formats[] = {
   fmt(VK_FORMAT_UNDEFINED,               ISL_FORMAT_RAW),
   fmt(VK_FORMAT_R4G4_UNORM_PACK8,        ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16,   ISL_FORMAT_A4B4G4R4_UNORM),
   swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16,   ISL_FORMAT_A4B4G4R4_UNORM, BGRA),
   fmt(VK_FORMAT_R5G6B5_UNORM_PACK16,     ISL_FORMAT_B5G6R5_UNORM),
   swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16,     ISL_FORMAT_B5G6R5_UNORM, BGRA),
   fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16,   ISL_FORMAT_A1B5G5R5_UNORM),
   fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16,   ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16,   ISL_FORMAT_B5G5R5A1_UNORM),
   fmt(VK_FORMAT_R8_UNORM,                ISL_FORMAT_R8_UNORM),
   fmt(VK_FORMAT_R8_SNORM,                ISL_FORMAT_R8_SNORM),
   fmt(VK_FORMAT_R8_USCALED,              ISL_FORMAT_R8_USCALED),
   fmt(VK_FORMAT_R8_SSCALED,              ISL_FORMAT_R8_SSCALED),
   fmt(VK_FORMAT_R8_UINT,                 ISL_FORMAT_R8_UINT),
   fmt(VK_FORMAT_R8_SINT,                 ISL_FORMAT_R8_SINT),
   fmt(VK_FORMAT_R8_SRGB,                 ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_R8G8_UNORM,              ISL_FORMAT_R8G8_UNORM),
   fmt(VK_FORMAT_R8G8_SNORM,              ISL_FORMAT_R8G8_SNORM),
   fmt(VK_FORMAT_R8G8_USCALED,            ISL_FORMAT_R8G8_USCALED),
   fmt(VK_FORMAT_R8G8_SSCALED,            ISL_FORMAT_R8G8_SSCALED),
   fmt(VK_FORMAT_R8G8_UINT,               ISL_FORMAT_R8G8_UINT),
   fmt(VK_FORMAT_R8G8_SINT,               ISL_FORMAT_R8G8_SINT),
   fmt(VK_FORMAT_R8G8_SRGB,               ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */
   fmt(VK_FORMAT_R8G8B8_UNORM,            ISL_FORMAT_R8G8B8_UNORM),
   fmt(VK_FORMAT_R8G8B8_SNORM,            ISL_FORMAT_R8G8B8_SNORM),
   fmt(VK_FORMAT_R8G8B8_USCALED,          ISL_FORMAT_R8G8B8_USCALED),
   fmt(VK_FORMAT_R8G8B8_SSCALED,          ISL_FORMAT_R8G8B8_SSCALED),
   fmt(VK_FORMAT_R8G8B8_UINT,             ISL_FORMAT_R8G8B8_UINT),
   fmt(VK_FORMAT_R8G8B8_SINT,             ISL_FORMAT_R8G8B8_SINT),
   fmt(VK_FORMAT_R8G8B8_SRGB,             ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */
   fmt(VK_FORMAT_R8G8B8A8_UNORM,          ISL_FORMAT_R8G8B8A8_UNORM),
   fmt(VK_FORMAT_R8G8B8A8_SNORM,          ISL_FORMAT_R8G8B8A8_SNORM),
   fmt(VK_FORMAT_R8G8B8A8_USCALED,        ISL_FORMAT_R8G8B8A8_USCALED),
   fmt(VK_FORMAT_R8G8B8A8_SSCALED,        ISL_FORMAT_R8G8B8A8_SSCALED),
   fmt(VK_FORMAT_R8G8B8A8_UINT,           ISL_FORMAT_R8G8B8A8_UINT),
   fmt(VK_FORMAT_R8G8B8A8_SINT,           ISL_FORMAT_R8G8B8A8_SINT),
   fmt(VK_FORMAT_R8G8B8A8_SRGB,           ISL_FORMAT_R8G8B8A8_UNORM_SRGB),
   fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32,   ISL_FORMAT_R8G8B8A8_UNORM),
   fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32,   ISL_FORMAT_R8G8B8A8_SNORM),
   fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED),
   fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED),
   fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32,    ISL_FORMAT_R8G8B8A8_UINT),
   fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32,    ISL_FORMAT_R8G8B8A8_SINT),
   fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32,    ISL_FORMAT_R8G8B8A8_UNORM_SRGB),
   fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32,   ISL_FORMAT_B10G10R10A2_UNORM),
   fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32,   ISL_FORMAT_B10G10R10A2_SNORM),
   fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED),
   fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED),
   fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32,    ISL_FORMAT_B10G10R10A2_UINT),
   fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32,    ISL_FORMAT_B10G10R10A2_SINT),
   fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32,   ISL_FORMAT_R10G10B10A2_UNORM),
   fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32,   ISL_FORMAT_R10G10B10A2_SNORM),
   fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED),
   fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED),
   fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32,    ISL_FORMAT_R10G10B10A2_UINT),
   fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32,    ISL_FORMAT_R10G10B10A2_SINT),
   fmt(VK_FORMAT_R16_UNORM,               ISL_FORMAT_R16_UNORM),
   fmt(VK_FORMAT_R16_SNORM,               ISL_FORMAT_R16_SNORM),
   fmt(VK_FORMAT_R16_USCALED,             ISL_FORMAT_R16_USCALED),
   fmt(VK_FORMAT_R16_SSCALED,             ISL_FORMAT_R16_SSCALED),
   fmt(VK_FORMAT_R16_UINT,                ISL_FORMAT_R16_UINT),
   fmt(VK_FORMAT_R16_SINT,                ISL_FORMAT_R16_SINT),
   fmt(VK_FORMAT_R16_SFLOAT,              ISL_FORMAT_R16_FLOAT),
   fmt(VK_FORMAT_R16G16_UNORM,            ISL_FORMAT_R16G16_UNORM),
   fmt(VK_FORMAT_R16G16_SNORM,            ISL_FORMAT_R16G16_SNORM),
   fmt(VK_FORMAT_R16G16_USCALED,          ISL_FORMAT_R16G16_USCALED),
   fmt(VK_FORMAT_R16G16_SSCALED,          ISL_FORMAT_R16G16_SSCALED),
   fmt(VK_FORMAT_R16G16_UINT,             ISL_FORMAT_R16G16_UINT),
   fmt(VK_FORMAT_R16G16_SINT,             ISL_FORMAT_R16G16_SINT),
   fmt(VK_FORMAT_R16G16_SFLOAT,           ISL_FORMAT_R16G16_FLOAT),
   fmt(VK_FORMAT_R16G16B16_UNORM,         ISL_FORMAT_R16G16B16_UNORM),
   fmt(VK_FORMAT_R16G16B16_SNORM,         ISL_FORMAT_R16G16B16_SNORM),
   fmt(VK_FORMAT_R16G16B16_USCALED,       ISL_FORMAT_R16G16B16_USCALED),
   fmt(VK_FORMAT_R16G16B16_SSCALED,       ISL_FORMAT_R16G16B16_SSCALED),
   fmt(VK_FORMAT_R16G16B16_UINT,          ISL_FORMAT_R16G16B16_UINT),
   fmt(VK_FORMAT_R16G16B16_SINT,          ISL_FORMAT_R16G16B16_SINT),
   fmt(VK_FORMAT_R16G16B16_SFLOAT,        ISL_FORMAT_R16G16B16_FLOAT),
   fmt(VK_FORMAT_R16G16B16A16_UNORM,      ISL_FORMAT_R16G16B16A16_UNORM),
   fmt(VK_FORMAT_R16G16B16A16_SNORM,      ISL_FORMAT_R16G16B16A16_SNORM),
   fmt(VK_FORMAT_R16G16B16A16_USCALED,    ISL_FORMAT_R16G16B16A16_USCALED),
   fmt(VK_FORMAT_R16G16B16A16_SSCALED,    ISL_FORMAT_R16G16B16A16_SSCALED),
   fmt(VK_FORMAT_R16G16B16A16_UINT,       ISL_FORMAT_R16G16B16A16_UINT),
   fmt(VK_FORMAT_R16G16B16A16_SINT,       ISL_FORMAT_R16G16B16A16_SINT),
   fmt(VK_FORMAT_R16G16B16A16_SFLOAT,     ISL_FORMAT_R16G16B16A16_FLOAT),
   fmt(VK_FORMAT_R32_UINT,                ISL_FORMAT_R32_UINT,),
   fmt(VK_FORMAT_R32_SINT,                ISL_FORMAT_R32_SINT,),
   fmt(VK_FORMAT_R32_SFLOAT,              ISL_FORMAT_R32_FLOAT,),
   fmt(VK_FORMAT_R32G32_UINT,             ISL_FORMAT_R32G32_UINT,),
   fmt(VK_FORMAT_R32G32_SINT,             ISL_FORMAT_R32G32_SINT,),
   fmt(VK_FORMAT_R32G32_SFLOAT,           ISL_FORMAT_R32G32_FLOAT,),
   fmt(VK_FORMAT_R32G32B32_UINT,          ISL_FORMAT_R32G32B32_UINT,),
   fmt(VK_FORMAT_R32G32B32_SINT,          ISL_FORMAT_R32G32B32_SINT,),
   fmt(VK_FORMAT_R32G32B32_SFLOAT,        ISL_FORMAT_R32G32B32_FLOAT,),
   fmt(VK_FORMAT_R32G32B32A32_UINT,       ISL_FORMAT_R32G32B32A32_UINT,),
   fmt(VK_FORMAT_R32G32B32A32_SINT,       ISL_FORMAT_R32G32B32A32_SINT,),
   fmt(VK_FORMAT_R32G32B32A32_SFLOAT,     ISL_FORMAT_R32G32B32A32_FLOAT,),
   fmt(VK_FORMAT_R64_UINT,                ISL_FORMAT_R64_PASSTHRU),
   fmt(VK_FORMAT_R64_SINT,                ISL_FORMAT_R64_PASSTHRU),
   fmt(VK_FORMAT_R64_SFLOAT,              ISL_FORMAT_R64_FLOAT),
   fmt(VK_FORMAT_R64G64_UINT,             ISL_FORMAT_R64G64_PASSTHRU),
   fmt(VK_FORMAT_R64G64_SINT,             ISL_FORMAT_R64G64_PASSTHRU),
   fmt(VK_FORMAT_R64G64_SFLOAT,           ISL_FORMAT_R64G64_FLOAT),
   fmt(VK_FORMAT_R64G64B64_UINT,          ISL_FORMAT_R64G64B64_PASSTHRU),
   fmt(VK_FORMAT_R64G64B64_SINT,          ISL_FORMAT_R64G64B64_PASSTHRU),
   fmt(VK_FORMAT_R64G64B64_SFLOAT,        ISL_FORMAT_R64G64B64_FLOAT),
   fmt(VK_FORMAT_R64G64B64A64_UINT,       ISL_FORMAT_R64G64B64A64_PASSTHRU),
   fmt(VK_FORMAT_R64G64B64A64_SINT,       ISL_FORMAT_R64G64B64A64_PASSTHRU),
   fmt(VK_FORMAT_R64G64B64A64_SFLOAT,     ISL_FORMAT_R64G64B64A64_FLOAT),
   fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT),
   fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,  ISL_FORMAT_R9G9B9E5_SHAREDEXP),

   fmt(VK_FORMAT_D16_UNORM,               ISL_FORMAT_R16_UNORM, .has_depth = true),
   fmt(VK_FORMAT_X8_D24_UNORM_PACK32,     ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true),
   fmt(VK_FORMAT_D32_SFLOAT,              ISL_FORMAT_R32_FLOAT, .has_depth = true),
   fmt(VK_FORMAT_S8_UINT,                 ISL_FORMAT_R8_UINT, .has_stencil = true),
   fmt(VK_FORMAT_D16_UNORM_S8_UINT,       ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_D24_UNORM_S8_UINT,       ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true),
   fmt(VK_FORMAT_D32_SFLOAT_S8_UINT,      ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true),

   fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK,     ISL_FORMAT_DXT1_RGB),
   fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK,      ISL_FORMAT_DXT1_RGB_SRGB),
   fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK,    ISL_FORMAT_BC1_UNORM),
   fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK,     ISL_FORMAT_BC1_UNORM_SRGB),
   fmt(VK_FORMAT_BC2_UNORM_BLOCK,         ISL_FORMAT_BC2_UNORM),
   fmt(VK_FORMAT_BC2_SRGB_BLOCK,          ISL_FORMAT_BC2_UNORM_SRGB),
   fmt(VK_FORMAT_BC3_UNORM_BLOCK,         ISL_FORMAT_BC3_UNORM),
   fmt(VK_FORMAT_BC3_SRGB_BLOCK,          ISL_FORMAT_BC3_UNORM_SRGB),
   fmt(VK_FORMAT_BC4_UNORM_BLOCK,         ISL_FORMAT_BC4_UNORM),
   fmt(VK_FORMAT_BC4_SNORM_BLOCK,         ISL_FORMAT_BC4_SNORM),
   fmt(VK_FORMAT_BC5_UNORM_BLOCK,         ISL_FORMAT_BC5_UNORM),
   fmt(VK_FORMAT_BC5_SNORM_BLOCK,         ISL_FORMAT_BC5_SNORM),
   fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK,       ISL_FORMAT_BC6H_UF16),
   fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK,       ISL_FORMAT_BC6H_SF16),
   fmt(VK_FORMAT_BC7_UNORM_BLOCK,         ISL_FORMAT_BC7_UNORM),
   fmt(VK_FORMAT_BC7_SRGB_BLOCK,          ISL_FORMAT_BC7_UNORM_SRGB),
   fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,   ISL_FORMAT_ETC2_RGB8),
   fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,    ISL_FORMAT_ETC2_SRGB8),
   fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA),
   fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,  ISL_FORMAT_ETC2_SRGB8_PTA),
   fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8),
   fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,  ISL_FORMAT_ETC2_EAC_SRGB8_A8),
   fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK,     ISL_FORMAT_EAC_R11),
   fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK,     ISL_FORMAT_EAC_SIGNED_R11),
   fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK,  ISL_FORMAT_EAC_RG11),
   fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK,  ISL_FORMAT_EAC_SIGNED_RG11),
   fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK,     ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK,     ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK,     ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK,     ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK,     ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK,     ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK,     ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK,     ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK,   ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK,   ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK,   ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK,    ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK,  ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK,   ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK,  ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK,   ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK,  ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK,   ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_B8G8R8_UNORM,            ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_B8G8R8_SNORM,            ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_B8G8R8_USCALED,          ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_B8G8R8_SSCALED,          ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_B8G8R8_UINT,             ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_B8G8R8_SINT,             ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_B8G8R8_SRGB,             ISL_FORMAT_UNSUPPORTED),
   fmt(VK_FORMAT_B8G8R8A8_UNORM,          ISL_FORMAT_B8G8R8A8_UNORM),
|
||||
fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED),
|
||||
fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED),
|
||||
fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED),
|
||||
fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED),
|
||||
fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED),
|
||||
fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB),
|
||||
};
|
||||
|
||||
#undef fmt
|
||||
|
||||
const struct anv_format *
|
||||
anv_format_for_vk_format(VkFormat format)
|
||||
{
|
||||
return &anv_formats[format];
|
||||
}
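
/* Illustrative usage sketch (added commentary, not part of the original
 * change): the table is indexed directly by VkFormat, so the lookup is O(1)
 * and callers test the ISL_FORMAT_UNSUPPORTED sentinel themselves, e.g.:
 *
 *    const struct anv_format *f =
 *       anv_format_for_vk_format(VK_FORMAT_R8G8B8A8_UNORM);
 *    if (f->isl_format == ISL_FORMAT_UNSUPPORTED)
 *       return;   // no hardware mapping for this VkFormat
 */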

/**
 * Exactly one bit must be set in \a aspect.
 */
enum isl_format
anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect,
                   VkImageTiling tiling, struct anv_format_swizzle *swizzle)
{
   const struct anv_format *anv_fmt = &anv_formats[format];

   if (swizzle)
      *swizzle = anv_fmt->swizzle;

   switch (aspect) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) {
         return ISL_FORMAT_UNSUPPORTED;
      } else if (tiling == VK_IMAGE_TILING_OPTIMAL &&
                 !util_is_power_of_two(anv_fmt->isl_layout->bs)) {
         /* Tiled formats *must* be power-of-two because we need to upload
          * them with the render pipeline.  For 3-channel formats, we fix
          * this by switching them over to RGBX or RGBA formats under the
          * hood.
          */
         enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format);
         if (rgbx != ISL_FORMAT_UNSUPPORTED)
            return rgbx;
         else
            return isl_format_rgb_to_rgba(anv_fmt->isl_format);
      } else {
         return anv_fmt->isl_format;
      }

   case VK_IMAGE_ASPECT_DEPTH_BIT:
   case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT):
      assert(anv_fmt->has_depth);
      return anv_fmt->isl_format;

   case VK_IMAGE_ASPECT_STENCIL_BIT:
      assert(anv_fmt->has_stencil);
      return ISL_FORMAT_R8_UINT;

   default:
      unreachable("bad VkImageAspect");
      return ISL_FORMAT_UNSUPPORTED;
   }
}
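
/* Worked example (illustrative only): for VK_FORMAT_R32G32B32_SFLOAT with
 * VK_IMAGE_TILING_OPTIMAL, the 12-byte block size fails the power-of-two
 * check above, so the lookup falls through to isl_format_rgb_to_rgbx() and
 * would return the padded RGBX variant (ISL_FORMAT_R32G32B32X32_FLOAT,
 * assuming isl provides it) rather than the raw 3-channel format.
 */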

// Format capabilities

void anv_validate_GetPhysicalDeviceFormatProperties(
    VkPhysicalDevice physicalDevice,
    VkFormat _format,
    VkFormatProperties* pFormatProperties)
{
   const struct anv_format *format = anv_format_for_vk_format(_format);
   fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name);
   anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties);
}

static VkFormatFeatureFlags
get_image_format_properties(int gen, enum isl_format base,
                            enum isl_format actual,
                            struct anv_format_swizzle swizzle)
{
   const struct brw_surface_format_info *info = &surface_formats[actual];

   if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists)
      return 0;

   VkFormatFeatureFlags flags = 0;
   if (info->sampling <= gen) {
      flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
               VK_FORMAT_FEATURE_BLIT_SRC_BIT;

      if (info->filtering <= gen)
         flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
   }

   /* We can render to swizzled formats.  However, if the alpha channel is
    * moved, then blending won't work correctly.  The PRM tells us
    * straight-up not to render to such a surface.
    */
   if (info->render_target <= gen && swizzle.a == 3) {
      flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
               VK_FORMAT_FEATURE_BLIT_DST_BIT;
   }

   if (info->alpha_blend <= gen && swizzle.a == 3)
      flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;

   /* Load/store is determined based on base format.  This prevents RGB
    * formats from showing up as load/store capable.
    */
   if (isl_is_storage_image_format(base))
      flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;

   if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT)
      flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;

   return flags;
}
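
/* Worked example (illustrative only): swizzle.a == 3 means alpha is still
 * read from the surface's fourth channel.  An identity swizzle {0, 1, 2, 3}
 * therefore keeps the render-target and blend bits above, while a swizzle
 * that relocates alpha (say, a == 0) loses both.
 */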

static VkFormatFeatureFlags
get_buffer_format_properties(int gen, enum isl_format format)
{
   const struct brw_surface_format_info *info = &surface_formats[format];

   if (format == ISL_FORMAT_UNSUPPORTED || !info->exists)
      return 0;

   VkFormatFeatureFlags flags = 0;
   if (info->sampling <= gen && !isl_format_is_compressed(format))
      flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;

   if (info->input_vb <= gen)
      flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;

   if (isl_is_storage_image_format(format))
      flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;

   if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT)
      flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;

   return flags;
}

static void
anv_physical_device_get_format_properties(struct anv_physical_device *physical_device,
                                          VkFormat format,
                                          VkFormatProperties *out_properties)
{
   /* The surface format tables encode generations scaled by 10, so gen7.5
    * (Haswell) compares as 75.
    */
   int gen = physical_device->info->gen * 10;
   if (physical_device->info->is_haswell)
      gen += 5;

   VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
   if (anv_format_is_depth_or_stencil(&anv_formats[format])) {
      tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
      if (physical_device->info->gen >= 8)
         tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;

      tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
               VK_FORMAT_FEATURE_BLIT_DST_BIT;
   } else {
      enum isl_format linear_fmt, tiled_fmt;
      struct anv_format_swizzle linear_swizzle, tiled_swizzle;
      linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT,
                                      VK_IMAGE_TILING_LINEAR, &linear_swizzle);
      tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT,
                                     VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle);

      linear = get_image_format_properties(gen, linear_fmt, linear_fmt,
                                           linear_swizzle);
      tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt,
                                          tiled_swizzle);
      buffer = get_buffer_format_properties(gen, linear_fmt);

      /* XXX: We handle 3-channel formats by switching them out for RGBX or
       * RGBA formats behind-the-scenes.  This works fine for textures
       * because the upload process will fill in the extra channel.
       * We could also support it for render targets, but it will take
       * substantially more work and we have enough RGBX formats to handle
       * what most clients will want.
       */
      if (linear_fmt != ISL_FORMAT_UNSUPPORTED &&
          !util_is_power_of_two(isl_format_layouts[linear_fmt].bs) &&
          isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) {
         tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT &
                  ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
      }
   }

   out_properties->linearTilingFeatures = linear;
   out_properties->optimalTilingFeatures = tiled;
   out_properties->bufferFeatures = buffer;
}

void anv_GetPhysicalDeviceFormatProperties(
    VkPhysicalDevice physicalDevice,
    VkFormat format,
    VkFormatProperties* pFormatProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   anv_physical_device_get_format_properties(physical_device, format,
                                             pFormatProperties);
}

VkResult anv_GetPhysicalDeviceImageFormatProperties(
    VkPhysicalDevice physicalDevice,
    VkFormat format,
    VkImageType type,
    VkImageTiling tiling,
    VkImageUsageFlags usage,
    VkImageCreateFlags createFlags,
    VkImageFormatProperties* pImageFormatProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VkFormatProperties format_props;
   VkFormatFeatureFlags format_feature_flags;
   VkExtent3D maxExtent;
   uint32_t maxMipLevels;
   uint32_t maxArraySize;
   VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;

   anv_physical_device_get_format_properties(physical_device, format,
                                             &format_props);

   /* Extract the VkFormatFeatureFlags that are relevant for the queried
    * tiling.
    */
   if (tiling == VK_IMAGE_TILING_LINEAR) {
      format_feature_flags = format_props.linearTilingFeatures;
   } else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
      format_feature_flags = format_props.optimalTilingFeatures;
   } else {
      unreachable("bad VkImageTiling");
   }

   switch (type) {
   default:
      unreachable("bad VkImageType");
   case VK_IMAGE_TYPE_1D:
      maxExtent.width = 16384;
      maxExtent.height = 1;
      maxExtent.depth = 1;
      maxMipLevels = 15; /* log2(maxWidth) + 1 */
      maxArraySize = 2048;
      sampleCounts = VK_SAMPLE_COUNT_1_BIT;
      break;
   case VK_IMAGE_TYPE_2D:
      /* FINISHME: Does this really differ for cube maps? The documentation
       * for RENDER_SURFACE_STATE suggests so.
       */
      maxExtent.width = 16384;
      maxExtent.height = 16384;
      maxExtent.depth = 1;
      maxMipLevels = 15; /* log2(maxWidth) + 1 */
      maxArraySize = 2048;
      break;
   case VK_IMAGE_TYPE_3D:
      maxExtent.width = 2048;
      maxExtent.height = 2048;
      maxExtent.depth = 2048;
      maxMipLevels = 12; /* log2(maxWidth) + 1 */
      maxArraySize = 1;
      break;
   }

   if (tiling == VK_IMAGE_TILING_OPTIMAL &&
       type == VK_IMAGE_TYPE_2D &&
       (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
                                VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
       !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
       !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
      sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev);
   }

   if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
      /* Meta implements transfers by sampling from the source image. */
      if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
         goto unsupported;
      }
   }

#if 0
   if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
      if (anv_format_for_vk_format(format)->has_stencil) {
         /* Not yet implemented because copying to a W-tiled surface is crazy
          * hard.
          */
         anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for "
                      "stencil format");
         goto unsupported;
      }
   }
#endif

   if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
      if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
         goto unsupported;
      }
   }

   if (usage & VK_IMAGE_USAGE_STORAGE_BIT) {
      if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
         goto unsupported;
      }
   }

   if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
      if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
         goto unsupported;
      }
   }

   if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
      if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
         goto unsupported;
      }
   }

   if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) {
      /* Nothing to check. */
   }

   if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
      /* Ignore this flag because it was removed from the
       * provisional_I_20150910 header.
       */
   }

   *pImageFormatProperties = (VkImageFormatProperties) {
      .maxExtent = maxExtent,
      .maxMipLevels = maxMipLevels,
      .maxArrayLayers = maxArraySize,
      .sampleCounts = sampleCounts,

      /* FINISHME: Accurately calculate
       * VkImageFormatProperties::maxResourceSize.
       */
      .maxResourceSize = UINT32_MAX,
   };

   return VK_SUCCESS;

unsupported:
   *pImageFormatProperties = (VkImageFormatProperties) {
      .maxExtent = { 0, 0, 0 },
      .maxMipLevels = 0,
      .maxArrayLayers = 0,
      .sampleCounts = 0,
      .maxResourceSize = 0,
   };

   return VK_SUCCESS;
}
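
/* Usage sketch (illustrative, not from the original change): callers probe a
 * combination before creating an image, e.g.:
 *
 *    VkImageFormatProperties props;
 *    vkGetPhysicalDeviceImageFormatProperties(pdev,
 *       VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
 *       VK_IMAGE_USAGE_SAMPLED_BIT, 0, &props);
 *
 * Note that this implementation returns VK_SUCCESS on the "unsupported" path
 * too, just with all-zero limits, so inspecting props.maxExtent is the only
 * way to detect rejection here.
 */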

void anv_GetPhysicalDeviceSparseImageFormatProperties(
    VkPhysicalDevice physicalDevice,
    VkFormat format,
    VkImageType type,
    uint32_t samples,
    VkImageUsageFlags usage,
    VkImageTiling tiling,
    uint32_t* pNumProperties,
    VkSparseImageFormatProperties* pProperties)
{
   /* Sparse images are not yet supported. */
   *pNumProperties = 0;
}
335
src/intel/vulkan/anv_gem.c
Normal file

@@ -0,0 +1,335 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define _DEFAULT_SOURCE

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

static int
anv_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;

   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}
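
/* Side note (added commentary, not in the original patch): this is the
 * classic restart-on-signal loop.  With _GNU_SOURCE, glibc's
 * TEMP_FAILURE_RETRY() expresses the EINTR half of it:
 *
 *    ret = TEMP_FAILURE_RETRY(ioctl(fd, request, arg));
 *
 * The explicit loop here additionally retries on EAGAIN, which that macro
 * does not cover.
 */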

/**
 * Wrapper around DRM_IOCTL_I915_GEM_CREATE.
 *
 * Return gem handle, or 0 on failure.  Gem handles are never 0.
 */
uint32_t
anv_gem_create(struct anv_device *device, size_t size)
{
   struct drm_i915_gem_create gem_create = {
      .size = size,
   };

   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
   if (ret != 0) {
      /* FIXME: What do we do if this fails? */
      return 0;
   }

   return gem_create.handle;
}
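
/* Illustrative lifecycle sketch (assumes a valid anv_device; not part of the
 * original change).  The wrappers in this file compose as
 * create -> mmap -> use -> munmap -> close:
 *
 *    uint32_t handle = anv_gem_create(device, 4096);
 *    if (handle != 0) {
 *       void *map = anv_gem_mmap(device, handle, 0, 4096, 0);
 *       if (map != NULL) {
 *          memset(map, 0, 4096);
 *          anv_gem_munmap(map, 4096);
 *       }
 *       anv_gem_close(device, handle);
 *    }
 */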

void
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
{
   struct drm_gem_close close = {
      .handle = gem_handle,
   };

   anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close);
}

/**
 * Wrapper around DRM_IOCTL_I915_GEM_MMAP.
 */
void*
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
             uint64_t offset, uint64_t size, uint32_t flags)
{
   struct drm_i915_gem_mmap gem_mmap = {
      .handle = gem_handle,
      .offset = offset,
      .size = size,
      .flags = flags,
   };

   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
   if (ret != 0) {
      /* FIXME: Is NULL the right error return? Cf MAP_INVALID */
      return NULL;
   }

   VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1));
   return (void *)(uintptr_t) gem_mmap.addr_ptr;
}

/* This is just a wrapper around munmap, but it also notifies valgrind that
 * this map is no longer valid.  Pair this with anv_gem_mmap().
 */
void
anv_gem_munmap(void *p, uint64_t size)
{
   VG(VALGRIND_FREELIKE_BLOCK(p, 0));
   munmap(p, size);
}

uint32_t
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
{
   struct drm_i915_gem_userptr userptr = {
      .user_ptr = (__u64)((unsigned long) mem),
      .user_size = size,
      .flags = 0,
   };

   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
   if (ret == -1)
      return 0;

   return userptr.handle;
}

int
anv_gem_set_caching(struct anv_device *device,
                    uint32_t gem_handle, uint32_t caching)
{
   struct drm_i915_gem_caching gem_caching = {
      .handle = gem_handle,
      .caching = caching,
   };

   return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching);
}

int
anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
                   uint32_t read_domains, uint32_t write_domain)
{
   struct drm_i915_gem_set_domain gem_set_domain = {
      .handle = gem_handle,
      .read_domains = read_domains,
      .write_domain = write_domain,
   };

   return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain);
}

/**
 * On error, \a timeout_ns holds the remaining time.
 */
int
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
{
   struct drm_i915_gem_wait wait = {
      .bo_handle = gem_handle,
      .timeout_ns = *timeout_ns,
      .flags = 0,
   };

   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
   *timeout_ns = wait.timeout_ns;

   return ret;
}
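
/* Usage sketch (illustrative only): a bounded wait on a BO.  The kernel
 * writes the unconsumed time back through the pointer; as far as we know,
 * i915 reports an expired wait as -1 with errno set to ETIME:
 *
 *    int64_t timeout_ns = 1000000000;   // 1 second budget
 *    if (anv_gem_wait(device, handle, &timeout_ns) == -1 && errno == ETIME)
 *       ;   // still busy; timeout_ns holds whatever budget remains
 */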

int
anv_gem_execbuffer(struct anv_device *device,
                   struct drm_i915_gem_execbuffer2 *execbuf)
{
   return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
}

int
anv_gem_set_tiling(struct anv_device *device,
                   uint32_t gem_handle, uint32_t stride, uint32_t tiling)
{
   int ret;

   /* set_tiling overwrites the input on the error path, so we have to open
    * code anv_ioctl.
    */
   do {
      struct drm_i915_gem_set_tiling set_tiling = {
         .handle = gem_handle,
         .tiling_mode = tiling,
         .stride = stride,
      };

      ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}

int
anv_gem_get_param(int fd, uint32_t param)
{
   int tmp;

   drm_i915_getparam_t gp = {
      .param = param,
      .value = &tmp,
   };

   int ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
   if (ret == 0)
      return tmp;

   return 0;
}

bool
anv_gem_get_bit6_swizzle(int fd, uint32_t tiling)
{
   struct drm_gem_close close;
   int ret;

   struct drm_i915_gem_create gem_create = {
      .size = 4096,
   };

   if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) {
      assert(!"Failed to create GEM BO");
      return false;
   }

   bool swizzled = false;

   /* set_tiling overwrites the input on the error path, so we have to open
    * code anv_ioctl.
    */
   do {
      struct drm_i915_gem_set_tiling set_tiling = {
         .handle = gem_create.handle,
         .tiling_mode = tiling,
         .stride = tiling == I915_TILING_X ? 512 : 128,
      };

      ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   if (ret != 0) {
      assert(!"Failed to set BO tiling");
      goto close_and_return;
   }

   struct drm_i915_gem_get_tiling get_tiling = {
      .handle = gem_create.handle,
   };

   if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) {
      assert(!"Failed to get BO tiling");
      goto close_and_return;
   }

   swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE;

close_and_return:

   memset(&close, 0, sizeof(close));
   close.handle = gem_create.handle;
   anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);

   return swizzled;
}
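
/* Background note (added commentary, not in the original patch): on some
 * older parts with interleaved memory channels, the hardware XORs address
 * bit 6 into the tiled layout ("bit-6 swizzling").  CPU access to tiled
 * buffers must account for that, so the driver probes the kernel once with
 * a throwaway BO, as above, and remembers the answer per tiling mode.
 */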

int
anv_gem_create_context(struct anv_device *device)
{
   struct drm_i915_gem_context_create create = { 0 };

   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
   if (ret == -1)
      return -1;

   return create.ctx_id;
}

int
anv_gem_destroy_context(struct anv_device *device, int context)
{
   struct drm_i915_gem_context_destroy destroy = {
      .ctx_id = context,
   };

   return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
}

int
anv_gem_get_aperture(int fd, uint64_t *size)
{
   struct drm_i915_gem_get_aperture aperture = { 0 };

   int ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
   if (ret == -1)
      return -1;

   *size = aperture.aper_available_size;

   return 0;
}

int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
   struct drm_prime_handle args = {
      .handle = gem_handle,
      .flags = DRM_CLOEXEC,
   };

   int ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
   if (ret == -1)
      return -1;

   return args.fd;
}

uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
   struct drm_prime_handle args = {
      .fd = fd,
   };

   int ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
   if (ret == -1)
      return 0;

   return args.handle;
}
159
src/intel/vulkan/anv_gem_stubs.c
Normal file

@@ -0,0 +1,159 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define _DEFAULT_SOURCE

#include <linux/memfd.h>
#include <sys/mman.h>
#include <sys/syscall.h>

#include "anv_private.h"

static inline int
memfd_create(const char *name, unsigned int flags)
{
   return syscall(SYS_memfd_create, name, flags);
}

uint32_t
anv_gem_create(struct anv_device *device, size_t size)
{
   int fd = memfd_create("fake bo", MFD_CLOEXEC);
   if (fd == -1)
      return 0;

   assert(fd != 0);

   if (ftruncate(fd, size) == -1) {
      /* Don't leak the memfd if we can't size it. */
      close(fd);
      return 0;
   }

   return fd;
}

void
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
{
   close(gem_handle);
}

void*
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
             uint64_t offset, uint64_t size, uint32_t flags)
{
   /* Ignore flags, as they're specific to I915_GEM_MMAP. */
   (void) flags;

   return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
               gem_handle, offset);
}

/* This is just a wrapper around munmap, but it also notifies valgrind that
 * this map is no longer valid.  Pair this with anv_gem_mmap().
 */
void
anv_gem_munmap(void *p, uint64_t size)
{
   munmap(p, size);
}

uint32_t
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
{
   return -1;
}

int
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
{
   return 0;
}

int
anv_gem_execbuffer(struct anv_device *device,
                   struct drm_i915_gem_execbuffer2 *execbuf)
{
   return 0;
}

int
anv_gem_set_tiling(struct anv_device *device,
                   uint32_t gem_handle, uint32_t stride, uint32_t tiling)
{
   return 0;
}

int
anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle,
                    uint32_t caching)
{
   return 0;
}

int
anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
                   uint32_t read_domains, uint32_t write_domain)
{
   return 0;
}

int
anv_gem_get_param(int fd, uint32_t param)
{
   unreachable("Unused");
}

bool
anv_gem_get_bit6_swizzle(int fd, uint32_t tiling)
{
   unreachable("Unused");
}

int
anv_gem_create_context(struct anv_device *device)
{
   unreachable("Unused");
}

int
anv_gem_destroy_context(struct anv_device *device, int context)
{
   unreachable("Unused");
}

int
anv_gem_get_aperture(int fd, uint64_t *size)
{
   unreachable("Unused");
}

int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
   unreachable("Unused");
}

uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
   unreachable("Unused");
}
66
src/intel/vulkan/anv_genX.h
Normal file

@@ -0,0 +1,66 @@
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * Gen-specific function declarations.  This header must *not* be included
 * directly.  Instead, it is included multiple times by gen8_private.h.
 *
 * In this header file, the usual genx() macro is available.
 */

VkResult genX(init_device_state)(struct anv_device *device);

void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);

struct anv_state
genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer,
                                          struct anv_framebuffer *fb);

void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer,
                                  struct anv_subpass *subpass);

void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);

void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
                                bool enable_slm);

void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);

void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);

VkResult
genX(graphics_pipeline_create)(VkDevice _device,
                               struct anv_pipeline_cache *cache,
                               const VkGraphicsPipelineCreateInfo *pCreateInfo,
                               const struct anv_graphics_pipeline_create_info *extra,
                               const VkAllocationCallbacks *alloc,
                               VkPipeline *pPipeline);

VkResult
genX(compute_pipeline_create)(VkDevice _device,
                              struct anv_pipeline_cache *cache,
                              const VkComputePipelineCreateInfo *pCreateInfo,
                              const VkAllocationCallbacks *alloc,
                              VkPipeline *pPipeline);
787
src/intel/vulkan/anv_image.c
Normal file

@@ -0,0 +1,787 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

/**
 * Exactly one bit must be set in \a aspect.
 */
static isl_surf_usage_flags_t
choose_isl_surf_usage(VkImageUsageFlags vk_usage,
                      VkImageAspectFlags aspect)
{
   isl_surf_usage_flags_t isl_usage = 0;

   /* FINISHME: Support aux surfaces */
   isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;

   if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
      isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT;

   if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)
      isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT;

   if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
      isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;

   /* XXX: VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT is a VkImageCreateFlagBits
    * value, but vk_usage holds VkImageUsageFlags, so this test actually
    * fires on whichever usage bit shares its numeric value.
    */
   if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
      isl_usage |= ISL_SURF_USAGE_CUBE_BIT;

   if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
      switch (aspect) {
      default:
         unreachable("bad VkImageAspect");
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         isl_usage |= ISL_SURF_USAGE_DEPTH_BIT;
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         isl_usage |= ISL_SURF_USAGE_STENCIL_BIT;
         break;
      }
   }

   if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
      /* Meta implements transfers by sampling from the source image. */
      isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT;
   }

   if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
      /* Meta implements transfers by rendering into the destination image. */
      isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
   }

   return isl_usage;
}

/**
 * Exactly one bit must be set in \a aspect.
 */
static struct anv_surface *
get_surface(struct anv_image *image, VkImageAspectFlags aspect)
{
   switch (aspect) {
   default:
      unreachable("bad VkImageAspect");
   case VK_IMAGE_ASPECT_COLOR_BIT:
      return &image->color_surface;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return &image->depth_surface;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return &image->stencil_surface;
   }
}

/**
 * Initialize the anv_image::*_surface selected by \a aspect.  Then update the
 * image's memory requirements (that is, the image's size and alignment).
 *
 * Exactly one bit must be set in \a aspect.
 */
static VkResult
make_surface(const struct anv_device *dev,
             struct anv_image *image,
             const struct anv_image_create_info *anv_info,
             VkImageAspectFlags aspect)
{
   const VkImageCreateInfo *vk_info = anv_info->vk_info;
   bool ok UNUSED;

   static const enum isl_surf_dim vk_to_isl_surf_dim[] = {
      [VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D,
      [VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D,
      [VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D,
   };

   isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags;
   if (vk_info->tiling == VK_IMAGE_TILING_LINEAR)
      tiling_flags = ISL_TILING_LINEAR_BIT;

   struct anv_surface *anv_surf = get_surface(image, aspect);

   image->extent = anv_sanitize_image_extent(vk_info->imageType,
                                             vk_info->extent);

   ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl,
      .dim = vk_to_isl_surf_dim[vk_info->imageType],
      .format = anv_get_isl_format(vk_info->format, aspect,
                                   vk_info->tiling, NULL),
      .width = image->extent.width,
      .height = image->extent.height,
      .depth = image->extent.depth,
      .levels = vk_info->mipLevels,
      .array_len = vk_info->arrayLayers,
      .samples = vk_info->samples,
      .min_alignment = 0,
      .min_pitch = anv_info->stride,
      .usage = choose_isl_surf_usage(image->usage, aspect),
      .tiling_flags = tiling_flags);

   /* isl_surf_init() will fail only if provided invalid input.  Invalid input
    * is illegal in Vulkan.
    */
   assert(ok);

   anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment);
   image->size = anv_surf->offset + anv_surf->isl.size;
   image->alignment = MAX(image->alignment, anv_surf->isl.alignment);

   return VK_SUCCESS;
}
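
/* Worked example (illustrative numbers, added commentary): if image->size is
 * currently 1000 bytes and the new surface reports alignment 4096 and size
 * 8192, then align_u32() above places the surface at offset 4096 and grows
 * the image to 12288 bytes, with image->alignment raised to 4096.
 */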

/**
 * Parameter @a format is required and overrides VkImageCreateInfo::format.
 */
static VkImageUsageFlags
anv_image_get_full_usage(const VkImageCreateInfo *info,
                         const struct anv_format *format)
{
   VkImageUsageFlags usage = info->usage;

   if (info->samples > 1 &&
       (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
      /* Meta will resolve the image by binding it as a texture. */
      usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
   }

   if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
      /* Meta will transfer from the image by binding it as a texture. */
      usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
   }

   if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
      /* For non-clear transfer operations, meta will transfer to the image by
       * binding it as a color attachment, even if the image format is not
       * a color format.
       */
      usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;

      if (anv_format_is_depth_or_stencil(format)) {
         /* vkCmdClearDepthStencilImage() only requires that
          * VK_IMAGE_USAGE_TRANSFER_DST_BIT be set.  In particular, it does
          * not require VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT.  Meta
          * clears the image, though, by binding it as a depthstencil
          * attachment.
          */
         usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
      }
   }

   return usage;
}

VkResult
anv_image_create(VkDevice _device,
                 const struct anv_image_create_info *create_info,
                 const VkAllocationCallbacks* alloc,
                 VkImage *pImage)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   struct anv_image *image = NULL;
   const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format);
   VkResult r;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   anv_assert(pCreateInfo->mipLevels > 0);
   anv_assert(pCreateInfo->arrayLayers > 0);
   anv_assert(pCreateInfo->samples > 0);
   anv_assert(pCreateInfo->extent.width > 0);
   anv_assert(pCreateInfo->extent.height > 0);
   anv_assert(pCreateInfo->extent.depth > 0);

   image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   memset(image, 0, sizeof(*image));
   image->type = pCreateInfo->imageType;
   image->extent = pCreateInfo->extent;
   image->vk_format = pCreateInfo->format;
   image->format = format;
   image->levels = pCreateInfo->mipLevels;
   image->array_size = pCreateInfo->arrayLayers;
   image->samples = pCreateInfo->samples;
   image->usage = anv_image_get_full_usage(pCreateInfo, format);
   image->tiling = pCreateInfo->tiling;

   if (likely(anv_format_is_color(format))) {
      r = make_surface(device, image, create_info,
                       VK_IMAGE_ASPECT_COLOR_BIT);
      if (r != VK_SUCCESS)
         goto fail;
   } else {
      if (image->format->has_depth) {
         r = make_surface(device, image, create_info,
                          VK_IMAGE_ASPECT_DEPTH_BIT);
         if (r != VK_SUCCESS)
            goto fail;
      }

      if (image->format->has_stencil) {
         r = make_surface(device, image, create_info,
                          VK_IMAGE_ASPECT_STENCIL_BIT);
         if (r != VK_SUCCESS)
            goto fail;
      }
   }

   *pImage = anv_image_to_handle(image);

   return VK_SUCCESS;

fail:
   if (image)
      anv_free2(&device->alloc, alloc, image);

   return r;
}

VkResult
anv_CreateImage(VkDevice device,
                const VkImageCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkImage *pImage)
{
   return anv_image_create(device,
      &(struct anv_image_create_info) {
         .vk_info = pCreateInfo,
         .isl_tiling_flags = ISL_TILING_ANY_MASK,
      },
      pAllocator,
      pImage);
}

void
anv_DestroyImage(VkDevice _device, VkImage _image,
                 const VkAllocationCallbacks *pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image));
}

static void
anv_surface_get_subresource_layout(struct anv_image *image,
                                   struct anv_surface *surface,
                                   const VkImageSubresource *subresource,
                                   VkSubresourceLayout *layout)
{
   /* If we are on a non-zero mip level or array slice, we need to
    * calculate a real offset.
    */
   anv_assert(subresource->mipLevel == 0);
   anv_assert(subresource->arrayLayer == 0);

   layout->offset = surface->offset;
   layout->rowPitch = surface->isl.row_pitch;
   layout->depthPitch = isl_surf_get_array_pitch(&surface->isl);
   layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl);
   layout->size = surface->isl.size;
}

void anv_GetImageSubresourceLayout(
    VkDevice device,
    VkImage _image,
    const VkImageSubresource* pSubresource,
    VkSubresourceLayout* pLayout)
{
   ANV_FROM_HANDLE(anv_image, image, _image);

   assert(__builtin_popcount(pSubresource->aspectMask) == 1);

   switch (pSubresource->aspectMask) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      anv_surface_get_subresource_layout(image, &image->color_surface,
                                         pSubresource, pLayout);
      break;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      anv_surface_get_subresource_layout(image, &image->depth_surface,
                                         pSubresource, pLayout);
      break;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      anv_surface_get_subresource_layout(image, &image->stencil_surface,
                                         pSubresource, pLayout);
      break;
   default:
      assert(!"Invalid image aspect");
   }
}

VkResult
anv_validate_CreateImageView(VkDevice _device,
                             const VkImageViewCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkImageView *pView)
{
   ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);
   const VkImageSubresourceRange *subresource;
   const struct anv_format *view_format_info;

   /* Validate structure type before dereferencing it. */
   assert(pCreateInfo);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO);
   subresource = &pCreateInfo->subresourceRange;

   /* Validate viewType is in range before using it. */
   assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE);
   assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE);

   /* Validate format is in range before using it. */
   assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE);
   assert(pCreateInfo->format <= VK_FORMAT_END_RANGE);
   view_format_info = anv_format_for_vk_format(pCreateInfo->format);

   /* Validate channel swizzles. */
   assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
   assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE);
   assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
   assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE);
   assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
   assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE);
   assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
   assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE);

   /* Validate subresource. */
   assert(subresource->aspectMask != 0);
   assert(subresource->levelCount > 0);
   assert(subresource->layerCount > 0);
   assert(subresource->baseMipLevel < image->levels);
   assert(subresource->baseMipLevel + anv_get_levelCount(image, subresource) <= image->levels);
   assert(subresource->baseArrayLayer < image->array_size);
   assert(subresource->baseArrayLayer + anv_get_layerCount(image, subresource) <= image->array_size);
   assert(pView);

   const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT
                                     | VK_IMAGE_ASPECT_STENCIL_BIT;

   /* Validate format. */
   if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
      assert(!image->format->has_depth);
      assert(!image->format->has_stencil);
      assert(!view_format_info->has_depth);
      assert(!view_format_info->has_stencil);
      assert(view_format_info->isl_layout->bs ==
             image->format->isl_layout->bs);
   } else if (subresource->aspectMask & ds_flags) {
      assert((subresource->aspectMask & ~ds_flags) == 0);

      if (subresource->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         assert(image->format->has_depth);
         assert(view_format_info->has_depth);
         assert(view_format_info->isl_layout->bs ==
                image->format->isl_layout->bs);
      }

      if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
         /* FINISHME: Is it legal to have an R8 view of S8? */
         assert(image->format->has_stencil);
         assert(view_format_info->has_stencil);
      }
   } else {
      assert(!"bad VkImageSubresourceRange::aspectFlags");
   }

   return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView);
}

static struct anv_state
alloc_surface_state(struct anv_device *device,
                    struct anv_cmd_buffer *cmd_buffer)
{
   if (cmd_buffer) {
      return anv_cmd_buffer_alloc_surface_state(cmd_buffer);
   } else {
      return anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
   }
}

static bool
has_matching_storage_typed_format(const struct anv_device *device,
                                  enum isl_format format)
{
   return (isl_format_get_layout(format)->bs <= 4 ||
           (isl_format_get_layout(format)->bs <= 8 &&
            (device->info.gen >= 8 || device->info.is_haswell)) ||
           device->info.gen >= 9);
}

static enum isl_channel_select
remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component,
              struct anv_format_swizzle format_swizzle)
{
   if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY)
      swizzle = component;

   switch (swizzle) {
   case VK_COMPONENT_SWIZZLE_ZERO:
      return ISL_CHANNEL_SELECT_ZERO;
   case VK_COMPONENT_SWIZZLE_ONE:
      return ISL_CHANNEL_SELECT_ONE;
   case VK_COMPONENT_SWIZZLE_R:
      return ISL_CHANNEL_SELECT_RED + format_swizzle.r;
   case VK_COMPONENT_SWIZZLE_G:
      return ISL_CHANNEL_SELECT_RED + format_swizzle.g;
   case VK_COMPONENT_SWIZZLE_B:
      return ISL_CHANNEL_SELECT_RED + format_swizzle.b;
   case VK_COMPONENT_SWIZZLE_A:
      return ISL_CHANNEL_SELECT_RED + format_swizzle.a;
   default:
      unreachable("Invalid swizzle");
   }
}
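
/* Worked example (illustrative only): with an identity view swizzle and a
 * format whose red channel actually lives in the hardware blue slot
 * (format_swizzle.r == 2), VK_COMPONENT_SWIZZLE_R resolves to
 * ISL_CHANNEL_SELECT_RED + 2, i.e. ISL_CHANNEL_SELECT_BLUE, assuming the
 * isl_channel_select enumerants are consecutive in RGBA order.
 */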

void
anv_image_view_init(struct anv_image_view *iview,
                    struct anv_device *device,
                    const VkImageViewCreateInfo* pCreateInfo,
                    struct anv_cmd_buffer *cmd_buffer,
                    VkImageUsageFlags usage_mask)
{
   ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);
   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;

   assert(range->layerCount > 0);
   assert(range->baseMipLevel < image->levels);
   assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
                          VK_IMAGE_USAGE_STORAGE_BIT |
                          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
                          VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT));

   switch (image->type) {
   default:
      unreachable("bad VkImageType");
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      assert(range->baseArrayLayer + anv_get_layerCount(image, range) - 1 <= image->array_size);
      break;
   case VK_IMAGE_TYPE_3D:
      assert(range->baseArrayLayer + anv_get_layerCount(image, range) - 1
             <= anv_minify(image->extent.depth, range->baseMipLevel));
      break;
   }

   struct anv_surface *surface =
      anv_image_get_surface_for_aspect_mask(image, range->aspectMask);

   iview->image = image;
   iview->bo = image->bo;
   iview->offset = image->offset + surface->offset;

   iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
   iview->vk_format = pCreateInfo->format;

   struct anv_format_swizzle swizzle;
   enum isl_format format = anv_get_isl_format(pCreateInfo->format,
                                               range->aspectMask,
                                               image->tiling, &swizzle);

   iview->base_layer = range->baseArrayLayer;
   iview->base_mip = range->baseMipLevel;

   struct isl_view isl_view = {
      .format = format,
      .base_level = range->baseMipLevel,
      .levels = anv_get_levelCount(image, range),
      .base_array_layer = range->baseArrayLayer,
      .array_len = anv_get_layerCount(image, range),
      .channel_select = {
         remap_swizzle(pCreateInfo->components.r,
                       VK_COMPONENT_SWIZZLE_R, swizzle),
         remap_swizzle(pCreateInfo->components.g,
                       VK_COMPONENT_SWIZZLE_G, swizzle),
         remap_swizzle(pCreateInfo->components.b,
                       VK_COMPONENT_SWIZZLE_B, swizzle),
         remap_swizzle(pCreateInfo->components.a,
                       VK_COMPONENT_SWIZZLE_A, swizzle),
      },
   };

   iview->extent = (VkExtent3D) {
      .width = anv_minify(image->extent.width , range->baseMipLevel),
      .height = anv_minify(image->extent.height, range->baseMipLevel),
      .depth = anv_minify(image->extent.depth , range->baseMipLevel),
   };

   isl_surf_usage_flags_t cube_usage;
   if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE ||
       pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
      cube_usage = ISL_SURF_USAGE_CUBE_BIT;
   } else {
      cube_usage = 0;
   }

   if (image->usage & usage_mask & VK_IMAGE_USAGE_SAMPLED_BIT) {
      iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer);

      isl_view.usage = cube_usage | ISL_SURF_USAGE_TEXTURE_BIT;
      isl_surf_fill_state(&device->isl_dev,
                          iview->sampler_surface_state.map,
                          .surf = &surface->isl,
                          .view = &isl_view,
                          .mocs = device->default_mocs);

      if (!device->info.has_llc)
         anv_state_clflush(iview->sampler_surface_state);
   } else {
      iview->sampler_surface_state.alloc_size = 0;
   }

   if (image->usage & usage_mask & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
      iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer);

      isl_view.usage = cube_usage | ISL_SURF_USAGE_RENDER_TARGET_BIT;
      isl_surf_fill_state(&device->isl_dev,
                          iview->color_rt_surface_state.map,
                          .surf = &surface->isl,
                          .view = &isl_view,
                          .mocs = device->default_mocs);

      if (!device->info.has_llc)
         anv_state_clflush(iview->color_rt_surface_state);
   } else {
      iview->color_rt_surface_state.alloc_size = 0;
   }

   if (image->usage & usage_mask & VK_IMAGE_USAGE_STORAGE_BIT) {
      iview->storage_surface_state = alloc_surface_state(device, cmd_buffer);

      if (has_matching_storage_typed_format(device, format)) {
         isl_view.usage = cube_usage | ISL_SURF_USAGE_STORAGE_BIT;
         isl_surf_fill_state(&device->isl_dev,
                             iview->storage_surface_state.map,
                             .surf = &surface->isl,
                             .view = &isl_view,
                             .mocs = device->default_mocs);
      } else {
         anv_fill_buffer_surface_state(device, iview->storage_surface_state,
                                       ISL_FORMAT_RAW,
                                       iview->offset,
                                       iview->bo->size - iview->offset, 1);
      }

      isl_surf_fill_image_param(&device->isl_dev,
                                &iview->storage_image_param,
                                &surface->isl, &isl_view);

      if (!device->info.has_llc)
         anv_state_clflush(iview->storage_surface_state);
   } else {
      iview->storage_surface_state.alloc_size = 0;
   }
}

VkResult
anv_CreateImageView(VkDevice _device,
                    const VkImageViewCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
                    VkImageView *pView)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_image_view *view;

   view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (view == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_image_view_init(view, device, pCreateInfo, NULL, ~0);

   *pView = anv_image_view_to_handle(view);

   return VK_SUCCESS;
}
|
||||
|
||||
void
|
||||
anv_DestroyImageView(VkDevice _device, VkImageView _iview,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
ANV_FROM_HANDLE(anv_image_view, iview, _iview);
|
||||
|
||||
if (iview->color_rt_surface_state.alloc_size > 0) {
|
||||
anv_state_pool_free(&device->surface_state_pool,
|
||||
iview->color_rt_surface_state);
|
||||
}
|
||||
|
||||
if (iview->sampler_surface_state.alloc_size > 0) {
|
||||
anv_state_pool_free(&device->surface_state_pool,
|
||||
iview->sampler_surface_state);
|
||||
}
|
||||
|
||||
if (iview->storage_surface_state.alloc_size > 0) {
|
||||
anv_state_pool_free(&device->surface_state_pool,
|
||||
iview->storage_surface_state);
|
||||
}
|
||||
|
||||
anv_free2(&device->alloc, pAllocator, iview);
|
||||
}
|
||||
|
||||
|
||||
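/* Editor's sketch (not part of the commit): how an application reaches the
 * two entry points above through the public API. The device and image
 * handles and the format are hypothetical placeholders. */
#include <vulkan/vulkan.h>

VkImageView
make_simple_2d_view(VkDevice device, VkImage image)
{
   const VkImageViewCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
      .image = image,
      .viewType = VK_IMAGE_VIEW_TYPE_2D,
      .format = VK_FORMAT_R8G8B8A8_UNORM,
      .subresourceRange = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .baseMipLevel = 0, .levelCount = 1,
         .baseArrayLayer = 0, .layerCount = 1,
      },
   };
   VkImageView view = VK_NULL_HANDLE;

   /* Dispatches to anv_CreateImageView(); a NULL allocator falls back to
    * the device allocator, matching the anv_alloc2() call above. */
   if (vkCreateImageView(device, &info, NULL, &view) != VK_SUCCESS)
      return VK_NULL_HANDLE;
   return view;   /* later: vkDestroyImageView(device, view, NULL); */
}
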
void anv_buffer_view_init(struct anv_buffer_view *view,
                          struct anv_device *device,
                          const VkBufferViewCreateInfo* pCreateInfo,
                          struct anv_cmd_buffer *cmd_buffer)
{
   ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);

   const struct anv_format *format =
      anv_format_for_vk_format(pCreateInfo->format);

   view->format = format->isl_format;
   view->bo = buffer->bo;
   view->offset = buffer->offset + pCreateInfo->offset;
   view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
                 buffer->size - view->offset : pCreateInfo->range;

   if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) {
      view->surface_state = alloc_surface_state(device, cmd_buffer);

      anv_fill_buffer_surface_state(device, view->surface_state,
                                    view->format,
                                    view->offset, view->range,
                                    format->isl_layout->bs);
   } else {
      view->surface_state = (struct anv_state){ 0 };
   }

   if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) {
      view->storage_surface_state = alloc_surface_state(device, cmd_buffer);

      enum isl_format storage_format =
         has_matching_storage_typed_format(device, view->format) ?
         isl_lower_storage_image_format(&device->isl_dev, view->format) :
         ISL_FORMAT_RAW;

      anv_fill_buffer_surface_state(device, view->storage_surface_state,
                                    storage_format,
                                    view->offset, view->range,
                                    (storage_format == ISL_FORMAT_RAW ? 1 :
                                     format->isl_layout->bs));

      isl_buffer_fill_image_param(&device->isl_dev,
                                  &view->storage_image_param,
                                  view->format, view->range);
   } else {
      view->storage_surface_state = (struct anv_state){ 0 };
   }
}

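/* Editor's sketch: the VK_WHOLE_SIZE handling above in isolation. For a
 * 4096-byte buffer viewed at offset 256, a VK_WHOLE_SIZE view covers the
 * remaining 3840 bytes; any other requested range is taken verbatim. */
static uint64_t
effective_view_range(uint64_t buffer_size, uint64_t view_offset,
                     uint64_t requested_range)
{
   return requested_range == VK_WHOLE_SIZE ?
          buffer_size - view_offset : requested_range;
}
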
VkResult
anv_CreateBufferView(VkDevice _device,
                     const VkBufferViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkBufferView *pView)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_buffer_view *view;

   view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!view)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_buffer_view_init(view, device, pCreateInfo, NULL);

   *pView = anv_buffer_view_to_handle(view);

   return VK_SUCCESS;
}

void
anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                      const VkAllocationCallbacks *pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_buffer_view, view, bufferView);

   if (view->surface_state.alloc_size > 0)
      anv_state_pool_free(&device->surface_state_pool,
                          view->surface_state);

   if (view->storage_surface_state.alloc_size > 0)
      anv_state_pool_free(&device->surface_state_pool,
                          view->storage_surface_state);

   anv_free2(&device->alloc, pAllocator, view);
}

struct anv_surface *
anv_image_get_surface_for_aspect_mask(struct anv_image *image,
                                      VkImageAspectFlags aspect_mask)
{
   switch (aspect_mask) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
      /* Dragons will eat you.
       *
       * Meta attaches all destination surfaces as color render targets. Guess
       * what surface the Meta Dragons really want.
       */
      if (image->format->has_depth && image->format->has_stencil) {
         return &image->depth_surface;
      } else if (image->format->has_depth) {
         return &image->depth_surface;
      } else if (image->format->has_stencil) {
         return &image->stencil_surface;
      } else {
         return &image->color_surface;
      }
      break;
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      assert(image->format->has_depth);
      return &image->depth_surface;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      assert(image->format->has_stencil);
      return &image->stencil_surface;
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      if (image->format->has_depth && image->format->has_stencil) {
         /* FINISHME: The Vulkan spec (git a511ba2) requires support for
          * combined depth stencil formats. Specifically, it states:
          *
          *    At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or
          *    ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported.
          *
          * Image views with both depth and stencil aspects are only valid for
          * render target attachments, in which case
          * cmd_buffer_emit_depth_stencil() will pick out both the depth and
          * stencil surfaces from the underlying surface.
          */
         return &image->depth_surface;
      } else if (image->format->has_depth) {
         return &image->depth_surface;
      } else if (image->format->has_stencil) {
         return &image->stencil_surface;
      }
      /* fallthrough */
   default:
      unreachable("image does not have aspect");
      return NULL;
   }
}

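/* Editor's sketch: a typical caller splitting a combined depth/stencil
 * image into its two backing surfaces via the helper above. */
static void
get_ds_surfaces(struct anv_image *image,
                struct anv_surface **depth, struct anv_surface **stencil)
{
   *depth = anv_image_get_surface_for_aspect_mask(image,
                                                  VK_IMAGE_ASPECT_DEPTH_BIT);
   *stencil = anv_image_get_surface_for_aspect_mask(image,
                                                    VK_IMAGE_ASPECT_STENCIL_BIT);
}
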
100
src/intel/vulkan/anv_intel.c
Normal file
@@ -0,0 +1,100 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

VkResult anv_CreateDmaBufImageINTEL(
    VkDevice                                    _device,
    const VkDmaBufImageCreateInfo*              pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDeviceMemory*                             pMem,
    VkImage*                                    pImage)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_device_memory *mem;
   struct anv_image *image;
   VkResult result;
   VkImage image_h;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL);

   mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd);
   if (!mem->bo.gem_handle) {
      result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto fail;
   }

   mem->bo.map = NULL;
   mem->bo.index = 0;
   mem->bo.offset = 0;
   mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height;

   anv_image_create(_device,
      &(struct anv_image_create_info) {
         .isl_tiling_flags = ISL_TILING_X_BIT,
         .stride = pCreateInfo->strideInBytes,
         .vk_info =
      &(VkImageCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = pCreateInfo->format,
         .extent = pCreateInfo->extent,
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = 1,
         /* FIXME: Need a way to use X tiling to allow scanout */
         .tiling = VK_IMAGE_TILING_OPTIMAL,
         .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
         .flags = 0,
      }},
      pAllocator, &image_h);

   image = anv_image_from_handle(image_h);
   image->bo = &mem->bo;
   image->offset = 0;

   assert(image->extent.width > 0);
   assert(image->extent.height > 0);
   assert(image->extent.depth == 1);

   *pMem = anv_device_memory_to_handle(mem);
   *pImage = anv_image_to_handle(image);

   return VK_SUCCESS;

 fail:
   anv_free2(&device->alloc, pAllocator, mem);

   return result;
}

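/* Editor's sketch: calling the Intel-specific entry point above to wrap a
 * dma-buf in a VkImage + VkDeviceMemory pair. The fd, format, and stride
 * values are hypothetical placeholders; error handling is omitted. */
static VkResult
import_dma_buf(VkDevice device, int fd, uint32_t width, uint32_t height,
               uint32_t stride, VkDeviceMemory *mem, VkImage *image)
{
   return anv_CreateDmaBufImageINTEL(device,
      &(VkDmaBufImageCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL,
         .fd = fd,
         .format = VK_FORMAT_B8G8R8A8_UNORM,   /* placeholder */
         .extent = { width, height, 1 },
         .strideInBytes = stride,
      }, NULL, mem, image);
}
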
176
src/intel/vulkan/anv_meta.c
Normal file
@@ -0,0 +1,176 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_meta.h"

struct anv_render_pass anv_meta_dummy_renderpass = {0};

void
anv_meta_save(struct anv_meta_saved_state *state,
              const struct anv_cmd_buffer *cmd_buffer,
              uint32_t dynamic_mask)
{
   state->old_pipeline = cmd_buffer->state.pipeline;
   state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
   memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings,
          sizeof(state->old_vertex_bindings));

   state->dynamic_mask = dynamic_mask;
   anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic,
                          dynamic_mask);
}

void
anv_meta_restore(const struct anv_meta_saved_state *state,
                 struct anv_cmd_buffer *cmd_buffer)
{
   cmd_buffer->state.pipeline = state->old_pipeline;
   cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
   memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
          sizeof(state->old_vertex_bindings));

   cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1;
   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic,
                          state->dynamic_mask);
   cmd_buffer->state.dirty |= state->dynamic_mask;

   /* Since we've used the pipeline with the VS disabled, set
    * need_query_wa. See CmdBeginQuery.
    */
   cmd_buffer->state.need_query_wa = true;
}

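/* Editor's sketch: the bracketing pattern that every meta operation follows
 * with the two functions above (meta_prepare_blit/meta_finish_blit later in
 * this commit are thin wrappers around exactly this). */
static void
run_meta_op_example(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_meta_saved_state saved;

   anv_meta_save(&saved, cmd_buffer, 0);    /* stash the user's state   */
   /* ... bind meta pipeline, emit draws ... */
   anv_meta_restore(&saved, cmd_buffer);    /* put the user's state back */
}
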
VkImageViewType
anv_meta_get_view_type(const struct anv_image *image)
{
   switch (image->type) {
   case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
   case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
   case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
   default:
      unreachable("bad VkImageType");
   }
}

/**
 * When creating a destination VkImageView, this function provides the needed
 * VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
 */
uint32_t
anv_meta_get_iview_layer(const struct anv_image *dest_image,
                         const VkImageSubresourceLayers *dest_subresource,
                         const VkOffset3D *dest_offset)
{
   switch (dest_image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      return dest_subresource->baseArrayLayer;
   case VK_IMAGE_TYPE_3D:
      /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
       * but meta does it anyway. When doing so, we translate the
       * destination's z offset into an array offset.
       */
      return dest_offset->z;
   default:
      assert(!"bad VkImageType");
      return 0;
   }
}

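/* Editor's sketch: the mapping performed by anv_meta_get_iview_layer() above.
 * For 1D/2D destinations the subresource's base layer is used directly; for a
 * 3D destination the z offset of the copy becomes the attached "array" layer. */
static void
iview_layer_example(void)
{
   const VkImageSubresourceLayers sub = { .baseArrayLayer = 2 };
   const VkOffset3D dst_at = { .x = 0, .y = 0, .z = 5 };
   /* 1D/2D image: anv_meta_get_iview_layer() == sub.baseArrayLayer == 2
    * 3D image:    anv_meta_get_iview_layer() == dst_at.z           == 5 */
   (void)sub; (void)dst_at;
}
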
static void *
meta_alloc(void* _device, size_t size, size_t alignment,
           VkSystemAllocationScope allocationScope)
{
   struct anv_device *device = _device;
   return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment,
                                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static void *
meta_realloc(void* _device, void *original, size_t size, size_t alignment,
             VkSystemAllocationScope allocationScope)
{
   struct anv_device *device = _device;
   return device->alloc.pfnReallocation(device->alloc.pUserData, original,
                                        size, alignment,
                                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static void
meta_free(void* _device, void *data)
{
   struct anv_device *device = _device;
   return device->alloc.pfnFree(device->alloc.pUserData, data);
}

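/* Editor's sketch: any allocation made against the callbacks installed just
 * below (device->meta_state.alloc) funnels through the trampolines above,
 * which override the caller's scope with VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
 * since meta objects live as long as the device. A hypothetical call: */
static void *
meta_alloc_example(struct anv_device *device)
{
   return device->meta_state.alloc.pfnAllocation(
      device->meta_state.alloc.pUserData, 64, 8,
      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT /* ignored; DEVICE is used */);
}
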
VkResult
anv_device_init_meta(struct anv_device *device)
{
   VkResult result;

   device->meta_state.alloc = (VkAllocationCallbacks) {
      .pUserData = device,
      .pfnAllocation = meta_alloc,
      .pfnReallocation = meta_realloc,
      .pfnFree = meta_free,
   };

   result = anv_device_init_meta_clear_state(device);
   if (result != VK_SUCCESS)
      goto fail_clear;

   result = anv_device_init_meta_resolve_state(device);
   if (result != VK_SUCCESS)
      goto fail_resolve;

   result = anv_device_init_meta_blit_state(device);
   if (result != VK_SUCCESS)
      goto fail_blit;

   result = anv_device_init_meta_blit2d_state(device);
   if (result != VK_SUCCESS)
      goto fail_blit2d;

   return VK_SUCCESS;

fail_blit2d:
   anv_device_finish_meta_blit_state(device);
fail_blit:
   anv_device_finish_meta_resolve_state(device);
fail_resolve:
   anv_device_finish_meta_clear_state(device);
fail_clear:
   return result;
}

void
anv_device_finish_meta(struct anv_device *device)
{
   anv_device_finish_meta_resolve_state(device);
   anv_device_finish_meta_clear_state(device);
   anv_device_finish_meta_blit_state(device);
   anv_device_finish_meta_blit2d_state(device);
}

113
src/intel/vulkan/anv_meta.h
Normal file
@@ -0,0 +1,113 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "anv_private.h"

#ifdef __cplusplus
extern "C" {
#endif

#define ANV_META_VERTEX_BINDING_COUNT 2

struct anv_meta_saved_state {
   struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT];
   struct anv_descriptor_set *old_descriptor_set0;
   struct anv_pipeline *old_pipeline;

   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic
    * state.
    */
   uint32_t dynamic_mask;
   struct anv_dynamic_state dynamic;
};

VkResult anv_device_init_meta_clear_state(struct anv_device *device);
void anv_device_finish_meta_clear_state(struct anv_device *device);

VkResult anv_device_init_meta_resolve_state(struct anv_device *device);
void anv_device_finish_meta_resolve_state(struct anv_device *device);

VkResult anv_device_init_meta_blit_state(struct anv_device *device);
void anv_device_finish_meta_blit_state(struct anv_device *device);

VkResult anv_device_init_meta_blit2d_state(struct anv_device *device);
void anv_device_finish_meta_blit2d_state(struct anv_device *device);

void
anv_meta_save(struct anv_meta_saved_state *state,
              const struct anv_cmd_buffer *cmd_buffer,
              uint32_t dynamic_mask);

void
anv_meta_restore(const struct anv_meta_saved_state *state,
                 struct anv_cmd_buffer *cmd_buffer);

VkImageViewType
anv_meta_get_view_type(const struct anv_image *image);

uint32_t
anv_meta_get_iview_layer(const struct anv_image *dest_image,
                         const VkImageSubresourceLayers *dest_subresource,
                         const VkOffset3D *dest_offset);

struct anv_meta_blit2d_surf {
   struct anv_bo *bo;
   enum isl_tiling tiling;

   /** Base offset to the start of the image */
   uint64_t base_offset;

   /** The size of an element in bytes. */
   uint8_t bs;

   /** Pitch between rows in bytes. */
   uint32_t pitch;
};

struct anv_meta_blit2d_rect {
   uint32_t src_x, src_y;
   uint32_t dst_x, dst_y;
   uint32_t width, height;
};

void
anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
                      struct anv_meta_saved_state *save);

void
anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
                struct anv_meta_blit2d_surf *src,
                struct anv_meta_blit2d_surf *dst,
                unsigned num_rects,
                struct anv_meta_blit2d_rect *rects);

void
anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_meta_saved_state *save);

#ifdef __cplusplus
}
#endif

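/* Editor's sketch: a minimal use of the blit2d interface declared above,
 * copying a 64x64-pixel region between two linear BOs. All values are
 * placeholders; see anv_meta_blit2d.c (suppressed below) for the backend. */
static void
copy_region_example(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_bo *src_bo, struct anv_bo *dst_bo)
{
   struct anv_meta_blit2d_surf src = {
      .bo = src_bo,
      .tiling = ISL_TILING_LINEAR,
      .base_offset = 0,
      .bs = 4,            /* 4 bytes per element, e.g. RGBA8      */
      .pitch = 4 * 256,   /* 256-element-wide linear surface      */
   };
   struct anv_meta_blit2d_surf dst = src;
   dst.bo = dst_bo;

   struct anv_meta_blit2d_rect rect = {
      .src_x = 0, .src_y = 0,
      .dst_x = 16, .dst_y = 16,
      .width = 64, .height = 64,
   };

   struct anv_meta_saved_state saved;
   anv_meta_begin_blit2d(cmd_buffer, &saved);
   anv_meta_blit2d(cmd_buffer, &src, &dst, 1, &rect);
   anv_meta_end_blit2d(cmd_buffer, &saved);
}
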
736
src/intel/vulkan/anv_meta_blit.c
Normal file
@@ -0,0 +1,736 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_meta.h"
#include "nir/nir_builder.h"

struct blit_region {
   VkOffset3D src_offset;
   VkExtent3D src_extent;
   VkOffset3D dest_offset;
   VkExtent3D dest_extent;
};

static nir_shader *
build_nir_vertex_shader(void)
{
   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_builder b;

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");

   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "a_pos");
   pos_in->data.location = VERT_ATTRIB_GENERIC0;
   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                               vec4, "gl_Position");
   pos_out->data.location = VARYING_SLOT_POS;
   nir_copy_var(&b, pos_out, pos_in);

   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                                  vec4, "a_tex_pos");
   tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
   nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                   vec4, "v_tex_pos");
   tex_pos_out->data.location = VARYING_SLOT_VAR0;
   tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
   nir_copy_var(&b, tex_pos_out, tex_pos_in);

   return b.shader;
}

static nir_shader *
build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
{
   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_builder b;

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
   b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs");

   nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                                  vec4, "v_tex_pos");
   tex_pos_in->data.location = VARYING_SLOT_VAR0;

   /* Swizzle the array index which comes in as Z coordinate into the right
    * position.
    */
   unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
   nir_ssa_def *const tex_pos =
      nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
                  (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);

   const struct glsl_type *sampler_type =
      glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
                        glsl_get_base_type(vec4));
   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
                                               sampler_type, "s_tex");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
   tex->sampler_dim = tex_dim;
   tex->op = nir_texop_tex;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(tex_pos);
   tex->dest_type = nir_type_float; /* TODO */
   tex->is_array = glsl_sampler_type_is_array(sampler_type);
   tex->coord_components = tex_pos->num_components;
   tex->texture = nir_deref_var_create(tex, sampler);
   tex->sampler = nir_deref_var_create(tex, sampler);

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                 vec4, "f_color");
   color_out->data.location = FRAG_RESULT_DATA0;
   nir_store_var(&b, color_out, &tex->dest.ssa, 4);

   return b.shader;
}

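/* Editor's note: approximately, the NIR assembled above corresponds to this
 * GLSL (2D array case), written out here only for readability:
 *
 *    // vertex shader (passthrough; the VS stage is disabled at draw time)
 *    layout(location = 0) in vec4 a_pos;
 *    layout(location = 1) in vec4 a_tex_pos;
 *    out vec4 v_tex_pos;
 *    void main() { gl_Position = a_pos; v_tex_pos = a_tex_pos; }
 *
 *    // fragment shader
 *    in vec4 v_tex_pos;
 *    uniform sampler2DArray s_tex;   // set 0, binding 0
 *    layout(location = 0) out vec4 f_color;
 *    void main() { f_color = texture(s_tex, v_tex_pos.xyz); }
 */
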
static void
meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer,
                  struct anv_meta_saved_state *saved_state)
{
   anv_meta_save(saved_state, cmd_buffer, 0);
}

static void
meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
               struct anv_image *src_image,
               struct anv_image_view *src_iview,
               VkOffset3D src_offset,
               VkExtent3D src_extent,
               struct anv_image *dest_image,
               struct anv_image_view *dest_iview,
               VkOffset3D dest_offset,
               VkExtent3D dest_extent,
               VkFilter blit_filter)
{
   struct anv_device *device = cmd_buffer->device;

   struct blit_vb_data {
      float pos[2];
      float tex_coord[3];
   } *vb_data;

   assert(src_image->samples == dest_image->samples);

   unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);

   struct anv_state vb_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
   memset(vb_state.map, 0, sizeof(struct anv_vue_header));
   vb_data = vb_state.map + sizeof(struct anv_vue_header);

   vb_data[0] = (struct blit_vb_data) {
      .pos = {
         dest_offset.x + dest_extent.width,
         dest_offset.y + dest_extent.height,
      },
      .tex_coord = {
         (float)(src_offset.x + src_extent.width)
            / (float)src_iview->extent.width,
         (float)(src_offset.y + src_extent.height)
            / (float)src_iview->extent.height,
         (float)src_offset.z / (float)src_iview->extent.depth,
      },
   };

   vb_data[1] = (struct blit_vb_data) {
      .pos = {
         dest_offset.x,
         dest_offset.y + dest_extent.height,
      },
      .tex_coord = {
         (float)src_offset.x / (float)src_iview->extent.width,
         (float)(src_offset.y + src_extent.height) /
            (float)src_iview->extent.height,
         (float)src_offset.z / (float)src_iview->extent.depth,
      },
   };

   vb_data[2] = (struct blit_vb_data) {
      .pos = {
         dest_offset.x,
         dest_offset.y,
      },
      .tex_coord = {
         (float)src_offset.x / (float)src_iview->extent.width,
         (float)src_offset.y / (float)src_iview->extent.height,
         (float)src_offset.z / (float)src_iview->extent.depth,
      },
   };

   if (!device->info.has_llc)
      anv_state_clflush(vb_state);

   struct anv_buffer vertex_buffer = {
      .device = device,
      .size = vb_size,
      .bo = &device->dynamic_state_block_pool.bo,
      .offset = vb_state.offset,
   };

   anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
      (VkBuffer[]) {
         anv_buffer_to_handle(&vertex_buffer),
         anv_buffer_to_handle(&vertex_buffer)
      },
      (VkDeviceSize[]) {
         0,
         sizeof(struct anv_vue_header),
      });

   VkSampler sampler;
   ANV_CALL(CreateSampler)(anv_device_to_handle(device),
      &(VkSamplerCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
         .magFilter = blit_filter,
         .minFilter = blit_filter,
      }, &cmd_buffer->pool->alloc, &sampler);

   VkDescriptorPool desc_pool;
   anv_CreateDescriptorPool(anv_device_to_handle(device),
      &(const VkDescriptorPoolCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
         .pNext = NULL,
         .flags = 0,
         .maxSets = 1,
         .poolSizeCount = 1,
         .pPoolSizes = (VkDescriptorPoolSize[]) {
            {
               .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
               .descriptorCount = 1
            },
         }
      }, &cmd_buffer->pool->alloc, &desc_pool);

   VkDescriptorSet set;
   anv_AllocateDescriptorSets(anv_device_to_handle(device),
      &(VkDescriptorSetAllocateInfo) {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
         .descriptorPool = desc_pool,
         .descriptorSetCount = 1,
         .pSetLayouts = &device->meta_state.blit.ds_layout
      }, &set);

   anv_UpdateDescriptorSets(anv_device_to_handle(device),
      1, /* writeCount */
      (VkWriteDescriptorSet[]) {
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = set,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .pImageInfo = (VkDescriptorImageInfo[]) {
               {
                  .sampler = sampler,
                  .imageView = anv_image_view_to_handle(src_iview),
                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
               },
            }
         }
      }, 0, NULL);

   VkFramebuffer fb;
   anv_CreateFramebuffer(anv_device_to_handle(device),
      &(VkFramebufferCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .attachmentCount = 1,
         .pAttachments = (VkImageView[]) {
            anv_image_view_to_handle(dest_iview),
         },
         .width = dest_iview->extent.width,
         .height = dest_iview->extent.height,
         .layers = 1
      }, &cmd_buffer->pool->alloc, &fb);

   ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
      &(VkRenderPassBeginInfo) {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = device->meta_state.blit.render_pass,
         .framebuffer = fb,
         .renderArea = {
            .offset = { dest_offset.x, dest_offset.y },
            .extent = { dest_extent.width, dest_extent.height },
         },
         .clearValueCount = 0,
         .pClearValues = NULL,
      }, VK_SUBPASS_CONTENTS_INLINE);

   VkPipeline pipeline;

   switch (src_image->type) {
   case VK_IMAGE_TYPE_1D:
      pipeline = device->meta_state.blit.pipeline_1d_src;
      break;
   case VK_IMAGE_TYPE_2D:
      pipeline = device->meta_state.blit.pipeline_2d_src;
      break;
   case VK_IMAGE_TYPE_3D:
      pipeline = device->meta_state.blit.pipeline_3d_src;
      break;
   default:
      unreachable("bad VkImageType");
   }

   if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
      anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
                          VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
   }

   anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
                             VK_PIPELINE_BIND_POINT_GRAPHICS,
                             device->meta_state.blit.pipeline_layout, 0, 1,
                             &set, 0, NULL);

   ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);

   ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));

   /* At the point where we emit the draw call, all data from the
    * descriptor sets, etc. has been used. We are free to delete it.
    */
   anv_DestroyDescriptorPool(anv_device_to_handle(device),
                             desc_pool, &cmd_buffer->pool->alloc);
   anv_DestroySampler(anv_device_to_handle(device), sampler,
                      &cmd_buffer->pool->alloc);
   anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
                          &cmd_buffer->pool->alloc);
}

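/* Editor's note: meta_emit_blit() above uploads only three vertices because
 * the blit pipelines are created with .use_rectlist = true (see
 * anv_device_init_meta_blit_state below): the hardware RECTLIST primitive
 * takes three corners and derives the fourth. A sketch of the convention: */
static void
fill_rectlist_corners(float x0, float y0, float x1, float y1,
                      float pos[3][2])
{
   pos[0][0] = x1; pos[0][1] = y1;   /* max corner                    */
   pos[1][0] = x0; pos[1][1] = y1;   /* adjacent corner               */
   pos[2][0] = x0; pos[2][1] = y0;   /* min corner; (x1, y0) is       */
                                     /* inferred by the hardware      */
}
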
static void
meta_finish_blit(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_meta_saved_state *saved_state)
{
   anv_meta_restore(saved_state, cmd_buffer);
}

void anv_CmdBlitImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     destImage,
    VkImageLayout                               destImageLayout,
    uint32_t                                    regionCount,
    const VkImageBlit*                          pRegions,
    VkFilter                                    filter)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dest_image, destImage);
   struct anv_meta_saved_state saved_state;

   /* From the Vulkan 1.0 spec:
    *
    *    vkCmdBlitImage must not be used for multisampled source or
    *    destination images. Use vkCmdResolveImage for this purpose.
    */
   assert(src_image->samples == 1);
   assert(dest_image->samples == 1);

   meta_prepare_blit(cmd_buffer, &saved_state);

   for (unsigned r = 0; r < regionCount; r++) {
      struct anv_image_view src_iview;
      anv_image_view_init(&src_iview, cmd_buffer->device,
         &(VkImageViewCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = srcImage,
            .viewType = anv_meta_get_view_type(src_image),
            .format = src_image->vk_format,
            .subresourceRange = {
               .aspectMask = pRegions[r].srcSubresource.aspectMask,
               .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
               .levelCount = 1,
               .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
               .layerCount = 1
            },
         },
         cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);

      const VkOffset3D dest_offset = {
         .x = pRegions[r].dstOffsets[0].x,
         .y = pRegions[r].dstOffsets[0].y,
         .z = 0,
      };

      if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
          pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
          pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
          pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
         anv_finishme("FINISHME: Allow flipping in blits");

      const VkExtent3D dest_extent = {
         .width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
         .height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
      };

      const VkExtent3D src_extent = {
         .width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
         .height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
      };

      const uint32_t dest_array_slice =
         anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
                                  &pRegions[r].dstOffsets[0]);

      if (pRegions[r].srcSubresource.layerCount > 1)
         anv_finishme("FINISHME: copy multiple array layers");

      if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z ||
          pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z)
         anv_finishme("FINISHME: copy multiple depth layers");

      struct anv_image_view dest_iview;
      anv_image_view_init(&dest_iview, cmd_buffer->device,
         &(VkImageViewCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = destImage,
            .viewType = anv_meta_get_view_type(dest_image),
            .format = dest_image->vk_format,
            .subresourceRange = {
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
               .levelCount = 1,
               .baseArrayLayer = dest_array_slice,
               .layerCount = 1
            },
         },
         cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);

      meta_emit_blit(cmd_buffer,
                     src_image, &src_iview,
                     pRegions[r].srcOffsets[0], src_extent,
                     dest_image, &dest_iview,
                     dest_offset, dest_extent,
                     filter);
   }

   meta_finish_blit(cmd_buffer, &saved_state);
}

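/* Editor's sketch: exercising anv_CmdBlitImage through the core API with a
 * single stretched color blit. The handles and dimensions are placeholders. */
static void
blit_full_image(VkCommandBuffer cmd, VkImage src, VkImage dst)
{
   const VkImageBlit region = {
      .srcSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 },
      .srcOffsets = { { 0, 0, 0 }, { 256, 256, 1 } },
      .dstSubresource = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1 },
      .dstOffsets = { { 0, 0, 0 }, { 512, 512, 1 } },   /* 2x upscale */
   };
   vkCmdBlitImage(cmd, src, VK_IMAGE_LAYOUT_GENERAL,
                  dst, VK_IMAGE_LAYOUT_GENERAL,
                  1, &region, VK_FILTER_LINEAR);
}
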
void
anv_device_finish_meta_blit_state(struct anv_device *device)
{
   anv_DestroyRenderPass(anv_device_to_handle(device),
                         device->meta_state.blit.render_pass,
                         &device->meta_state.alloc);
   anv_DestroyPipeline(anv_device_to_handle(device),
                       device->meta_state.blit.pipeline_1d_src,
                       &device->meta_state.alloc);
   anv_DestroyPipeline(anv_device_to_handle(device),
                       device->meta_state.blit.pipeline_2d_src,
                       &device->meta_state.alloc);
   anv_DestroyPipeline(anv_device_to_handle(device),
                       device->meta_state.blit.pipeline_3d_src,
                       &device->meta_state.alloc);
   anv_DestroyPipelineLayout(anv_device_to_handle(device),
                             device->meta_state.blit.pipeline_layout,
                             &device->meta_state.alloc);
   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
                                  device->meta_state.blit.ds_layout,
                                  &device->meta_state.alloc);
}

VkResult
anv_device_init_meta_blit_state(struct anv_device *device)
{
   VkResult result;

   result = anv_CreateRenderPass(anv_device_to_handle(device),
      &(VkRenderPassCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
         .attachmentCount = 1,
         .pAttachments = &(VkAttachmentDescription) {
            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
         },
         .subpassCount = 1,
         .pSubpasses = &(VkSubpassDescription) {
            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
            .inputAttachmentCount = 0,
            .colorAttachmentCount = 1,
            .pColorAttachments = &(VkAttachmentReference) {
               .attachment = 0,
               .layout = VK_IMAGE_LAYOUT_GENERAL,
            },
            .pResolveAttachments = NULL,
            .pDepthStencilAttachment = &(VkAttachmentReference) {
               .attachment = VK_ATTACHMENT_UNUSED,
               .layout = VK_IMAGE_LAYOUT_GENERAL,
            },
            .preserveAttachmentCount = 1,
            .pPreserveAttachments = (uint32_t[]) { 0 },
         },
         .dependencyCount = 0,
      }, &device->meta_state.alloc, &device->meta_state.blit.render_pass);
   if (result != VK_SUCCESS)
      goto fail;

   /* We don't use a vertex shader for blitting, but instead build and pass
    * the VUEs directly to the rasterization backend. However, we do need
    * to provide GLSL source for the vertex shader so that the compiler
    * does not dead-code our inputs.
    */
   struct anv_shader_module vs = {
      .nir = build_nir_vertex_shader(),
   };

   struct anv_shader_module fs_1d = {
      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D),
   };

   struct anv_shader_module fs_2d = {
      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
   };

   struct anv_shader_module fs_3d = {
      .nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D),
   };

   VkPipelineVertexInputStateCreateInfo vi_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 2,
      .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
         {
            .binding = 0,
            .stride = 0,
            .inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
         },
         {
            .binding = 1,
            .stride = 5 * sizeof(float),
            .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
         },
      },
      .vertexAttributeDescriptionCount = 3,
      .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
         {
            /* VUE Header */
            .location = 0,
            .binding = 0,
            .format = VK_FORMAT_R32G32B32A32_UINT,
            .offset = 0
         },
         {
            /* Position */
            .location = 1,
            .binding = 1,
            .format = VK_FORMAT_R32G32_SFLOAT,
            .offset = 0
         },
         {
            /* Texture Coordinate */
            .location = 2,
            .binding = 1,
            .format = VK_FORMAT_R32G32B32_SFLOAT,
            .offset = 8
         }
      }
   };

   VkDescriptorSetLayoutCreateInfo ds_layout_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .bindingCount = 1,
      .pBindings = (VkDescriptorSetLayoutBinding[]) {
         {
            .binding = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
            .pImmutableSamplers = NULL
         },
      }
   };
   result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
                                          &ds_layout_info,
                                          &device->meta_state.alloc,
                                          &device->meta_state.blit.ds_layout);
   if (result != VK_SUCCESS)
      goto fail_render_pass;

   result = anv_CreatePipelineLayout(anv_device_to_handle(device),
      &(VkPipelineLayoutCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
         .setLayoutCount = 1,
         .pSetLayouts = &device->meta_state.blit.ds_layout,
      },
      &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
   if (result != VK_SUCCESS)
      goto fail_descriptor_set_layout;

   VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_VERTEX_BIT,
         .module = anv_shader_module_to_handle(&vs),
         .pName = "main",
         .pSpecializationInfo = NULL
      }, {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
         .module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
         .pName = "main",
         .pSpecializationInfo = NULL
      },
   };

   const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .stageCount = ARRAY_SIZE(pipeline_shader_stages),
      .pStages = pipeline_shader_stages,
      .pVertexInputState = &vi_create_info,
      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
         .primitiveRestartEnable = false,
      },
      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
         .viewportCount = 1,
         .scissorCount = 1,
      },
      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
         .rasterizerDiscardEnable = false,
         .polygonMode = VK_POLYGON_MODE_FILL,
         .cullMode = VK_CULL_MODE_NONE,
         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
      },
      .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
         .rasterizationSamples = 1,
         .sampleShadingEnable = false,
         .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
      },
      .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
         .attachmentCount = 1,
         .pAttachments = (VkPipelineColorBlendAttachmentState []) {
            { .colorWriteMask =
                 VK_COLOR_COMPONENT_A_BIT |
                 VK_COLOR_COMPONENT_R_BIT |
                 VK_COLOR_COMPONENT_G_BIT |
                 VK_COLOR_COMPONENT_B_BIT },
         }
      },
      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
         .dynamicStateCount = 9,
         .pDynamicStates = (VkDynamicState[]) {
            VK_DYNAMIC_STATE_VIEWPORT,
            VK_DYNAMIC_STATE_SCISSOR,
            VK_DYNAMIC_STATE_LINE_WIDTH,
            VK_DYNAMIC_STATE_DEPTH_BIAS,
            VK_DYNAMIC_STATE_BLEND_CONSTANTS,
            VK_DYNAMIC_STATE_DEPTH_BOUNDS,
            VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
            VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
            VK_DYNAMIC_STATE_STENCIL_REFERENCE,
         },
      },
      .flags = 0,
      .layout = device->meta_state.blit.pipeline_layout,
      .renderPass = device->meta_state.blit.render_pass,
      .subpass = 0,
   };

   const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
      .color_attachment_count = -1,
      .use_repclear = false,
      .disable_vs = true,
      .use_rectlist = true
   };

   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d);
   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
      VK_NULL_HANDLE,
      &vk_pipeline_info, &anv_pipeline_info,
      &device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src);
   if (result != VK_SUCCESS)
      goto fail_pipeline_layout;

   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
      VK_NULL_HANDLE,
      &vk_pipeline_info, &anv_pipeline_info,
      &device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src);
   if (result != VK_SUCCESS)
      goto fail_pipeline_1d;

   pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d);
   result = anv_graphics_pipeline_create(anv_device_to_handle(device),
      VK_NULL_HANDLE,
      &vk_pipeline_info, &anv_pipeline_info,
      &device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src);
   if (result != VK_SUCCESS)
      goto fail_pipeline_2d;

   ralloc_free(vs.nir);
   ralloc_free(fs_1d.nir);
   ralloc_free(fs_2d.nir);
   ralloc_free(fs_3d.nir);

   return VK_SUCCESS;

 fail_pipeline_2d:
   anv_DestroyPipeline(anv_device_to_handle(device),
                       device->meta_state.blit.pipeline_2d_src,
                       &device->meta_state.alloc);

 fail_pipeline_1d:
   anv_DestroyPipeline(anv_device_to_handle(device),
                       device->meta_state.blit.pipeline_1d_src,
                       &device->meta_state.alloc);

 fail_pipeline_layout:
   anv_DestroyPipelineLayout(anv_device_to_handle(device),
                             device->meta_state.blit.pipeline_layout,
                             &device->meta_state.alloc);
 fail_descriptor_set_layout:
   anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
                                  device->meta_state.blit.ds_layout,
                                  &device->meta_state.alloc);
 fail_render_pass:
   anv_DestroyRenderPass(anv_device_to_handle(device),
                         device->meta_state.blit.render_pass,
                         &device->meta_state.alloc);

   ralloc_free(vs.nir);
   ralloc_free(fs_1d.nir);
   ralloc_free(fs_2d.nir);
   ralloc_free(fs_3d.nir);
 fail:
   return result;
}

1316
src/intel/vulkan/anv_meta_blit2d.c
Normal file
File diff suppressed because it is too large
1070
src/intel/vulkan/anv_meta_clear.c
Normal file
File diff suppressed because it is too large
462
src/intel/vulkan/anv_meta_copy.c
Normal file
@@ -0,0 +1,462 @@
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_meta.h"

/* Returns the user-provided VkBufferImageCopy::imageExtent in units of
 * elements rather than texels. One element equals one texel or one block
 * if Image is uncompressed or compressed, respectively.
 */
static struct VkExtent3D
meta_region_extent_el(const struct anv_image *image,
                      const struct VkExtent3D *extent)
{
   const struct isl_format_layout *isl_layout =
      anv_format_for_vk_format(image->vk_format)->isl_layout;
   return anv_sanitize_image_extent(image->type, (VkExtent3D) {
      .width  = DIV_ROUND_UP(extent->width , isl_layout->bw),
      .height = DIV_ROUND_UP(extent->height, isl_layout->bh),
      .depth  = DIV_ROUND_UP(extent->depth , isl_layout->bd),
   });
}

/* Returns the user-provided VkBufferImageCopy::imageOffset in units of
 * elements rather than texels. One element equals one texel or one block
 * if Image is uncompressed or compressed, respectively.
 */
static struct VkOffset3D
meta_region_offset_el(const struct anv_image *image,
                      const struct VkOffset3D *offset)
{
   const struct isl_format_layout *isl_layout = image->format->isl_layout;
   return anv_sanitize_image_offset(image->type, (VkOffset3D) {
      .x = offset->x / isl_layout->bw,
      .y = offset->y / isl_layout->bh,
      .z = offset->z / isl_layout->bd,
   });
}

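/* Editor's sketch: what the two helpers above compute for a compressed
 * format. For a BC1/DXT1-style 4x4 block layout (bw = bh = 4, bd = 1), a
 * copy of 16x16 texels at offset (8, 4) becomes 4x4 elements at (2, 1):
 *
 *    extent_el.width  = DIV_ROUND_UP(16, 4) = 4
 *    extent_el.height = DIV_ROUND_UP(16, 4) = 4
 *    offset_el.x      = 8 / 4 = 2
 *    offset_el.y      = 4 / 4 = 1
 *
 * For uncompressed formats bw = bh = bd = 1 and both functions are the
 * identity.
 */
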
static struct anv_meta_blit2d_surf
blit_surf_for_image(const struct anv_image* image,
                    const struct isl_surf *img_isl_surf)
{
   return (struct anv_meta_blit2d_surf) {
      .bo = image->bo,
      .tiling = img_isl_surf->tiling,
      .base_offset = image->offset,
      .bs = isl_format_get_layout(img_isl_surf->format)->bs,
      .pitch = isl_surf_get_row_pitch(img_isl_surf),
   };
}

static void
do_buffer_copy(struct anv_cmd_buffer *cmd_buffer,
               struct anv_bo *src, uint64_t src_offset,
               struct anv_bo *dest, uint64_t dest_offset,
               int width, int height, int bs)
{
   struct anv_meta_blit2d_surf b_src = {
      .bo = src,
      .tiling = ISL_TILING_LINEAR,
      .base_offset = src_offset,
      .bs = bs,
      .pitch = width * bs,
   };
   struct anv_meta_blit2d_surf b_dst = {
      .bo = dest,
      .tiling = ISL_TILING_LINEAR,
      .base_offset = dest_offset,
      .bs = bs,
      .pitch = width * bs,
   };
   struct anv_meta_blit2d_rect rect = {
      .width = width,
      .height = height,
   };
   anv_meta_blit2d(cmd_buffer, &b_src, &b_dst, 1, &rect);
}

static void
meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                          struct anv_buffer* buffer,
                          struct anv_image* image,
                          uint32_t regionCount,
                          const VkBufferImageCopy* pRegions,
                          bool forward)
{
   struct anv_meta_saved_state saved_state;

   /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
    * VK_SAMPLE_COUNT_1_BIT."
    */
   assert(image->samples == 1);

   anv_meta_begin_blit2d(cmd_buffer, &saved_state);

   for (unsigned r = 0; r < regionCount; r++) {

      /**
       * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and
       * Images:
       *
       *    imageExtent is the size in texels of the image to copy in width,
       *    height and depth. 1D images use only x and width. 2D images use
       *    x, y, width and height. 3D images use x, y, z, width, height and
       *    depth.
       *
       * Also, convert the offsets and extent from units of texels to units of
       * blocks - which is the highest resolution accessible in this command.
       */
      const VkOffset3D img_offset_el =
         meta_region_offset_el(image, &pRegions[r].imageOffset);
      const VkExtent3D bufferExtent = {
         .width = pRegions[r].bufferRowLength,
         .height = pRegions[r].bufferImageHeight,
      };

      /* Start creating blit rect */
      const VkExtent3D buf_extent_el =
         meta_region_extent_el(image, &bufferExtent);
      const VkExtent3D img_extent_el =
         meta_region_extent_el(image, &pRegions[r].imageExtent);
      struct anv_meta_blit2d_rect rect = {
         .width = MAX2(buf_extent_el.width, img_extent_el.width),
         .height = MAX2(buf_extent_el.height, img_extent_el.height),
      };

      /* Create blit surfaces */
      VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
      const struct isl_surf *img_isl_surf =
         &anv_image_get_surface_for_aspect_mask(image, aspect)->isl;
      struct anv_meta_blit2d_surf img_bsurf =
         blit_surf_for_image(image, img_isl_surf);
      struct anv_meta_blit2d_surf buf_bsurf = {
         .bo = buffer->bo,
         .tiling = ISL_TILING_LINEAR,
         .base_offset = buffer->offset + pRegions[r].bufferOffset,
         .bs = forward ? image->format->isl_layout->bs : img_bsurf.bs,
         .pitch = rect.width * buf_bsurf.bs,
      };

      /* Set direction-dependent variables */
      struct anv_meta_blit2d_surf *dst_bsurf = forward ? &img_bsurf : &buf_bsurf;
      struct anv_meta_blit2d_surf *src_bsurf = forward ? &buf_bsurf : &img_bsurf;
      uint32_t *x_offset = forward ? &rect.dst_x : &rect.src_x;
      uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y;

      /* Loop through each 3D or array slice */
      unsigned num_slices_3d = img_extent_el.depth;
      unsigned num_slices_array = pRegions[r].imageSubresource.layerCount;
      unsigned slice_3d = 0;
      unsigned slice_array = 0;
      while (slice_3d < num_slices_3d && slice_array < num_slices_array) {

         /* Finish creating blit rect */
         isl_surf_get_image_offset_el(img_isl_surf,
                                      pRegions[r].imageSubresource.mipLevel,
                                      pRegions[r].imageSubresource.baseArrayLayer
                                         + slice_array,
                                      img_offset_el.z + slice_3d,
                                      x_offset,
                                      y_offset);
         *x_offset += img_offset_el.x;
         *y_offset += img_offset_el.y;

         /* Perform Blit */
         anv_meta_blit2d(cmd_buffer, src_bsurf, dst_bsurf, 1, &rect);

         /* Once we've done the blit, all of the actual information about
          * the image is embedded in the command buffer so we can just
          * increment the offset directly in the image effectively
          * re-binding it to different backing memory.
          */
         buf_bsurf.base_offset += rect.width * rect.height * buf_bsurf.bs;

         if (image->type == VK_IMAGE_TYPE_3D)
            slice_3d++;
         else
            slice_array++;
      }
   }
   anv_meta_end_blit2d(cmd_buffer, &saved_state);
}

void anv_CmdCopyBufferToImage(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    srcBuffer,
    VkImage                                     destImage,
    VkImageLayout                               destImageLayout,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, dest_image, destImage);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);

   meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
                             regionCount, pRegions, true);
}

void anv_CmdCopyImageToBuffer(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkBuffer                                    destBuffer,
    uint32_t                                    regionCount,
    const VkBufferImageCopy*                    pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, destBuffer);

   meta_copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
                             regionCount, pRegions, false);
}

void anv_CmdCopyImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     srcImage,
    VkImageLayout                               srcImageLayout,
    VkImage                                     destImage,
    VkImageLayout                               destImageLayout,
    uint32_t                                    regionCount,
    const VkImageCopy*                          pRegions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, srcImage);
   ANV_FROM_HANDLE(anv_image, dest_image, destImage);
   struct anv_meta_saved_state saved_state;

   /* From the Vulkan 1.0 spec:
    *
    *    vkCmdCopyImage can be used to copy image data between multisample
    *    images, but both images must have the same number of samples.
    */
   assert(src_image->samples == dest_image->samples);

   anv_meta_begin_blit2d(cmd_buffer, &saved_state);

   for (unsigned r = 0; r < regionCount; r++) {
      assert(pRegions[r].srcSubresource.aspectMask ==
             pRegions[r].dstSubresource.aspectMask);

      VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask;

      /* Create blit surfaces */
      struct isl_surf *src_isl_surf =
         &anv_image_get_surface_for_aspect_mask(src_image, aspect)->isl;
      struct isl_surf *dst_isl_surf =
         &anv_image_get_surface_for_aspect_mask(dest_image, aspect)->isl;
      struct anv_meta_blit2d_surf b_src =
         blit_surf_for_image(src_image, src_isl_surf);
      struct anv_meta_blit2d_surf b_dst =
         blit_surf_for_image(dest_image, dst_isl_surf);

      /**
       * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images:
       *
       *    extent is the size in texels of the source image to copy in
       *    width, height and depth. 1D images use only x and width. 2D
       *    images use x, y, width and height. 3D images use x, y, z, width,
       *    height and depth.
       *
       * Also, convert the offsets and extent from units of texels to units of
       * blocks - which is the highest resolution accessible in this command.
       */
      const VkOffset3D dst_offset_el =
         meta_region_offset_el(dest_image, &pRegions[r].dstOffset);
      const VkOffset3D src_offset_el =
         meta_region_offset_el(src_image, &pRegions[r].srcOffset);
      const VkExtent3D img_extent_el =
         meta_region_extent_el(src_image, &pRegions[r].extent);

      /* Start creating blit rect */
      struct anv_meta_blit2d_rect rect = {
         .width = img_extent_el.width,
         .height = img_extent_el.height,
      };

      /* Loop through each 3D or array slice */
      unsigned num_slices_3d = img_extent_el.depth;
      unsigned num_slices_array = pRegions[r].dstSubresource.layerCount;
      unsigned slice_3d = 0;
      unsigned slice_array = 0;
      while (slice_3d < num_slices_3d && slice_array < num_slices_array) {

         /* Finish creating blit rect */
         isl_surf_get_image_offset_el(dst_isl_surf,
                                      pRegions[r].dstSubresource.mipLevel,
|
||||
pRegions[r].dstSubresource.baseArrayLayer
|
||||
+ slice_array,
|
||||
dst_offset_el.z + slice_3d,
|
||||
&rect.dst_x,
|
||||
&rect.dst_y);
|
||||
isl_surf_get_image_offset_el(src_isl_surf,
|
||||
pRegions[r].srcSubresource.mipLevel,
|
||||
pRegions[r].srcSubresource.baseArrayLayer
|
||||
+ slice_array,
|
||||
src_offset_el.z + slice_3d,
|
||||
&rect.src_x,
|
||||
&rect.src_y);
|
||||
rect.dst_x += dst_offset_el.x;
|
||||
rect.dst_y += dst_offset_el.y;
|
||||
rect.src_x += src_offset_el.x;
|
||||
rect.src_y += src_offset_el.y;
|
||||
|
||||
/* Perform Blit */
|
||||
anv_meta_blit2d(cmd_buffer, &b_src, &b_dst, 1, &rect);
|
||||
|
||||
if (dest_image->type == VK_IMAGE_TYPE_3D)
|
||||
slice_3d++;
|
||||
else
|
||||
slice_array++;
|
||||
}
|
||||
}
|
||||
|
||||
anv_meta_end_blit2d(cmd_buffer, &saved_state);
|
||||
}
|
||||
|
||||
void anv_CmdCopyBuffer(
|
||||
VkCommandBuffer commandBuffer,
|
||||
VkBuffer srcBuffer,
|
||||
VkBuffer destBuffer,
|
||||
uint32_t regionCount,
|
||||
const VkBufferCopy* pRegions)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
|
||||
ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer);
|
||||
|
||||
struct anv_meta_saved_state saved_state;
|
||||
|
||||
anv_meta_begin_blit2d(cmd_buffer, &saved_state);
|
||||
|
||||
for (unsigned r = 0; r < regionCount; r++) {
|
||||
uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
|
||||
uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
|
||||
uint64_t copy_size = pRegions[r].size;
|
||||
|
||||
/* First, we compute the biggest format that can be used with the
|
||||
* given offsets and size.
|
||||
*/
|
||||
int bs = 16;
|
||||
|
||||
int fs = ffs(src_offset) - 1;
|
||||
if (fs != -1)
|
||||
bs = MIN2(bs, 1 << fs);
|
||||
assert(src_offset % bs == 0);
|
||||
|
||||
fs = ffs(dest_offset) - 1;
|
||||
if (fs != -1)
|
||||
bs = MIN2(bs, 1 << fs);
|
||||
assert(dest_offset % bs == 0);
|
||||
|
||||
fs = ffs(pRegions[r].size) - 1;
|
||||
if (fs != -1)
|
||||
bs = MIN2(bs, 1 << fs);
|
||||
assert(pRegions[r].size % bs == 0);
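
      /* Worked example (illustrative numbers, not from the spec): with
       * src_offset = 48 (ffs gives 1 << 4 = 16), dest_offset = 8 (1 << 3 = 8)
       * and size = 4096 (1 << 12), bs ends up as MIN2(16, MIN2(16, 8)) = 8,
       * i.e. the largest power-of-two block size (capped at 16 bytes) that
       * divides all three values.
       */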

      /* This is the maximum possible width/height our HW can handle */
      uint64_t max_surface_dim = 1 << 14;

      /* First, we make a bunch of max-sized copies */
      uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs;
      while (copy_size >= max_copy_size) {
         do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
                        dest_buffer->bo, dest_offset,
                        max_surface_dim, max_surface_dim, bs);
         copy_size -= max_copy_size;
         src_offset += max_copy_size;
         dest_offset += max_copy_size;
      }

      uint64_t height = copy_size / (max_surface_dim * bs);
      assert(height < max_surface_dim);
      if (height != 0) {
         uint64_t rect_copy_size = height * max_surface_dim * bs;
         do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
                        dest_buffer->bo, dest_offset,
                        max_surface_dim, height, bs);
         copy_size -= rect_copy_size;
         src_offset += rect_copy_size;
         dest_offset += rect_copy_size;
      }

      if (copy_size != 0) {
         do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
                        dest_buffer->bo, dest_offset,
                        copy_size / bs, 1, bs);
      }
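
      /* Worked example of the decomposition (illustrative): with bs = 16 a
       * 100 MiB copy skips the max-sized pass (max_copy_size there is 4 GiB),
       * then the rectangle pass moves 16384 x 400 x 16 bytes = exactly
       * 100 MiB, leaving no single-row remainder.
       */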
   }

   anv_meta_end_blit2d(cmd_buffer, &saved_state);
}

void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const uint32_t*                             pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct anv_meta_saved_state saved_state;

   anv_meta_begin_blit2d(cmd_buffer, &saved_state);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_block_pool.block_size - 64;

   assert(max_update_size < (1 << 14) * 4);

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      int bs;
      if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) {
         bs = 16;
      } else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) {
         bs = 8;
      } else {
         assert((copy_size & 3) == 0 && (dstOffset & 3) == 0);
         bs = 4;
      }
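
      /* E.g. (illustrative): copy_size = 24 and dstOffset = 8 are both
       * 8-byte aligned but not 16-byte aligned, so bs = 8 and the copy is
       * issued as three 8-byte elements.
       */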

      do_buffer_copy(cmd_buffer,
                     &cmd_buffer->device->dynamic_state_block_pool.bo,
                     tmp_data.offset,
                     dst_buffer->bo, dst_buffer->offset + dstOffset,
                     copy_size / bs, 1, bs);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   anv_meta_end_blit2d(cmd_buffer, &saved_state);
}
870
src/intel/vulkan/anv_meta_resolve.c
Normal file

@@ -0,0 +1,870 @@
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "anv_meta.h"
#include "anv_private.h"
#include "nir/nir_builder.h"

/**
 * Vertex attributes used by all pipelines.
 */
struct vertex_attrs {
   struct anv_vue_header vue_header;
   float position[2]; /**< 3DPRIM_RECTLIST */
   float tex_position[2];
};

static void
meta_resolve_save(struct anv_meta_saved_state *saved_state,
                  struct anv_cmd_buffer *cmd_buffer)
{
   anv_meta_save(saved_state, cmd_buffer, 0);
}

static void
meta_resolve_restore(struct anv_meta_saved_state *saved_state,
                     struct anv_cmd_buffer *cmd_buffer)
{
   anv_meta_restore(saved_state, cmd_buffer);
}

static VkPipeline *
get_pipeline_h(struct anv_device *device, uint32_t samples)
{
   uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */
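
   /* Mapping from sample count to pipeline index (a quick sanity table):
    *   samples:     2  4  8  16
    *   ffs() - 2:   0  1  2  3
    */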

   assert(samples >= 2);
   assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines));

   return &device->meta_state.resolve.pipelines[i];
}

static nir_shader *
build_nir_vs(void)
{
   const struct glsl_type *vec4 = glsl_vec4_type();

   nir_builder b;
   nir_variable *a_position;
   nir_variable *v_position;
   nir_variable *a_tex_position;
   nir_variable *v_tex_position;

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
   b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");

   a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
                                    "a_position");
   a_position->data.location = VERT_ATTRIB_GENERIC0;

   v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
                                    "gl_Position");
   v_position->data.location = VARYING_SLOT_POS;

   a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
                                        "a_tex_position");
   a_tex_position->data.location = VERT_ATTRIB_GENERIC1;

   v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
                                        "v_tex_position");
   v_tex_position->data.location = VARYING_SLOT_VAR0;

   nir_copy_var(&b, v_position, a_position);
   nir_copy_var(&b, v_tex_position, a_tex_position);

   return b.shader;
}

static nir_shader *
build_nir_fs(uint32_t num_samples)
{
   const struct glsl_type *vec4 = glsl_vec4_type();

   const struct glsl_type *sampler2DMS =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
                        /*is_shadow*/ false,
                        /*is_array*/ false,
                        GLSL_TYPE_FLOAT);

   nir_builder b;
   nir_variable *u_tex; /* uniform sampler */
   nir_variable *v_position; /* vec4, varying fragment position */
   nir_variable *v_tex_position; /* vec4, varying texture coordinate */
   nir_variable *f_color; /* vec4, fragment output color */
   nir_ssa_def *accum; /* vec4, accumulation of sample values */

   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
   b.shader->info.name = ralloc_asprintf(b.shader,
                                         "meta_resolve_fs_samples%02d",
                                         num_samples);

   u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS,
                               "u_tex");
   u_tex->data.descriptor_set = 0;
   u_tex->data.binding = 0;

   v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
                                    "v_position");
   v_position->data.location = VARYING_SLOT_POS;
   v_position->data.origin_upper_left = true;

   v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
                                        "v_tex_position");
   v_tex_position->data.location = VARYING_SLOT_VAR0;

   f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
                                 "f_color");
   f_color->data.location = FRAG_RESULT_DATA0;

   accum = nir_imm_vec4(&b, 0, 0, 0, 0);

   nir_ssa_def *tex_position_ivec =
      nir_f2i(&b, nir_load_var(&b, v_tex_position));

   for (uint32_t i = 0; i < num_samples; ++i) {
      nir_tex_instr *tex;

      tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2);
      tex->texture = nir_deref_var_create(tex, u_tex);
      tex->sampler = nir_deref_var_create(tex, u_tex);
      tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
      tex->op = nir_texop_txf_ms;
      tex->src[0].src = nir_src_for_ssa(tex_position_ivec);
      tex->src[0].src_type = nir_tex_src_coord;
      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
      tex->src[1].src_type = nir_tex_src_ms_index;
      tex->dest_type = nir_type_float;
      tex->is_array = false;
      tex->coord_components = 3;
      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
      nir_builder_instr_insert(&b, &tex->instr);

      accum = nir_fadd(&b, accum, &tex->dest.ssa);
   }

   accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples));
   nir_store_var(&b, f_color, accum, /*writemask*/ 4);

   return b.shader;
}
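
/* For reference, the NIR built above is roughly equivalent to this GLSL
 * (a sketch; NUM_SAMPLES stands in for the num_samples argument):
 *
 *    uniform sampler2DMS u_tex;
 *    in vec4 v_tex_position;
 *    out vec4 f_color;
 *
 *    void main()
 *    {
 *       vec4 accum = vec4(0);
 *       for (int i = 0; i < NUM_SAMPLES; i++)
 *          accum += texelFetch(u_tex, ivec2(v_tex_position.xy), i);
 *       f_color = accum / float(NUM_SAMPLES);
 *    }
 */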

static VkResult
create_pass(struct anv_device *device)
{
   VkResult result;
   VkDevice device_h = anv_device_to_handle(device);
   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;

   result = anv_CreateRenderPass(device_h,
      &(VkRenderPassCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
         .attachmentCount = 1,
         .pAttachments = &(VkAttachmentDescription) {
            .format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
            .samples = 1,
            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
            .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
         },
         .subpassCount = 1,
         .pSubpasses = &(VkSubpassDescription) {
            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
            .inputAttachmentCount = 0,
            .colorAttachmentCount = 1,
            .pColorAttachments = &(VkAttachmentReference) {
               .attachment = 0,
               .layout = VK_IMAGE_LAYOUT_GENERAL,
            },
            .pResolveAttachments = NULL,
            .pDepthStencilAttachment = &(VkAttachmentReference) {
               .attachment = VK_ATTACHMENT_UNUSED,
            },
            .preserveAttachmentCount = 0,
            .pPreserveAttachments = NULL,
         },
         .dependencyCount = 0,
      },
      alloc,
      &device->meta_state.resolve.pass);

   return result;
}

static VkResult
create_pipeline(struct anv_device *device,
                uint32_t num_samples,
                VkShaderModule vs_module_h)
{
   VkResult result;
   VkDevice device_h = anv_device_to_handle(device);

   struct anv_shader_module fs_module = {
      .nir = build_nir_fs(num_samples),
   };

   if (!fs_module.nir) {
      /* XXX: Need more accurate error */
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   result = anv_graphics_pipeline_create(device_h,
      VK_NULL_HANDLE,
      &(VkGraphicsPipelineCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .stageCount = 2,
         .pStages = (VkPipelineShaderStageCreateInfo[]) {
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_VERTEX_BIT,
               .module = vs_module_h,
               .pName = "main",
            },
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
               .module = anv_shader_module_to_handle(&fs_module),
               .pName = "main",
            },
         },
         .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .vertexBindingDescriptionCount = 1,
            .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
               {
                  .binding = 0,
                  .stride = sizeof(struct vertex_attrs),
                  .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
               },
            },
            .vertexAttributeDescriptionCount = 3,
            .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
               {
                  /* VUE Header */
                  .location = 0,
                  .binding = 0,
                  .format = VK_FORMAT_R32G32B32A32_UINT,
                  .offset = offsetof(struct vertex_attrs, vue_header),
               },
               {
                  /* Position */
                  .location = 1,
                  .binding = 0,
                  .format = VK_FORMAT_R32G32_SFLOAT,
                  .offset = offsetof(struct vertex_attrs, position),
               },
               {
                  /* Texture Coordinate */
                  .location = 2,
                  .binding = 0,
                  .format = VK_FORMAT_R32G32_SFLOAT,
                  .offset = offsetof(struct vertex_attrs, tex_position),
               },
            },
         },
         .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
            .primitiveRestartEnable = false,
         },
         .pViewportState = &(VkPipelineViewportStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .viewportCount = 1,
            .scissorCount = 1,
         },
         .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .depthClampEnable = false,
            .rasterizerDiscardEnable = false,
            .polygonMode = VK_POLYGON_MODE_FILL,
            .cullMode = VK_CULL_MODE_NONE,
            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
         },
         .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .rasterizationSamples = 1,
            .sampleShadingEnable = false,
            .pSampleMask = (VkSampleMask[]) { 0x1 },
            .alphaToCoverageEnable = false,
            .alphaToOneEnable = false,
         },
         .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
            .logicOpEnable = false,
            .attachmentCount = 1,
            .pAttachments = (VkPipelineColorBlendAttachmentState []) {
               {
                  .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
                                    VK_COLOR_COMPONENT_G_BIT |
                                    VK_COLOR_COMPONENT_B_BIT |
                                    VK_COLOR_COMPONENT_A_BIT,
               },
            },
         },
         .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .dynamicStateCount = 2,
            .pDynamicStates = (VkDynamicState[]) {
               VK_DYNAMIC_STATE_VIEWPORT,
               VK_DYNAMIC_STATE_SCISSOR,
            },
         },
         .layout = device->meta_state.resolve.pipeline_layout,
         .renderPass = device->meta_state.resolve.pass,
         .subpass = 0,
      },
      &(struct anv_graphics_pipeline_create_info) {
         .color_attachment_count = -1,
         .use_repclear = false,
         .disable_vs = true,
         .use_rectlist = true
      },
      &device->meta_state.alloc,
      get_pipeline_h(device, num_samples));
   if (result != VK_SUCCESS)
      goto cleanup;

   goto cleanup;

cleanup:
   ralloc_free(fs_module.nir);
   return result;
}

void
anv_device_finish_meta_resolve_state(struct anv_device *device)
{
   struct anv_meta_state *state = &device->meta_state;
   VkDevice device_h = anv_device_to_handle(device);
   VkRenderPass pass_h = device->meta_state.resolve.pass;
   VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout;
   VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout;
   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;

   if (pass_h)
      ANV_CALL(DestroyRenderPass)(device_h, pass_h,
                                  &device->meta_state.alloc);

   if (pipeline_layout_h)
      ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc);

   if (ds_layout_h)
      ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc);

   for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) {
      VkPipeline pipeline_h = state->resolve.pipelines[i];

      if (pipeline_h) {
         ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc);
      }
   }
}

VkResult
anv_device_init_meta_resolve_state(struct anv_device *device)
{
   VkResult res = VK_SUCCESS;
   VkDevice device_h = anv_device_to_handle(device);
   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;

   const isl_sample_count_mask_t sample_count_mask =
      isl_device_get_sample_counts(&device->isl_dev);

   zero(device->meta_state.resolve);

   struct anv_shader_module vs_module = { .nir = build_nir_vs() };
   if (!vs_module.nir) {
      /* XXX: Need more accurate error */
      res = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module);

   res = anv_CreateDescriptorSetLayout(device_h,
      &(VkDescriptorSetLayoutCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = (VkDescriptorSetLayoutBinding[]) {
            {
               .binding = 0,
               .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
               .descriptorCount = 1,
               .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
            },
         },
      },
      alloc,
      &device->meta_state.resolve.ds_layout);
   if (res != VK_SUCCESS)
      goto fail;

   res = anv_CreatePipelineLayout(device_h,
      &(VkPipelineLayoutCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
         .setLayoutCount = 1,
         .pSetLayouts = (VkDescriptorSetLayout[]) {
            device->meta_state.resolve.ds_layout,
         },
      },
      alloc,
      &device->meta_state.resolve.pipeline_layout);
   if (res != VK_SUCCESS)
      goto fail;

   res = create_pass(device);
   if (res != VK_SUCCESS)
      goto fail;

   for (uint32_t i = 0;
        i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) {

      uint32_t sample_count = 1 << (1 + i);
      if (!(sample_count_mask & sample_count))
         continue;

      res = create_pipeline(device, sample_count, vs_module_h);
      if (res != VK_SUCCESS)
         goto fail;
   }

   goto cleanup;

fail:
   anv_device_finish_meta_resolve_state(device);

cleanup:
   ralloc_free(vs_module.nir);

   return res;
}

static void
emit_resolve(struct anv_cmd_buffer *cmd_buffer,
             struct anv_image_view *src_iview,
             const VkOffset2D *src_offset,
             struct anv_image_view *dest_iview,
             const VkOffset2D *dest_offset,
             const VkExtent2D *resolve_extent)
{
   struct anv_device *device = cmd_buffer->device;
   VkDevice device_h = anv_device_to_handle(device);
   VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
   const struct anv_image *src_image = src_iview->image;

   const struct vertex_attrs vertex_data[3] = {
      {
         .vue_header = {0},
         .position = {
            dest_offset->x + resolve_extent->width,
            dest_offset->y + resolve_extent->height,
         },
         .tex_position = {
            src_offset->x + resolve_extent->width,
            src_offset->y + resolve_extent->height,
         },
      },
      {
         .vue_header = {0},
         .position = {
            dest_offset->x,
            dest_offset->y + resolve_extent->height,
         },
         .tex_position = {
            src_offset->x,
            src_offset->y + resolve_extent->height,
         },
      },
      {
         .vue_header = {0},
         .position = {
            dest_offset->x,
            dest_offset->y,
         },
         .tex_position = {
            src_offset->x,
            src_offset->y,
         },
      },
   };
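
   /* Note the 3DPRIM_RECTLIST convention: only three corners are supplied,
    * in the order (x1, y1), (x0, y1), (x0, y0); the hardware derives the
    * fourth corner of the rectangle.
    */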

   struct anv_state vertex_mem =
      anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data,
                                  sizeof(vertex_data), 16);

   struct anv_buffer vertex_buffer = {
      .device = device,
      .size = sizeof(vertex_data),
      .bo = &cmd_buffer->dynamic_state_stream.block_pool->bo,
      .offset = vertex_mem.offset,
   };

   VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer);

   anv_CmdBindVertexBuffers(cmd_buffer_h,
      /*firstBinding*/ 0,
      /*bindingCount*/ 1,
      (VkBuffer[]) { vertex_buffer_h },
      (VkDeviceSize[]) { 0 });

   VkSampler sampler_h;
   ANV_CALL(CreateSampler)(device_h,
      &(VkSamplerCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
         .magFilter = VK_FILTER_NEAREST,
         .minFilter = VK_FILTER_NEAREST,
         .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
         .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
         .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
         .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
         .mipLodBias = 0.0,
         .anisotropyEnable = false,
         .compareEnable = false,
         .minLod = 0.0,
         .maxLod = 0.0,
         .unnormalizedCoordinates = false,
      },
      &cmd_buffer->pool->alloc,
      &sampler_h);

   VkDescriptorPool desc_pool;
   anv_CreateDescriptorPool(anv_device_to_handle(device),
      &(const VkDescriptorPoolCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
         .pNext = NULL,
         .flags = 0,
         .maxSets = 1,
         .poolSizeCount = 1,
         .pPoolSizes = (VkDescriptorPoolSize[]) {
            {
               .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
               .descriptorCount = 1
            },
         }
      }, &cmd_buffer->pool->alloc, &desc_pool);

   VkDescriptorSet desc_set_h;
   anv_AllocateDescriptorSets(device_h,
      &(VkDescriptorSetAllocateInfo) {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
         .descriptorPool = desc_pool,
         .descriptorSetCount = 1,
         .pSetLayouts = (VkDescriptorSetLayout[]) {
            device->meta_state.resolve.ds_layout,
         },
      },
      &desc_set_h);

   anv_UpdateDescriptorSets(device_h,
      /*writeCount*/ 1,
      (VkWriteDescriptorSet[]) {
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = desc_set_h,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .pImageInfo = (VkDescriptorImageInfo[]) {
               {
                  .sampler = sampler_h,
                  .imageView = anv_image_view_to_handle(src_iview),
                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
               },
            },
         },
      },
      /*copyCount*/ 0,
      /*copies */ NULL);

   VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h);

   if (cmd_buffer->state.pipeline != pipeline) {
      anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
                          pipeline_h);
   }

   anv_CmdBindDescriptorSets(cmd_buffer_h,
      VK_PIPELINE_BIND_POINT_GRAPHICS,
      device->meta_state.resolve.pipeline_layout,
      /*firstSet*/ 0,
      /* setCount */ 1,
      (VkDescriptorSet[]) {
         desc_set_h,
      },
      /*copyCount*/ 0,
      /*copies */ NULL);

   ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);

   /* All objects below are consumed by the draw call. We may safely destroy
    * them.
    */
   anv_DestroyDescriptorPool(anv_device_to_handle(device),
                             desc_pool, &cmd_buffer->pool->alloc);
   anv_DestroySampler(device_h, sampler_h,
                      &cmd_buffer->pool->alloc);
}

void anv_CmdResolveImage(
    VkCommandBuffer                             cmd_buffer_h,
    VkImage                                     src_image_h,
    VkImageLayout                               src_image_layout,
    VkImage                                     dest_image_h,
    VkImageLayout                               dest_image_layout,
    uint32_t                                    region_count,
    const VkImageResolve*                       regions)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h);
   ANV_FROM_HANDLE(anv_image, src_image, src_image_h);
   ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h);
   struct anv_device *device = cmd_buffer->device;
   struct anv_meta_saved_state state;
   VkDevice device_h = anv_device_to_handle(device);

   meta_resolve_save(&state, cmd_buffer);

   assert(src_image->samples > 1);
   assert(dest_image->samples == 1);

   if (src_image->samples >= 16) {
      /* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
       * glBlitFramebuffer workaround for samples >= 16.
       */
      anv_finishme("vkCmdResolveImage: need interpolation workaround when "
                   "samples >= 16");
   }

   if (src_image->array_size > 1)
      anv_finishme("vkCmdResolveImage: multisample array images");

   for (uint32_t r = 0; r < region_count; ++r) {
      const VkImageResolve *region = &regions[r];

      /* From the Vulkan 1.0 spec:
       *
       *    - The aspectMask member of srcSubresource and dstSubresource must
       *      only contain VK_IMAGE_ASPECT_COLOR_BIT
       *
       *    - The layerCount member of srcSubresource and dstSubresource must
       *      match
       */
      assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
      assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
      assert(region->srcSubresource.layerCount ==
             region->dstSubresource.layerCount);

      const uint32_t src_base_layer =
         anv_meta_get_iview_layer(src_image, &region->srcSubresource,
                                  &region->srcOffset);

      const uint32_t dest_base_layer =
         anv_meta_get_iview_layer(dest_image, &region->dstSubresource,
                                  &region->dstOffset);

      /**
       * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
       *
       *    srcOffset and dstOffset select the initial x, y, and z offsets in
       *    texels of the sub-regions of the source and destination image
       *    data. extent is the size in texels of the source image to resolve
       *    in width, height and depth. 1D images use only x and width. 2D
       *    images use x, y, width and height. 3D images use x, y, z, width,
       *    height and depth.
       */
      const struct VkExtent3D extent =
         anv_sanitize_image_extent(src_image->type, region->extent);
      const struct VkOffset3D srcOffset =
         anv_sanitize_image_offset(src_image->type, region->srcOffset);
      const struct VkOffset3D dstOffset =
         anv_sanitize_image_offset(dest_image->type, region->dstOffset);

      for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
           ++layer) {

         struct anv_image_view src_iview;
         anv_image_view_init(&src_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo) {
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = src_image_h,
               .viewType = anv_meta_get_view_type(src_image),
               .format = src_image->format->vk_format,
               .subresourceRange = {
                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                  .baseMipLevel = region->srcSubresource.mipLevel,
                  .levelCount = 1,
                  .baseArrayLayer = src_base_layer + layer,
                  .layerCount = 1,
               },
            },
            cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);

         struct anv_image_view dest_iview;
         anv_image_view_init(&dest_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo) {
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = dest_image_h,
               .viewType = anv_meta_get_view_type(dest_image),
               .format = dest_image->format->vk_format,
               .subresourceRange = {
                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                  .baseMipLevel = region->dstSubresource.mipLevel,
                  .levelCount = 1,
                  .baseArrayLayer = dest_base_layer + layer,
                  .layerCount = 1,
               },
            },
            cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);

         VkFramebuffer fb_h;
         anv_CreateFramebuffer(device_h,
            &(VkFramebufferCreateInfo) {
               .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
               .attachmentCount = 1,
               .pAttachments = (VkImageView[]) {
                  anv_image_view_to_handle(&dest_iview),
               },
               .width = anv_minify(dest_image->extent.width,
                                   region->dstSubresource.mipLevel),
               .height = anv_minify(dest_image->extent.height,
                                    region->dstSubresource.mipLevel),
               .layers = 1
            },
            &cmd_buffer->pool->alloc,
            &fb_h);

         ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h,
            &(VkRenderPassBeginInfo) {
               .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
               .renderPass = device->meta_state.resolve.pass,
               .framebuffer = fb_h,
               .renderArea = {
                  .offset = {
                     dstOffset.x,
                     dstOffset.y,
                  },
                  .extent = {
                     extent.width,
                     extent.height,
                  }
               },
               .clearValueCount = 0,
               .pClearValues = NULL,
            },
            VK_SUBPASS_CONTENTS_INLINE);

         emit_resolve(cmd_buffer,
                      &src_iview,
                      &(VkOffset2D) {
                         .x = srcOffset.x,
                         .y = srcOffset.y,
                      },
                      &dest_iview,
                      &(VkOffset2D) {
                         .x = dstOffset.x,
                         .y = dstOffset.y,
                      },
                      &(VkExtent2D) {
                         .width = extent.width,
                         .height = extent.height,
                      });

         ANV_CALL(CmdEndRenderPass)(cmd_buffer_h);

         anv_DestroyFramebuffer(device_h, fb_h,
                                &cmd_buffer->pool->alloc);
      }
   }

   meta_resolve_restore(&state, cmd_buffer);
}

/**
 * Emit any needed resolves for the current subpass.
 */
void
anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_meta_saved_state saved_state;

   /* FINISHME(perf): Skip clears for resolve attachments.
    *
    * From the Vulkan 1.0 spec:
    *
    *    If the first use of an attachment in a render pass is as a resolve
    *    attachment, then the loadOp is effectively ignored as the resolve is
    *    guaranteed to overwrite all pixels in the render area.
    */

   if (!subpass->has_resolve)
      return;

   meta_resolve_save(&saved_state, cmd_buffer);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      uint32_t src_att = subpass->color_attachments[i];
      uint32_t dest_att = subpass->resolve_attachments[i];

      if (dest_att == VK_ATTACHMENT_UNUSED)
         continue;

      struct anv_image_view *src_iview = fb->attachments[src_att];
      struct anv_image_view *dest_iview = fb->attachments[dest_att];

      struct anv_subpass resolve_subpass = {
         .color_count = 1,
         .color_attachments = (uint32_t[]) { dest_att },
         .depth_stencil_attachment = VK_ATTACHMENT_UNUSED,
      };

      anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);

      /* Subpass resolves must respect the render area. We can ignore the
       * render area here because vkCmdBeginRenderPass set the render area
       * with 3DSTATE_DRAWING_RECTANGLE.
       *
       * XXX(chadv): Does the hardware really respect
       * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
       */
      emit_resolve(cmd_buffer,
                   src_iview,
                   &(VkOffset2D) { 0, 0 },
                   dest_iview,
                   &(VkOffset2D) { 0, 0 },
                   &(VkExtent2D) { fb->width, fb->height });
   }

   cmd_buffer->state.subpass = subpass;
   meta_resolve_restore(&saved_state, cmd_buffer);
}
45
src/intel/vulkan/anv_nir.h
Normal file

@@ -0,0 +1,45 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "nir/nir.h"
#include "anv_private.h"

#ifdef __cplusplus
extern "C" {
#endif

void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar);

void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
                                   nir_shader *shader,
                                   struct brw_stage_prog_data *prog_data);
void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                                   nir_shader *shader,
                                   struct brw_stage_prog_data *prog_data,
                                   struct anv_pipeline_bind_map *map);

#ifdef __cplusplus
}
#endif
172
src/intel/vulkan/anv_nir_apply_dynamic_offsets.c
Normal file

@@ -0,0 +1,172 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir/nir_builder.h"

struct apply_dynamic_offsets_state {
   nir_shader *shader;
   nir_builder builder;

   const struct anv_pipeline_layout *layout;

   uint32_t indices_start;
};

static bool
apply_dynamic_offsets_block(nir_block *block, void *void_state)
{
   struct apply_dynamic_offsets_state *state = void_state;
   struct anv_descriptor_set_layout *set_layout;

   nir_builder *b = &state->builder;

   nir_foreach_instr_safe(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      unsigned block_idx_src;
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_ubo:
      case nir_intrinsic_load_ssbo:
         block_idx_src = 0;
         break;
      case nir_intrinsic_store_ssbo:
         block_idx_src = 1;
         break;
      default:
         continue; /* the loop */
      }

      nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
      assert(res_instr->type == nir_instr_type_intrinsic);
      nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
      assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

      unsigned set = res_intrin->const_index[0];
      unsigned binding = res_intrin->const_index[1];

      set_layout = state->layout->set[set].layout;
      if (set_layout->binding[binding].dynamic_offset_index < 0)
         continue;

      b->cursor = nir_before_instr(&intrin->instr);

      /* First, we need to generate the uniform load for the buffer offset */
      uint32_t index = state->layout->set[set].dynamic_offset_start +
                       set_layout->binding[binding].dynamic_offset_index;

      nir_intrinsic_instr *offset_load =
         nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
      offset_load->num_components = 2;
      offset_load->const_index[0] = state->indices_start + index * 8;
      offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
                                                     nir_imm_int(b, 8)));

      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, 32, NULL);
      nir_builder_instr_insert(b, &offset_load->instr);

      nir_src *offset_src = nir_get_io_offset_src(intrin);
      nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
                                         &offset_load->dest.ssa);

      /* In order to avoid out-of-bounds access, we predicate */
      nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
                                  offset_src->ssa);
      nir_if *if_stmt = nir_if_create(b->shader);
      if_stmt->condition = nir_src_for_ssa(pred);
      nir_cf_node_insert(b->cursor, &if_stmt->cf_node);

      nir_instr_remove(&intrin->instr);
      *offset_src = nir_src_for_ssa(new_offset);
      nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr);

      if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
         /* It's a load, we need a phi node */
         nir_phi_instr *phi = nir_phi_instr_create(b->shader);
         nir_ssa_dest_init(&phi->instr, &phi->dest,
                           intrin->num_components,
                           intrin->dest.ssa.bit_size, NULL);

         nir_phi_src *src1 = ralloc(phi, nir_phi_src);
         struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
         src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
         src1->src = nir_src_for_ssa(&intrin->dest.ssa);
         exec_list_push_tail(&phi->srcs, &src1->node);

         b->cursor = nir_after_cf_list(&if_stmt->else_list);
         nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
            (nir_const_value) { .u32 = { 0, 0, 0, 0 } });

         nir_phi_src *src2 = ralloc(phi, nir_phi_src);
         struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
         src2->pred = exec_node_data(nir_block, enode, cf_node.node);
         src2->src = nir_src_for_ssa(zero);
         exec_list_push_tail(&phi->srcs, &src2->node);

         assert(intrin->dest.is_ssa);
         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  nir_src_for_ssa(&phi->dest.ssa));

         nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
      }
   }

   return true;
}
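
/* Net effect of the rewrite above, in pseudocode (a sketch):
 *
 *    vec2 dyn = load_uniform(indices_start + index * 8);  // (offset, range)
 *    if (dyn.y >= off)             // predicate on the original offset
 *       result = load(block, off + dyn.x);
 *    else
 *       result = 0;                // out-of-bounds reads return zero
 */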

void
anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data)
{
   struct apply_dynamic_offsets_state state = {
      .shader = shader,
      .layout = pipeline->layout,
      .indices_start = shader->num_uniforms,
   };

   if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets)
      return;

   nir_foreach_function(shader, function) {
      if (function->impl) {
         nir_builder_init(&state.builder, function->impl);
         nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state);
         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
      }
   }

   struct anv_push_constants *null_data = NULL;
   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
      prog_data->param[i * 2 + shader->num_uniforms / 4] =
         (const union gl_constant_value *)&null_data->dynamic[i].offset;
      prog_data->param[i * 2 + 1 + shader->num_uniforms / 4] =
         (const union gl_constant_value *)&null_data->dynamic[i].range;
   }

   shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8;
}
387
src/intel/vulkan/anv_nir_apply_pipeline_layout.c
Normal file

@@ -0,0 +1,387 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"

struct apply_pipeline_layout_state {
   nir_shader *shader;
   nir_builder builder;

   struct {
      BITSET_WORD *used;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
      uint8_t *image_offsets;
   } set[MAX_SETS];
};

static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   BITSET_SET(state->set[set].used, binding);
}

static void
add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var)
{
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

static bool
get_used_bindings_block(nir_block *block, void *void_state)
{
   struct apply_pipeline_layout_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_vulkan_resource_index:
            add_binding(state, nir_intrinsic_desc_set(intrin),
                        nir_intrinsic_binding(intrin));
            break;

         case nir_intrinsic_image_load:
         case nir_intrinsic_image_store:
         case nir_intrinsic_image_atomic_add:
         case nir_intrinsic_image_atomic_min:
         case nir_intrinsic_image_atomic_max:
         case nir_intrinsic_image_atomic_and:
         case nir_intrinsic_image_atomic_or:
         case nir_intrinsic_image_atomic_xor:
         case nir_intrinsic_image_atomic_exchange:
         case nir_intrinsic_image_atomic_comp_swap:
         case nir_intrinsic_image_size:
         case nir_intrinsic_image_samples:
            add_var_binding(state, intrin->variables[0]->var);
            break;

         default:
            break;
         }
         break;
      }
      case nir_instr_type_tex: {
         nir_tex_instr *tex = nir_instr_as_tex(instr);
         assert(tex->texture);
         add_var_binding(state, tex->texture->var);
         if (tex->sampler)
            add_var_binding(state, tex->sampler->var);
         break;
      }
      default:
         continue;
      }
   }

   return true;
}

static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   nir_builder *b = &state->builder;

   b->cursor = nir_before_instr(&intrin->instr);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);

   uint32_t surface_index = state->set[set].surface_offsets[binding];

   nir_const_value *const_block_idx =
      nir_src_as_const_value(intrin->src[0]);

   nir_ssa_def *block_index;
   if (const_block_idx) {
      block_index = nir_imm_int(b, surface_index + const_block_idx->u32[0]);
   } else {
      block_index = nir_iadd(b, nir_imm_int(b, surface_index),
                             nir_ssa_for_src(b, intrin->src[0], 1));
   }

   assert(intrin->dest.is_ssa);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
   nir_instr_remove(&intrin->instr);
}

static void
lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
                unsigned *const_index, nir_tex_src_type src_type,
                struct apply_pipeline_layout_state *state)
{
   if (deref->deref.child) {
      assert(deref->deref.child->deref_type == nir_deref_type_array);
      nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);

      *const_index += deref_array->base_offset;

      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                               tex->num_srcs + 1);

         for (unsigned i = 0; i < tex->num_srcs; i++) {
            new_srcs[i].src_type = tex->src[i].src_type;
            nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src);
         }

         ralloc_free(tex->src);
         tex->src = new_srcs;

         /* Now we can go ahead and move the source over to being a
          * first-class texture source.
          */
         tex->src[tex->num_srcs].src_type = src_type;
         tex->num_srcs++;
         assert(deref_array->indirect.is_ssa);
         nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src,
                               deref_array->indirect);
      }
   }
}

static void
cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref)
{
   if (deref->deref.child == NULL)
      return;

   nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);

   if (deref_array->deref_array_type != nir_deref_array_type_indirect)
      return;

   nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT);
}

static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
   /* No one should have come by and lowered it already */
   assert(tex->texture);

   unsigned set = tex->texture->var->data.descriptor_set;
   unsigned binding = tex->texture->var->data.binding;
   tex->texture_index = state->set[set].surface_offsets[binding];
   lower_tex_deref(tex, tex->texture, &tex->texture_index,
                   nir_tex_src_texture_offset, state);

   if (tex->sampler) {
      unsigned set = tex->sampler->var->data.descriptor_set;
      unsigned binding = tex->sampler->var->data.binding;
      tex->sampler_index = state->set[set].sampler_offsets[binding];
      lower_tex_deref(tex, tex->sampler, &tex->sampler_index,
                      nir_tex_src_sampler_offset, state);
   }

   /* The backend only ever uses this to mark used surfaces. We don't care
    * about that little optimization so it just needs to be non-zero.
    */
   tex->texture_array_size = 1;

   cleanup_tex_deref(tex, tex->texture);
   if (tex->sampler)
      cleanup_tex_deref(tex, tex->sampler);
   tex->texture = NULL;
   tex->sampler = NULL;
}

static bool
apply_pipeline_layout_block(nir_block *block, void *void_state)
{
   struct apply_pipeline_layout_state *state = void_state;

   nir_foreach_instr_safe(block, instr) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
            lower_res_index_intrinsic(intrin, state);
         }
         break;
      }
      case nir_instr_type_tex:
         lower_tex(nir_instr_as_tex(instr), state);
         break;
      default:
         continue;
      }
   }

   return true;
}

static void
setup_vec4_uniform_value(const union gl_constant_value **params,
                         const union gl_constant_value *values,
                         unsigned n)
{
   static const gl_constant_value zero = { 0 };

   for (unsigned i = 0; i < n; ++i)
      params[i] = &values[i];

   for (unsigned i = n; i < 4; ++i)
      params[i] = &zero;
}

void
anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
                              nir_shader *shader,
                              struct brw_stage_prog_data *prog_data,
                              struct anv_pipeline_bind_map *map)
{
   struct anv_pipeline_layout *layout = pipeline->layout;

   struct apply_pipeline_layout_state state = {
      .shader = shader,
   };

   void *mem_ctx = ralloc_context(NULL);

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      const unsigned words = BITSET_WORDS(count);
      state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_foreach_function(shader, function) {
      if (function->impl)
         nir_foreach_block(function->impl, get_used_bindings_block, &state);
   }

   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;

      BITSET_WORD b, _tmp;
      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
                         set_layout->binding_count) {
         if (set_layout->binding[b].stage[shader->stage].surface_index >= 0)
            map->surface_count += set_layout->binding[b].array_size;
         if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0)
            map->sampler_count += set_layout->binding[b].array_size;
         if (set_layout->binding[b].stage[shader->stage].image_index >= 0)
            map->image_count += set_layout->binding[b].array_size;
      }
   }

   unsigned surface = 0;
   unsigned sampler = 0;
   unsigned image = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;

      BITSET_WORD b, _tmp;
      BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
                         set_layout->binding_count) {
         unsigned array_size = set_layout->binding[b].array_size;
         unsigned set_offset = set_layout->binding[b].descriptor_index;

         if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) {
            state.set[set].surface_offsets[b] = surface;
            for (unsigned i = 0; i < array_size; i++) {
               map->surface_to_descriptor[surface + i].set = set;
               map->surface_to_descriptor[surface + i].offset = set_offset + i;
            }
            surface += array_size;
         }

         if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) {
            state.set[set].sampler_offsets[b] = sampler;
            for (unsigned i = 0; i < array_size; i++) {
               map->sampler_to_descriptor[sampler + i].set = set;
               map->sampler_to_descriptor[sampler + i].offset = set_offset + i;
            }
            sampler += array_size;
         }

         if (set_layout->binding[b].stage[shader->stage].image_index >= 0) {
            state.set[set].image_offsets[b] = image;
            image += array_size;
         }
      }
   }
|
||||
|
||||
nir_foreach_function(shader, function) {
|
||||
if (function->impl) {
|
||||
nir_builder_init(&state.builder, function->impl);
|
||||
nir_foreach_block(function->impl, apply_pipeline_layout_block, &state);
|
||||
nir_metadata_preserve(function->impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
}
|
||||
}
|
||||
|
||||
if (map->image_count > 0) {
|
||||
assert(map->image_count <= MAX_IMAGES);
|
||||
nir_foreach_variable(var, &shader->uniforms) {
|
||||
if (glsl_type_is_image(var->type) ||
|
||||
(glsl_type_is_array(var->type) &&
|
||||
glsl_type_is_image(glsl_get_array_element(var->type)))) {
|
||||
/* Images are represented as uniform push constants and the actual
|
||||
* information required for reading/writing to/from the image is
|
||||
* storred in the uniform.
|
||||
*/
|
||||
unsigned set = var->data.descriptor_set;
|
||||
unsigned binding = var->data.binding;
|
||||
unsigned image_index = state.set[set].image_offsets[binding];
|
||||
|
||||
var->data.driver_location = shader->num_uniforms +
|
||||
image_index * BRW_IMAGE_PARAM_SIZE * 4;
|
||||
}
|
||||
}
|
||||
|
||||
struct anv_push_constants *null_data = NULL;
|
||||
const gl_constant_value **param =
|
||||
prog_data->param + (shader->num_uniforms / 4);
|
||||
const struct brw_image_param *image_param = null_data->images;
|
||||
for (uint32_t i = 0; i < map->image_count; i++) {
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
|
||||
(const union gl_constant_value *)&image_param->surface_idx, 1);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
|
||||
(const union gl_constant_value *)image_param->offset, 2);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
|
||||
(const union gl_constant_value *)image_param->size, 3);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
|
||||
(const union gl_constant_value *)image_param->stride, 4);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
|
||||
(const union gl_constant_value *)image_param->tiling, 3);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
|
||||
(const union gl_constant_value *)image_param->swizzling, 2);
|
||||
|
||||
param += BRW_IMAGE_PARAM_SIZE;
|
||||
image_param ++;
|
||||
}
|
||||
|
||||
shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
|
||||
}
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
}
|
||||
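The heart of the pass above is the flattening step: every (set, binding) pair the shader actually uses is handed a contiguous range of flat binding-table slots, in set/binding order, with one slot per array element. A minimal standalone sketch of just that numbering scheme — hypothetical simplified structs, not the driver's own types — looks like this:

#include <stdio.h>

/* Hypothetical stand-ins for the driver's descriptor-set layout structs. */
struct binding { int used; int array_size; };
struct set_layout { int binding_count; struct binding bindings[4]; };

int main(void)
{
   struct set_layout sets[2] = {
      { 2, { { 1, 1 }, { 1, 3 } } },   /* set 0: two used bindings */
      { 1, { { 1, 2 } } },             /* set 1: one used binding  */
   };

   /* Walk sets and bindings in order, handing out contiguous slots,
    * exactly like the surface/sampler/image counters in the pass. */
   int surface = 0;
   for (int s = 0; s < 2; s++) {
      for (int b = 0; b < sets[s].binding_count; b++) {
         if (!sets[s].bindings[b].used)
            continue;
         printf("set %d binding %d -> slots %d..%d\n", s, b,
                surface, surface + sets[s].bindings[b].array_size - 1);
         surface += sets[s].bindings[b].array_size;
      }
   }
   return 0;
}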
77 src/intel/vulkan/anv_nir_lower_push_constants.c Normal file
@@ -0,0 +1,77 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"

struct lower_push_constants_state {
   nir_shader *shader;
   bool is_scalar;
};

static bool
lower_push_constants_block(nir_block *block, void *void_state)
{
   struct lower_push_constants_state *state = void_state;

   nir_foreach_instr(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      /* TODO: Handle indirect push constants */
      if (intrin->intrinsic != nir_intrinsic_load_push_constant)
         continue;

      /* This won't work for vec4 stages. */
      assert(state->is_scalar);

      assert(intrin->const_index[0] % 4 == 0);
      assert(intrin->const_index[1] == 128);

      /* We just turn them into uniform loads with the appropriate offset */
      intrin->intrinsic = nir_intrinsic_load_uniform;
   }

   return true;
}

void
anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar)
{
   struct lower_push_constants_state state = {
      .shader = shader,
      .is_scalar = is_scalar,
   };

   nir_foreach_function(shader, function) {
      if (function->impl)
         nir_foreach_block(function->impl, lower_push_constants_block, &state);
   }

   assert(shader->num_uniforms % 4 == 0);
   if (is_scalar)
      shader->num_uniforms /= 4;
   else
      shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16);
}
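The pass ends by converting num_uniforms from bytes into the unit each backend counts in: scalar backends count 4-byte elements, vec4 backends count 16-byte vec4 slots, rounding up. A small sketch of that arithmetic, with a made-up uniform size for illustration:

#include <assert.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   unsigned num_uniform_bytes = 132;   /* hypothetical shader: 33 dwords */

   /* Scalar backends count 4-byte elements; vec4 backends count 16-byte
    * vec4 slots, rounding up as in the pass above. */
   unsigned scalar_units = num_uniform_bytes / 4;
   unsigned vec4_units   = DIV_ROUND_UP(num_uniform_bytes, 16);

   printf("scalar: %u elements, vec4: %u slots\n", scalar_units, vec4_units);
   assert(scalar_units == 33 && vec4_units == 9);
   return 0;
}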
160 src/intel/vulkan/anv_pass.c Normal file
@@ -0,0 +1,160 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

VkResult anv_CreateRenderPass(
    VkDevice                                    _device,
    const VkRenderPassCreateInfo*               pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkRenderPass*                               pRenderPass)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);

   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = anv_alloc2(&device->alloc, pAllocator, size, 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pass == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Clear the subpasses along with the parent pass. This is required
    * because each array member of anv_subpass must be a valid pointer if
    * not NULL.
    */
   memset(pass, 0, size);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct anv_render_pass_attachment *att = &pass->attachments[i];

      att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format);
      att->samples = pCreateInfo->pAttachments[i].samples;
      att->load_op = pCreateInfo->pAttachments[i].loadOp;
      att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
      // att->store_op = pCreateInfo->pAttachments[i].storeOp;
      // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
   }

   uint32_t subpass_attachment_count = 0, *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];

      subpass_attachment_count +=
         desc->inputAttachmentCount +
         desc->colorAttachmentCount +
         /* Count colorAttachmentCount again for resolve_attachments */
         desc->colorAttachmentCount;
   }

   pass->subpass_attachments =
      anv_alloc2(&device->alloc, pAllocator,
                 subpass_attachment_count * sizeof(uint32_t), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pass->subpass_attachments == NULL) {
      anv_free2(&device->alloc, pAllocator, pass);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
      struct anv_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            subpass->input_attachments[j]
               = desc->pInputAttachments[j].attachment;
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->color_attachments[j]
               = desc->pColorAttachments[j].attachment;
         }
      }

      subpass->has_resolve = false;
      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pResolveAttachments[j].attachment;
            subpass->resolve_attachments[j] = a;
            if (a != VK_ATTACHMENT_UNUSED)
               subpass->has_resolve = true;
         }
      }

      if (desc->pDepthStencilAttachment) {
         subpass->depth_stencil_attachment =
            desc->pDepthStencilAttachment->attachment;
      } else {
         subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED;
      }
   }

   *pRenderPass = anv_render_pass_to_handle(pass);

   return VK_SUCCESS;
}

void anv_DestroyRenderPass(
    VkDevice                                    _device,
    VkRenderPass                                _pass,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_render_pass, pass, _pass);

   anv_free2(&device->alloc, pAllocator, pass->subpass_attachments);
   anv_free2(&device->alloc, pAllocator, pass);
}

void anv_GetRenderAreaGranularity(
    VkDevice                                    device,
    VkRenderPass                                renderPass,
    VkExtent2D*                                 pGranularity)
{
   *pGranularity = (VkExtent2D) { 1, 1 };
}
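anv_CreateRenderPass sizes one allocation to hold the pass struct plus its variable-length subpass and attachment arrays, then fixes up internal pointers into that allocation. A stripped-down sketch of the same layout trick, with hypothetical struct names (the driver writes the pointer fixup as GCC void* arithmetic; the sketch uses a portable char* cast):

#include <stdlib.h>
#include <string.h>

struct item { int value; };

struct container {
   unsigned item_count;
   struct item *items;   /* points into the same allocation */
};

static struct container *
container_create(unsigned item_count)
{
   /* One allocation: header first, then the trailing array. */
   size_t size = sizeof(struct container);
   size_t items_offset = size;
   size += item_count * sizeof(struct item);

   struct container *c = malloc(size);
   if (c == NULL)
      return NULL;

   memset(c, 0, size);
   c->item_count = item_count;
   c->items = (struct item *)((char *)c + items_offset);
   return c;
}

int main(void)
{
   struct container *c = container_create(4);
   if (c)
      c->items[3].value = 42;
   free(c);   /* a single free releases header and array together */
   return 0;
}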
1370 src/intel/vulkan/anv_pipeline.c Normal file
File diff suppressed because it is too large
518 src/intel/vulkan/anv_pipeline_cache.c Normal file
@@ -0,0 +1,518 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "anv_private.h"

/* Remaining work:
 *
 * - Compact binding table layout so it's tight and not dependent on
 *   descriptor set layout.
 *
 * - Review prog_data struct for size and cacheability: struct
 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
 *   bit quantities etc; param, pull_param, and image_params are pointers, we
 *   just need the compaction map. Use bit fields for all bools, e.g.
 *   dual_src_blend.
 */

void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                        struct anv_device *device)
{
   cache->device = device;
   anv_state_stream_init(&cache->program_stream,
                         &device->instruction_block_pool);
   pthread_mutex_init(&cache->mutex, NULL);

   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal; we just start with a
    * 0-sized cache.
    */
   if (cache->hash_table == NULL ||
       !env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0xff, byte_size);
}

void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
   anv_state_stream_finish(&cache->program_stream);
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

struct cache_entry {
   unsigned char sha1[20];
   uint32_t prog_data_size;
   uint32_t kernel_size;
   uint32_t surface_count;
   uint32_t sampler_count;
   uint32_t image_count;

   char prog_data[0];

   /* kernel follows prog_data at next 64 byte aligned address */
};

static uint32_t
entry_size(struct cache_entry *entry)
{
   /* This returns the number of bytes needed to serialize an entry, which
    * doesn't include the alignment padding bytes.
    */

   const uint32_t map_size =
      entry->surface_count * sizeof(struct anv_pipeline_binding) +
      entry->sampler_count * sizeof(struct anv_pipeline_binding);

   return sizeof(*entry) + entry->prog_data_size + map_size;
}

void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
                struct anv_shader_module *module,
                const char *entrypoint,
                const VkSpecializationInfo *spec_info)
{
   struct mesa_sha1 *ctx;

   ctx = _mesa_sha1_init();
   _mesa_sha1_update(ctx, key, key_size);
   _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
   /* hash in shader stage, pipeline layout? */
   if (spec_info) {
      _mesa_sha1_update(ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
      _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
   }
   _mesa_sha1_final(ctx, hash);
}

static uint32_t
anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
                                   const unsigned char *sha1,
                                   const struct brw_stage_prog_data **prog_data,
                                   struct anv_pipeline_bind_map *map)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      const uint32_t offset = cache->hash_table[index];

      if (offset == ~0)
         return NO_KERNEL;

      struct cache_entry *entry =
         cache->program_stream.block_pool->map + offset;
      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         if (prog_data) {
            assert(map);
            void *p = entry->prog_data;
            *prog_data = p;
            p += entry->prog_data_size;
            map->surface_count = entry->surface_count;
            map->sampler_count = entry->sampler_count;
            map->image_count = entry->image_count;
            map->surface_to_descriptor = p;
            p += map->surface_count * sizeof(struct anv_pipeline_binding);
            map->sampler_to_descriptor = p;
         }

         return offset + align_u32(entry_size(entry), 64);
      }
   }

   unreachable("hash table should never be full");
}

uint32_t
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const unsigned char *sha1,
                          const struct brw_stage_prog_data **prog_data,
                          struct anv_pipeline_bind_map *map)
{
   uint32_t kernel;

   pthread_mutex_lock(&cache->mutex);

   kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);

   pthread_mutex_unlock(&cache->mutex);

   return kernel;
}

static void
anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
                             struct cache_entry *entry, uint32_t entry_offset)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) entry->sha1);

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (cache->hash_table[index] == ~0) {
         cache->hash_table[index] = entry_offset;
         break;
      }
   }

   cache->total_size += entry_size(entry) + entry->kernel_size;
   cache->kernel_count++;
}

static VkResult
anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   uint32_t *table;
   uint32_t *old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0xff, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      const uint32_t offset = old_table[i];
      if (offset == ~0)
         continue;

      struct cache_entry *entry =
         cache->program_stream.block_pool->map + offset;
      anv_pipeline_cache_set_entry(cache, entry, offset);
   }

   free(old_table);

   return VK_SUCCESS;
}

static void
anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
                             struct cache_entry *entry, uint32_t entry_offset)
{
   if (cache->kernel_count == cache->table_size / 2)
      anv_pipeline_cache_grow(cache);

   /* Failing to grow the hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      anv_pipeline_cache_set_entry(cache, entry, entry_offset);
}

uint32_t
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const unsigned char *sha1,
                                 const void *kernel, size_t kernel_size,
                                 const struct brw_stage_prog_data **prog_data,
                                 size_t prog_data_size,
                                 struct anv_pipeline_bind_map *map)
{
   pthread_mutex_lock(&cache->mutex);

   /* Before uploading, check again that another thread didn't upload this
    * shader while we were compiling it.
    */
   if (sha1) {
      uint32_t cached_kernel =
         anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
      if (cached_kernel != NO_KERNEL) {
         pthread_mutex_unlock(&cache->mutex);
         return cached_kernel;
      }
   }

   struct cache_entry *entry;

   const uint32_t map_size =
      map->surface_count * sizeof(struct anv_pipeline_binding) +
      map->sampler_count * sizeof(struct anv_pipeline_binding);

   const uint32_t preamble_size =
      align_u32(sizeof(*entry) + prog_data_size + map_size, 64);

   const uint32_t size = preamble_size + kernel_size;

   assert(size < cache->program_stream.block_pool->block_size);
   const struct anv_state state =
      anv_state_stream_alloc(&cache->program_stream, size, 64);

   entry = state.map;
   entry->prog_data_size = prog_data_size;
   entry->surface_count = map->surface_count;
   entry->sampler_count = map->sampler_count;
   entry->image_count = map->image_count;
   entry->kernel_size = kernel_size;

   void *p = entry->prog_data;
   memcpy(p, *prog_data, prog_data_size);
   p += prog_data_size;

   memcpy(p, map->surface_to_descriptor,
          map->surface_count * sizeof(struct anv_pipeline_binding));
   map->surface_to_descriptor = p;
   p += map->surface_count * sizeof(struct anv_pipeline_binding);

   memcpy(p, map->sampler_to_descriptor,
          map->sampler_count * sizeof(struct anv_pipeline_binding));
   map->sampler_to_descriptor = p;

   if (sha1) {
      assert(anv_pipeline_cache_search_unlocked(cache, sha1,
                                                NULL, NULL) == NO_KERNEL);

      memcpy(entry->sha1, sha1, sizeof(entry->sha1));
      anv_pipeline_cache_add_entry(cache, entry, state.offset);
   }

   pthread_mutex_unlock(&cache->mutex);

   memcpy(state.map + preamble_size, kernel, kernel_size);

   if (!cache->device->info.has_llc)
      anv_state_clflush(state);

   *prog_data = (const struct brw_stage_prog_data *) entry->prog_data;

   return state.offset + preamble_size;
}

struct cache_header {
   uint32_t header_size;
   uint32_t header_version;
   uint32_t vendor_id;
   uint32_t device_id;
   uint8_t  uuid[VK_UUID_SIZE];
};

static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   struct anv_device *device = cache->device;
   struct cache_header header;
   uint8_t uuid[VK_UUID_SIZE];

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   anv_device_get_cache_uuid(uuid);
   if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
      return;

   void *end = (void *) data + size;
   void *p = (void *) data + header.header_size;

   while (p < end) {
      struct cache_entry *entry = p;

      void *data = entry->prog_data;
      const struct brw_stage_prog_data *prog_data = data;
      data += entry->prog_data_size;

      struct anv_pipeline_binding *surface_to_descriptor = data;
      data += entry->surface_count * sizeof(struct anv_pipeline_binding);
      struct anv_pipeline_binding *sampler_to_descriptor = data;
      data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
      void *kernel = data;

      struct anv_pipeline_bind_map map = {
         .surface_count = entry->surface_count,
         .sampler_count = entry->sampler_count,
         .image_count = entry->image_count,
         .surface_to_descriptor = surface_to_descriptor,
         .sampler_to_descriptor = sampler_to_descriptor
      };

      anv_pipeline_cache_upload_kernel(cache, entry->sha1,
                                       kernel, entry->kernel_size,
                                       &prog_data,
                                       entry->prog_data_size, &map);
      p = kernel + entry->kernel_size;
   }
}

VkResult anv_CreatePipelineCache(
    VkDevice                                    _device,
    const VkPipelineCacheCreateInfo*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipelineCache*                            pPipelineCache)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = anv_alloc2(&device->alloc, pAllocator,
                      sizeof(*cache), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   anv_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0)
      anv_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);

   *pPipelineCache = anv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void anv_DestroyPipelineCache(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);

   anv_pipeline_cache_finish(cache);

   anv_free2(&device->alloc, pAllocator, cache);
}

VkResult anv_GetPipelineCacheData(
    VkDevice                                    _device,
    VkPipelineCache                             _cache,
    size_t*                                     pDataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
   struct cache_header *header;

   const size_t size = sizeof(*header) + cache->total_size;

   if (pData == NULL) {
      *pDataSize = size;
      return VK_SUCCESS;
   }

   if (*pDataSize < sizeof(*header)) {
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0x8086;
   header->device_id = device->chipset_id;
   anv_device_get_cache_uuid(header->uuid);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (cache->hash_table[i] == ~0)
         continue;

      entry = cache->program_stream.block_pool->map + cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size + entry->kernel_size)
         break;

      memcpy(p, entry, size);
      p += size;

      void *kernel = (void *) entry + align_u32(size, 64);

      memcpy(p, kernel, entry->kernel_size);
      p += entry->kernel_size;
   }

   *pDataSize = p - pData;

   return VK_SUCCESS;
}

static void
anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
                         struct anv_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      const uint32_t offset = src->hash_table[i];
      if (offset == ~0)
         continue;

      struct cache_entry *entry =
         src->program_stream.block_pool->map + offset;

      if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
         continue;

      anv_pipeline_cache_add_entry(dst, entry, offset);
   }
}

VkResult anv_MergePipelineCaches(
    VkDevice                                    _device,
    VkPipelineCache                             destCache,
    uint32_t                                    srcCacheCount,
    const VkPipelineCache*                      pSrcCaches)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);

      anv_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}
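The cache's hash table is open-addressed: the first 32 bits of the SHA-1 pick the start slot, linear probing resolves collisions, ~0 marks an empty slot, and the table is kept under half full so probing always terminates. A self-contained sketch of that scheme, with toy table sizes and stand-in keys rather than the driver's state-stream offsets:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TABLE_SIZE 8            /* must be a power of two, as in the cache */
#define EMPTY      (~0u)

static uint32_t table[TABLE_SIZE];
static unsigned char keys[TABLE_SIZE][20];   /* stand-in for entry sha1s */

/* Linear probing keyed on the first 32 bits of the SHA-1, the same scheme
 * as anv_pipeline_cache_search_unlocked(). */
static uint32_t
lookup(const unsigned char sha1[20])
{
   const uint32_t mask = TABLE_SIZE - 1;
   uint32_t start;
   memcpy(&start, sha1, sizeof(start));

   for (uint32_t i = 0; i < TABLE_SIZE; i++) {
      const uint32_t index = (start + i) & mask;
      if (table[index] == EMPTY)
         return EMPTY;                       /* miss: hit an empty slot */
      if (memcmp(keys[index], sha1, 20) == 0)
         return table[index];                /* hit */
   }
   return EMPTY;   /* unreachable if the table stays under half full */
}

static void
insert(const unsigned char sha1[20], uint32_t value)
{
   const uint32_t mask = TABLE_SIZE - 1;
   uint32_t start;
   memcpy(&start, sha1, sizeof(start));

   for (uint32_t i = 0; i < TABLE_SIZE; i++) {
      const uint32_t index = (start + i) & mask;
      if (table[index] == EMPTY) {
         memcpy(keys[index], sha1, 20);
         table[index] = value;
         return;
      }
   }
}

int main(void)
{
   memset(table, 0xff, sizeof(table));   /* all slots empty, as in init */

   unsigned char sha1[20] = { 0x12, 0x34 };
   insert(sha1, 4096);
   printf("offset: %u\n", lookup(sha1));
   return 0;
}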
1891 src/intel/vulkan/anv_private.h Normal file
File diff suppressed because it is too large
187 src/intel/vulkan/anv_query.c Normal file
@@ -0,0 +1,187 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

VkResult anv_CreateQueryPool(
    VkDevice                                    _device,
    const VkQueryPoolCreateInfo*                pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkQueryPool*                                pQueryPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_query_pool *pool;
   VkResult result;
   uint32_t slot_size;
   uint64_t size;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);

   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
   case VK_QUERY_TYPE_TIMESTAMP:
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      return VK_ERROR_INCOMPATIBLE_DRIVER;
   default:
      assert(!"Invalid query type");
   }

   slot_size = sizeof(struct anv_query_pool_slot);
   pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pool == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pool->type = pCreateInfo->queryType;
   pool->slots = pCreateInfo->queryCount;

   size = pCreateInfo->queryCount * slot_size;
   result = anv_bo_init_new(&pool->bo, device, size);
   if (result != VK_SUCCESS)
      goto fail;

   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);

   *pQueryPool = anv_query_pool_to_handle(pool);

   return VK_SUCCESS;

 fail:
   anv_free2(&device->alloc, pAllocator, pool);

   return result;
}

void anv_DestroyQueryPool(
    VkDevice                                    _device,
    VkQueryPool                                 _pool,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_query_pool, pool, _pool);

   anv_gem_munmap(pool->bo.map, pool->bo.size);
   anv_gem_close(device, pool->bo.gem_handle);
   anv_free2(&device->alloc, pAllocator, pool);
}

VkResult anv_GetQueryPoolResults(
    VkDevice                                    _device,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount,
    size_t                                      dataSize,
    void*                                       pData,
    VkDeviceSize                                stride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   int64_t timeout = INT64_MAX;
   uint64_t result;
   int ret;

   assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
          pool->type == VK_QUERY_TYPE_TIMESTAMP);

   if (pData == NULL)
      return VK_SUCCESS;

   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
      ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
      if (ret == -1) {
         /* We don't know the real error. */
         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
                          "gem_wait failed %m");
      }
   }

   void *data_end = pData + dataSize;
   struct anv_query_pool_slot *slot = pool->bo.map;

   for (uint32_t i = 0; i < queryCount; i++) {
      switch (pool->type) {
      case VK_QUERY_TYPE_OCCLUSION: {
         result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
         break;
      }
      case VK_QUERY_TYPE_PIPELINE_STATISTICS:
         unreachable("pipeline stats not supported");
      case VK_QUERY_TYPE_TIMESTAMP: {
         result = slot[firstQuery + i].begin;
         break;
      }
      default:
         unreachable("invalid pool type");
      }

      if (flags & VK_QUERY_RESULT_64_BIT) {
         uint64_t *dst = pData;
         dst[0] = result;
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[1] = slot[firstQuery + i].available;
      } else {
         uint32_t *dst = pData;
         if (result > UINT32_MAX)
            result = UINT32_MAX;
         dst[0] = result;
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst[1] = slot[firstQuery + i].available;
      }

      pData += stride;
      if (pData >= data_end)
         break;
   }

   return VK_SUCCESS;
}

void anv_CmdResetQueryPool(
    VkCommandBuffer                             commandBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    firstQuery,
    uint32_t                                    queryCount)
{
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   for (uint32_t i = 0; i < queryCount; i++) {
      switch (pool->type) {
      case VK_QUERY_TYPE_OCCLUSION:
      case VK_QUERY_TYPE_TIMESTAMP: {
         struct anv_query_pool_slot *slot = pool->bo.map;
         slot[firstQuery + i].available = 0;
         break;
      }
      default:
         assert(!"Invalid query type");
      }
   }
}
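anv_GetQueryPoolResults writes each query's result at a caller-chosen stride and, on the 32-bit path, saturates values that don't fit in a uint32_t. A toy sketch of just that packing loop, with made-up result values:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   uint64_t results[3] = { 7, 0x1ffffffffull, 42 };   /* made-up results */
   unsigned char buf[3 * 8];
   size_t stride = 8;                /* bytes between consecutive queries */
   void *p = buf;

   for (int i = 0; i < 3; i++) {
      uint64_t r = results[i];
      uint32_t r32 = r > UINT32_MAX ? UINT32_MAX : (uint32_t)r;
      memcpy(p, &r32, sizeof(r32));  /* 32-bit path: saturate, then store */
      p = (char *)p + stride;
   }

   for (int i = 0; i < 3; i++) {
      uint32_t v;
      memcpy(&v, buf + i * stride, sizeof(v));
      printf("query %d -> %u\n", i, v);
   }
   return 0;
}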
202 src/intel/vulkan/anv_util.c Normal file
@@ -0,0 +1,202 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>

#include "anv_private.h"

/** Log an error message. */
void anv_printflike(1, 2)
anv_loge(const char *format, ...)
{
   va_list va;

   va_start(va, format);
   anv_loge_v(format, va);
   va_end(va);
}

/** \see anv_loge() */
void
anv_loge_v(const char *format, va_list va)
{
   fprintf(stderr, "vk: error: ");
   vfprintf(stderr, format, va);
   fprintf(stderr, "\n");
}

void anv_printflike(3, 4)
__anv_finishme(const char *file, int line, const char *format, ...)
{
   va_list ap;
   char buffer[256];

   va_start(ap, format);
   vsnprintf(buffer, sizeof(buffer), format, ap);
   va_end(ap);

   fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
}

void anv_noreturn anv_printflike(1, 2)
anv_abortf(const char *format, ...)
{
   va_list va;

   va_start(va, format);
   anv_abortfv(format, va);
   va_end(va);
}

void anv_noreturn
anv_abortfv(const char *format, va_list va)
{
   fprintf(stderr, "vk: error: ");
   vfprintf(stderr, format, va);
   fprintf(stderr, "\n");
   abort();
}

VkResult
__vk_errorf(VkResult error, const char *file, int line, const char *format, ...)
{
   va_list ap;
   char buffer[256];

#define ERROR_CASE(error) case error: error_str = #error; break;

   const char *error_str;
   switch ((int32_t)error) {

   /* Core errors */
   ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY)
   ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY)
   ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED)
   ERROR_CASE(VK_ERROR_DEVICE_LOST)
   ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED)
   ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT)
   ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT)
   ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER)

   /* Extension errors */
   ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR)

   default:
      assert(!"Unknown error");
      error_str = "unknown error";
   }

#undef ERROR_CASE

   if (format) {
      va_start(ap, format);
      vsnprintf(buffer, sizeof(buffer), format, ap);
      va_end(ap);

      fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str);
   } else {
      fprintf(stderr, "%s:%d: %s\n", file, line, error_str);
   }

   return error;
}

int
anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size)
{
   assert(util_is_power_of_two(size));
   assert(element_size < size && util_is_power_of_two(element_size));

   vector->head = 0;
   vector->tail = 0;
   vector->element_size = element_size;
   vector->size = size;
   vector->data = malloc(size);

   return vector->data != NULL;
}

void *
anv_vector_add(struct anv_vector *vector)
{
   uint32_t offset, size, split, src_tail, dst_tail;
   void *data;

   if (vector->head - vector->tail == vector->size) {
      size = vector->size * 2;
      data = malloc(size);
      if (data == NULL)
         return NULL;
      src_tail = vector->tail & (vector->size - 1);
      dst_tail = vector->tail & (size - 1);
      if (src_tail == 0) {
         /* Since we know that the vector is full, this means that it's
          * linear from start to end so we can do one copy.
          */
         memcpy(data + dst_tail, vector->data, vector->size);
      } else {
         /* In this case, the vector is split into two pieces and we have
          * to do two copies.  We have to be careful to make sure each
          * piece goes to the right locations.  Thanks to the change in
          * size, it may or may not still wrap around.
          */
         split = align_u32(vector->tail, vector->size);
         assert(vector->tail <= split && split < vector->head);
         memcpy(data + dst_tail, vector->data + src_tail,
                split - vector->tail);
         memcpy(data + (split & (size - 1)), vector->data,
                vector->head - split);
      }
      free(vector->data);
      vector->data = data;
      vector->size = size;
   }

   assert(vector->head - vector->tail < vector->size);

   offset = vector->head & (vector->size - 1);
   vector->head += vector->element_size;

   return vector->data + offset;
}

void *
anv_vector_remove(struct anv_vector *vector)
{
   uint32_t offset;

   if (vector->head == vector->tail)
      return NULL;

   assert(vector->head - vector->tail <= vector->size);

   offset = vector->tail & (vector->size - 1);
   vector->tail += vector->element_size;

   return vector->data + offset;
}
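anv_vector is a power-of-two ring buffer whose head and tail are monotonically increasing byte counters: masking with (size - 1) recovers the physical offset, and head - tail is the occupancy even after the counters wrap the buffer. A short sketch of just that index arithmetic, with made-up sizes:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* Monotonic byte counters, as in anv_vector: they only grow, and the
    * physical offset is recovered by masking with (size - 1). */
   const uint32_t size = 16;        /* power of two */
   uint32_t head = 0, tail = 0;

   for (int i = 0; i < 10; i++) {   /* push 10 four-byte elements */
      if (head - tail == size)      /* full: pop one first */
         tail += 4;
      uint32_t offset = head & (size - 1);
      printf("element %d at offset %u (in use: %u bytes)\n",
             i, offset, head - tail);
      head += 4;
   }

   /* Occupancy stays correct even once head has wrapped the buffer. */
   assert(head - tail <= size);
   return 0;
}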
234 src/intel/vulkan/anv_wsi.c Normal file
@@ -0,0 +1,234 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_wsi.h"

VkResult
anv_init_wsi(struct anv_instance *instance)
{
   VkResult result;

   result = anv_x11_init_wsi(instance);
   if (result != VK_SUCCESS)
      return result;

#ifdef HAVE_WAYLAND_PLATFORM
   result = anv_wl_init_wsi(instance);
   if (result != VK_SUCCESS) {
      anv_x11_finish_wsi(instance);
      return result;
   }
#endif

   return VK_SUCCESS;
}

void
anv_finish_wsi(struct anv_instance *instance)
{
#ifdef HAVE_WAYLAND_PLATFORM
   anv_wl_finish_wsi(instance);
#endif
   anv_x11_finish_wsi(instance);
}

void anv_DestroySurfaceKHR(
    VkInstance                                  _instance,
    VkSurfaceKHR                                _surface,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);

   anv_free2(&instance->alloc, pAllocator, surface);
}

VkResult anv_GetPhysicalDeviceSurfaceSupportKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    VkSurfaceKHR                                _surface,
    VkBool32*                                   pSupported)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
   struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];

   return iface->get_support(surface, device, queueFamilyIndex, pSupported);
}

VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
    VkPhysicalDevice                            physicalDevice,
    VkSurfaceKHR                                _surface,
    VkSurfaceCapabilitiesKHR*                   pSurfaceCapabilities)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
   struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];

   return iface->get_capabilities(surface, device, pSurfaceCapabilities);
}

VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR(
    VkPhysicalDevice                            physicalDevice,
    VkSurfaceKHR                                _surface,
    uint32_t*                                   pSurfaceFormatCount,
    VkSurfaceFormatKHR*                         pSurfaceFormats)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
   struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];

   return iface->get_formats(surface, device, pSurfaceFormatCount,
                             pSurfaceFormats);
}

VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR(
    VkPhysicalDevice                            physicalDevice,
    VkSurfaceKHR                                _surface,
    uint32_t*                                   pPresentModeCount,
    VkPresentModeKHR*                           pPresentModes)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
   struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];

   return iface->get_present_modes(surface, device, pPresentModeCount,
                                   pPresentModes);
}

VkResult anv_CreateSwapchainKHR(
    VkDevice                                    _device,
    const VkSwapchainCreateInfoKHR*             pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkSwapchainKHR*                             pSwapchain)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface);
   struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];
   struct anv_swapchain *swapchain;

   VkResult result = iface->create_swapchain(surface, device, pCreateInfo,
                                             pAllocator, &swapchain);
   if (result != VK_SUCCESS)
      return result;

   if (pAllocator)
      swapchain->alloc = *pAllocator;
   else
      swapchain->alloc = device->alloc;

   for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++)
      swapchain->fences[i] = VK_NULL_HANDLE;

   *pSwapchain = anv_swapchain_to_handle(swapchain);

   return VK_SUCCESS;
}

void anv_DestroySwapchainKHR(
    VkDevice                                    device,
    VkSwapchainKHR                              _swapchain,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);

   for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) {
      if (swapchain->fences[i] != VK_NULL_HANDLE)
         anv_DestroyFence(device, swapchain->fences[i], pAllocator);
   }

   swapchain->destroy(swapchain, pAllocator);
}

VkResult anv_GetSwapchainImagesKHR(
    VkDevice                                    device,
    VkSwapchainKHR                              _swapchain,
    uint32_t*                                   pSwapchainImageCount,
    VkImage*                                    pSwapchainImages)
{
   ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);

   return swapchain->get_images(swapchain, pSwapchainImageCount,
                                pSwapchainImages);
}

VkResult anv_AcquireNextImageKHR(
    VkDevice                                    device,
    VkSwapchainKHR                              _swapchain,
    uint64_t                                    timeout,
    VkSemaphore                                 semaphore,
    VkFence                                     fence,
    uint32_t*                                   pImageIndex)
{
   ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);

   return swapchain->acquire_next_image(swapchain, timeout, semaphore,
                                        pImageIndex);
}

VkResult anv_QueuePresentKHR(
    VkQueue                                     _queue,
    const VkPresentInfoKHR*                     pPresentInfo)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   VkResult result;

   for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
      ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]);

      assert(swapchain->device == queue->device);

      if (swapchain->fences[0] == VK_NULL_HANDLE) {
         result = anv_CreateFence(anv_device_to_handle(queue->device),
                                  &(VkFenceCreateInfo) {
                                     .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
                                     .flags = 0,
                                  }, &swapchain->alloc, &swapchain->fences[0]);
         if (result != VK_SUCCESS)
            return result;
      } else {
         anv_ResetFences(anv_device_to_handle(queue->device),
                         1, &swapchain->fences[0]);
      }

      anv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]);

      result = swapchain->queue_present(swapchain, queue,
                                        pPresentInfo->pImageIndices[i]);
      /* TODO: What if one of them returns OUT_OF_DATE? */
      if (result != VK_SUCCESS)
         return result;

      VkFence last = swapchain->fences[2];
      swapchain->fences[2] = swapchain->fences[1];
      swapchain->fences[1] = swapchain->fences[0];
      swapchain->fences[0] = last;

      if (last != VK_NULL_HANDLE) {
         anv_WaitForFences(anv_device_to_handle(queue->device),
                           1, &last, true, 1);
      }
   }

   return VK_SUCCESS;
}
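anv_QueuePresentKHR throttles the CPU with a ring of three fences: slot 0 is signaled by the present just submitted, the array is rotated, and the fence that falls out (from two presents ago) is waited on. A toy model of that rotation, with frame numbers standing in for real fences:

#include <stdio.h>

int main(void)
{
   int fences[3] = { -1, -1, -1 };   /* -1 plays the role of VK_NULL_HANDLE */

   for (int frame = 0; frame < 6; frame++) {
      fences[0] = frame;             /* submit the present with fences[0] */

      int last = fences[2];          /* rotate: 0 -> 1 -> 2 -> out */
      fences[2] = fences[1];
      fences[1] = fences[0];
      fences[0] = last;

      if (last != -1)                /* block on the present from 2 frames ago */
         printf("frame %d: wait on fence of frame %d\n", frame, last);
   }
   return 0;
}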
78 src/intel/vulkan/anv_wsi.h Normal file
@@ -0,0 +1,78 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#pragma once

#include "anv_private.h"

struct anv_swapchain;

struct anv_wsi_interface {
   VkResult (*get_support)(VkIcdSurfaceBase *surface,
                           struct anv_physical_device *device,
                           uint32_t queueFamilyIndex,
                           VkBool32* pSupported);
   VkResult (*get_capabilities)(VkIcdSurfaceBase *surface,
                                struct anv_physical_device *device,
                                VkSurfaceCapabilitiesKHR* pSurfaceCapabilities);
   VkResult (*get_formats)(VkIcdSurfaceBase *surface,
                           struct anv_physical_device *device,
                           uint32_t* pSurfaceFormatCount,
                           VkSurfaceFormatKHR* pSurfaceFormats);
   VkResult (*get_present_modes)(VkIcdSurfaceBase *surface,
                                 struct anv_physical_device *device,
                                 uint32_t* pPresentModeCount,
                                 VkPresentModeKHR* pPresentModes);
   VkResult (*create_swapchain)(VkIcdSurfaceBase *surface,
                                struct anv_device *device,
                                const VkSwapchainCreateInfoKHR* pCreateInfo,
                                const VkAllocationCallbacks* pAllocator,
                                struct anv_swapchain **swapchain);
};

struct anv_swapchain {
   struct anv_device *device;

   VkAllocationCallbacks alloc;

   VkFence fences[3];

   VkResult (*destroy)(struct anv_swapchain *swapchain,
                       const VkAllocationCallbacks *pAllocator);
   VkResult (*get_images)(struct anv_swapchain *swapchain,
                          uint32_t *pCount, VkImage *pSwapchainImages);
   VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain,
                                  uint64_t timeout, VkSemaphore semaphore,
                                  uint32_t *image_index);
   VkResult (*queue_present)(struct anv_swapchain *swap_chain,
                             struct anv_queue *queue,
                             uint32_t image_index);
};

ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR)

VkResult anv_x11_init_wsi(struct anv_instance *instance);
void anv_x11_finish_wsi(struct anv_instance *instance);
VkResult anv_wl_init_wsi(struct anv_instance *instance);
void anv_wl_finish_wsi(struct anv_instance *instance);
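The handle casts and per-platform init declarations above imply how the generic KHR entry points are routed. The following is a minimal sketch of that dispatch, assuming the instance keeps one anv_wsi_interface pointer per VkIcdWsiPlatform (which is what the init functions set up) and that the cast macro generates a matching _from_handle helper; the body is illustrative rather than copied from this commit:

VkResult anv_GetPhysicalDeviceSurfaceSupportKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    VkSurfaceKHR                                _surface,
    VkBool32*                                   pSupported)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
   VkIcdSurfaceBase *surface = _VkIcdSurfaceBase_from_handle(_surface);

   /* Every surface starts with a VkIcdSurfaceBase carrying its platform
    * tag, so we can pick the backend registered for that platform and
    * forward the query unchanged.
    */
   struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];

   return iface->get_support(surface, device, queueFamilyIndex, pSupported);
}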
src/intel/vulkan/anv_wsi_wayland.c (new file, 871 lines)
@@ -0,0 +1,871 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <wayland-client.h>
#include <wayland-drm-client-protocol.h>

#include "anv_wsi.h"

#include <util/hash_table.h>

#define MIN_NUM_IMAGES 2

struct wsi_wl_display {
   struct wl_display *                          display;
   struct wl_drm *                              drm;

   /* Vector of VkFormats supported */
   struct anv_vector                            formats;

   uint32_t                                     capabilities;
};

struct wsi_wayland {
   struct anv_wsi_interface                     base;

   struct anv_instance *                        instance;

   pthread_mutex_t                              mutex;
   /* Hash table of wl_display -> wsi_wl_display mappings */
   struct hash_table *                          displays;
};
static void
wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format)
{
   /* Don't add a format that's already in the list */
   VkFormat *f;
   anv_vector_foreach(f, &display->formats)
      if (*f == format)
         return;

   /* Don't add formats which aren't supported by the driver */
   if (anv_format_for_vk_format(format)->isl_format ==
       ISL_FORMAT_UNSUPPORTED) {
      return;
   }

   f = anv_vector_add(&display->formats);
   if (f)
      *f = format;
}

static void
drm_handle_device(void *data, struct wl_drm *drm, const char *name)
{
   fprintf(stderr, "wl_drm.device(%s)\n", name);
}

static uint32_t
wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha)
{
   switch (vk_format) {
   /* TODO: Figure out what all the formats mean and make this table
    * correct.
    */
#if 0
   case VK_FORMAT_R4G4B4A4_UNORM:
      return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444;
   case VK_FORMAT_R5G6B5_UNORM:
      return WL_DRM_FORMAT_BGR565;
   case VK_FORMAT_R5G5B5A1_UNORM:
      return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555;
   case VK_FORMAT_R8G8B8_UNORM:
      return WL_DRM_FORMAT_XBGR8888;
   case VK_FORMAT_R8G8B8A8_UNORM:
      return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888;
   case VK_FORMAT_R10G10B10A2_UNORM:
      return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010;
   case VK_FORMAT_B4G4R4A4_UNORM:
      return alpha ? WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444;
   case VK_FORMAT_B5G6R5_UNORM:
      return WL_DRM_FORMAT_RGB565;
   case VK_FORMAT_B5G5R5A1_UNORM:
      return alpha ? WL_DRM_FORMAT_ARGB1555 : WL_DRM_FORMAT_XRGB1555;
#endif
   case VK_FORMAT_B8G8R8_SRGB:
      return WL_DRM_FORMAT_BGRX8888;
   case VK_FORMAT_B8G8R8A8_SRGB:
      return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888;
#if 0
   case VK_FORMAT_B10G10R10A2_UNORM:
      return alpha ? WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010;
#endif

   default:
      assert(!"Unsupported Vulkan format");
      return 0;
   }
}
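As a quick sanity check on the mapping above, the only cases enabled outside the #if 0 blocks round-trip as follows (a hypothetical self-test, not part of the commit):

#include <assert.h>

static void
check_format_table(void)
{
   /* B8G8R8A8_SRGB maps to ARGB8888 when alpha is wanted and to the
    * opaque XRGB8888 variant otherwise; B8G8R8_SRGB has no alpha form. */
   assert(wl_drm_format_for_vk_format(VK_FORMAT_B8G8R8A8_SRGB, true) ==
          WL_DRM_FORMAT_ARGB8888);
   assert(wl_drm_format_for_vk_format(VK_FORMAT_B8G8R8A8_SRGB, false) ==
          WL_DRM_FORMAT_XRGB8888);
   assert(wl_drm_format_for_vk_format(VK_FORMAT_B8G8R8_SRGB, false) ==
          WL_DRM_FORMAT_BGRX8888);
}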
static void
drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format)
{
   struct wsi_wl_display *display = data;

   switch (wl_format) {
#if 0
   case WL_DRM_FORMAT_ABGR4444:
   case WL_DRM_FORMAT_XBGR4444:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM);
      break;
   case WL_DRM_FORMAT_BGR565:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM);
      break;
   case WL_DRM_FORMAT_ABGR1555:
   case WL_DRM_FORMAT_XBGR1555:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM);
      break;
   case WL_DRM_FORMAT_XBGR8888:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM);
      /* fallthrough */
   case WL_DRM_FORMAT_ABGR8888:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM);
      break;
   case WL_DRM_FORMAT_ABGR2101010:
   case WL_DRM_FORMAT_XBGR2101010:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM);
      break;
   case WL_DRM_FORMAT_ARGB4444:
   case WL_DRM_FORMAT_XRGB4444:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM);
      break;
   case WL_DRM_FORMAT_RGB565:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM);
      break;
   case WL_DRM_FORMAT_ARGB1555:
   case WL_DRM_FORMAT_XRGB1555:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM);
      break;
#endif
   case WL_DRM_FORMAT_XRGB8888:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB);
      /* fallthrough */
   case WL_DRM_FORMAT_ARGB8888:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB);
      break;
#if 0
   case WL_DRM_FORMAT_ARGB2101010:
   case WL_DRM_FORMAT_XRGB2101010:
      wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM);
      break;
#endif
   }
}

static void
drm_handle_authenticated(void *data, struct wl_drm *drm)
{
}

static void
drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities)
{
   struct wsi_wl_display *display = data;

   display->capabilities = capabilities;
}

static const struct wl_drm_listener drm_listener = {
   drm_handle_device,
   drm_handle_format,
   drm_handle_authenticated,
   drm_handle_capabilities,
};

static void
registry_handle_global(void *data, struct wl_registry *registry,
                       uint32_t name, const char *interface, uint32_t version)
{
   struct wsi_wl_display *display = data;

   if (strcmp(interface, "wl_drm") == 0) {
      assert(display->drm == NULL);

      assert(version >= 2);
      display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2);

      if (display->drm)
         wl_drm_add_listener(display->drm, &drm_listener, display);
   }
}

static void
registry_handle_global_remove(void *data, struct wl_registry *registry,
                              uint32_t name)
{ /* No-op */ }

static const struct wl_registry_listener registry_listener = {
   registry_handle_global,
   registry_handle_global_remove
};

static void
wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display)
{
   anv_vector_finish(&display->formats);
   if (display->drm)
      wl_drm_destroy(display->drm);
   anv_free(&wsi->instance->alloc, display);
}
static struct wsi_wl_display *
wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display)
{
   struct wsi_wl_display *display =
      anv_alloc(&wsi->instance->alloc, sizeof(*display), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!display)
      return NULL;

   memset(display, 0, sizeof(*display));

   display->display = wl_display;

   /* NULL-initialize the registry up front so the fail path below never
    * sees it uninitialized.
    */
   struct wl_registry *registry = NULL;

   if (!anv_vector_init(&display->formats, sizeof(VkFormat), 8))
      goto fail;

   registry = wl_display_get_registry(wl_display);
   if (!registry)
      goto fail;

   wl_registry_add_listener(registry, &registry_listener, display);

   /* Round-trip to get the wl_drm global */
   wl_display_roundtrip(wl_display);

   if (!display->drm)
      goto fail;

   /* Round-trip to get wl_drm formats and capabilities */
   wl_display_roundtrip(wl_display);

   /* We need prime support */
   if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME))
      goto fail;

   /* We don't need this anymore */
   wl_registry_destroy(registry);

   return display;

fail:
   if (registry)
      wl_registry_destroy(registry);

   wsi_wl_display_destroy(wsi, display);
   return NULL;
}
static struct wsi_wl_display *
wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display)
{
   struct wsi_wayland *wsi =
      (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];

   pthread_mutex_lock(&wsi->mutex);

   struct hash_entry *entry = _mesa_hash_table_search(wsi->displays,
                                                      wl_display);
   if (!entry) {
      /* We're about to make a bunch of blocking calls.  Let's drop the
       * mutex for now so we don't block up too badly.
       */
      pthread_mutex_unlock(&wsi->mutex);

      struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display);

      pthread_mutex_lock(&wsi->mutex);

      entry = _mesa_hash_table_search(wsi->displays, wl_display);
      if (entry) {
         /* Oops, someone raced us to it */
         wsi_wl_display_destroy(wsi, display);
      } else {
         entry = _mesa_hash_table_insert(wsi->displays, wl_display, display);
      }
   }

   pthread_mutex_unlock(&wsi->mutex);

   return entry->data;
}

VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    struct wl_display*                          display)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   return wsi_wl_get_display(physical_device->instance, display) != NULL;
}

static VkResult
wsi_wl_surface_get_support(VkIcdSurfaceBase *surface,
                           struct anv_physical_device *device,
                           uint32_t queueFamilyIndex,
                           VkBool32* pSupported)
{
   *pSupported = true;

   return VK_SUCCESS;
}

static const VkPresentModeKHR present_modes[] = {
   VK_PRESENT_MODE_MAILBOX_KHR,
   VK_PRESENT_MODE_FIFO_KHR,
};

static VkResult
wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface,
                                struct anv_physical_device *device,
                                VkSurfaceCapabilitiesKHR* caps)
{
   caps->minImageCount = MIN_NUM_IMAGES;
   caps->maxImageCount = 4;
   caps->currentExtent = (VkExtent2D) { -1, -1 };
   caps->minImageExtent = (VkExtent2D) { 1, 1 };
   caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX };
   caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
   caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
   caps->maxImageArrayLayers = 1;

   caps->supportedCompositeAlpha =
      VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR |
      VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR;

   caps->supportedUsageFlags =
      VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
      VK_IMAGE_USAGE_SAMPLED_BIT |
      VK_IMAGE_USAGE_TRANSFER_DST_BIT |
      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;

   return VK_SUCCESS;
}

static VkResult
wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface,
                           struct anv_physical_device *device,
                           uint32_t* pSurfaceFormatCount,
                           VkSurfaceFormatKHR* pSurfaceFormats)
{
   VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
   struct wsi_wl_display *display =
      wsi_wl_get_display(device->instance, surface->display);

   uint32_t count = anv_vector_length(&display->formats);

   if (pSurfaceFormats == NULL) {
      *pSurfaceFormatCount = count;
      return VK_SUCCESS;
   }

   assert(*pSurfaceFormatCount >= count);
   *pSurfaceFormatCount = count;

   VkFormat *f;
   anv_vector_foreach(f, &display->formats) {
      *(pSurfaceFormats++) = (VkSurfaceFormatKHR) {
         .format = *f,
         /* TODO: We should get this from the compositor somehow */
         .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR,
      };
   }

   return VK_SUCCESS;
}

static VkResult
wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface,
                                 struct anv_physical_device *device,
                                 uint32_t* pPresentModeCount,
                                 VkPresentModeKHR* pPresentModes)
{
   if (pPresentModes == NULL) {
      *pPresentModeCount = ARRAY_SIZE(present_modes);
      return VK_SUCCESS;
   }

   assert(*pPresentModeCount >= ARRAY_SIZE(present_modes));
   /* Copy only as many modes as actually exist; copying
    * *pPresentModeCount entries would read past the table.
    */
   typed_memcpy(pPresentModes, present_modes, ARRAY_SIZE(present_modes));
   *pPresentModeCount = ARRAY_SIZE(present_modes);

   return VK_SUCCESS;
}

static VkResult
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface,
                                struct anv_device *device,
                                const VkSwapchainCreateInfoKHR* pCreateInfo,
                                const VkAllocationCallbacks* pAllocator,
                                struct anv_swapchain **swapchain);
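Each query above implements the standard Vulkan two-call idiom: a NULL output array returns the element count, and a second call fills the caller's buffer (the asserts enforce that the buffer is big enough). A minimal caller-side sketch using only core WSI entry points:

#include <stdlib.h>
#include <vulkan/vulkan.h>

static VkPresentModeKHR *
query_present_modes(VkPhysicalDevice pdev, VkSurfaceKHR surface,
                    uint32_t *count)
{
   /* First call: NULL array, just retrieve the count. */
   vkGetPhysicalDeviceSurfacePresentModesKHR(pdev, surface, count, NULL);

   VkPresentModeKHR *modes = malloc(*count * sizeof(*modes));
   if (!modes)
      return NULL;

   /* Second call: the driver copies up to *count entries. */
   vkGetPhysicalDeviceSurfacePresentModesKHR(pdev, surface, count, modes);

   return modes;
}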
VkResult anv_CreateWaylandSurfaceKHR(
    VkInstance                                  _instance,
    const VkWaylandSurfaceCreateInfoKHR*        pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkSurfaceKHR*                               pSurface)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR);

   VkIcdSurfaceWayland *surface;

   surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (surface == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND;
   surface->display = pCreateInfo->display;
   surface->surface = pCreateInfo->surface;

   *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base);

   return VK_SUCCESS;
}

struct wsi_wl_image {
   struct anv_image *                           image;
   struct anv_device_memory *                   memory;
   struct wl_buffer *                           buffer;
   bool                                         busy;
};

struct wsi_wl_swapchain {
   struct anv_swapchain                         base;

   struct wsi_wl_display *                      display;
   struct wl_event_queue *                      queue;
   struct wl_surface *                          surface;

   VkExtent2D                                   extent;
   VkFormat                                     vk_format;
   uint32_t                                     drm_format;

   VkPresentModeKHR                             present_mode;
   bool                                         fifo_ready;

   uint32_t                                     image_count;
   struct wsi_wl_image                          images[0];
};
static VkResult
wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain,
                            uint32_t *pCount, VkImage *pSwapchainImages)
{
   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain;

   if (pSwapchainImages == NULL) {
      *pCount = chain->image_count;
      return VK_SUCCESS;
   }

   assert(chain->image_count <= *pCount);
   for (uint32_t i = 0; i < chain->image_count; i++)
      pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image);

   *pCount = chain->image_count;

   return VK_SUCCESS;
}

static VkResult
wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain,
                                    uint64_t timeout,
                                    VkSemaphore semaphore,
                                    uint32_t *image_index)
{
   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain;

   int ret = wl_display_dispatch_queue_pending(chain->display->display,
                                               chain->queue);
   /* XXX: I'm not sure if out-of-date is the right error here.  If
    * wl_display_dispatch_queue_pending fails it most likely means we got
    * kicked by the server so this seems more-or-less correct.
    */
   if (ret < 0)
      return vk_error(VK_ERROR_OUT_OF_DATE_KHR);

   while (1) {
      for (uint32_t i = 0; i < chain->image_count; i++) {
         if (!chain->images[i].busy) {
            /* We found a non-busy image */
            *image_index = i;
            return VK_SUCCESS;
         }
      }

      /* This time we do a blocking dispatch because we can't go
       * anywhere until we get an event.
       */
      int ret = wl_display_roundtrip_queue(chain->display->display,
                                           chain->queue);
      if (ret < 0)
         return vk_error(VK_ERROR_OUT_OF_DATE_KHR);
   }
}

static void
frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial)
{
   struct wsi_wl_swapchain *chain = data;

   chain->fifo_ready = true;

   wl_callback_destroy(callback);
}

static const struct wl_callback_listener frame_listener = {
   frame_handle_done,
};
static VkResult
wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain,
                               struct anv_queue *queue,
                               uint32_t image_index)
{
   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain;

   if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) {
      while (!chain->fifo_ready) {
         int ret = wl_display_dispatch_queue(chain->display->display,
                                             chain->queue);
         if (ret < 0)
            return vk_error(VK_ERROR_OUT_OF_DATE_KHR);
      }
   }

   assert(image_index < chain->image_count);
   wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0);
   wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX);

   if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) {
      struct wl_callback *frame = wl_surface_frame(chain->surface);
      wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue);
      wl_callback_add_listener(frame, &frame_listener, chain);
      chain->fifo_ready = false;
   }

   chain->images[image_index].busy = true;
   wl_surface_commit(chain->surface);
   wl_display_flush(chain->display->display);

   return VK_SUCCESS;
}

static void
wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image,
                    const VkAllocationCallbacks* pAllocator)
{
   VkDevice vk_device = anv_device_to_handle(chain->base.device);
   anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory),
                  pAllocator);
   anv_DestroyImage(vk_device, anv_image_to_handle(image->image),
                    pAllocator);
}

static void
buffer_handle_release(void *data, struct wl_buffer *buffer)
{
   struct wsi_wl_image *image = data;

   assert(image->buffer == buffer);

   image->busy = false;
}

static const struct wl_buffer_listener buffer_listener = {
   buffer_handle_release,
};
static VkResult
wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image,
                  const VkAllocationCallbacks* pAllocator)
{
   VkDevice vk_device = anv_device_to_handle(chain->base.device);
   VkResult result;

   VkImage vk_image;
   result = anv_image_create(vk_device,
      &(struct anv_image_create_info) {
         .isl_tiling_flags = ISL_TILING_X_BIT,
         .stride = 0,
         .vk_info =
      &(VkImageCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = chain->vk_format,
         .extent = {
            .width = chain->extent.width,
            .height = chain->extent.height,
            .depth = 1
         },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = 1,
         /* FIXME: Need a way to use X tiling to allow scanout */
         .tiling = VK_IMAGE_TILING_OPTIMAL,
         .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
         .flags = 0,
      }},
      pAllocator,
      &vk_image);

   if (result != VK_SUCCESS)
      return result;

   image->image = anv_image_from_handle(vk_image);
   assert(anv_format_is_color(image->image->format));

   struct anv_surface *surface = &image->image->color_surface;

   VkDeviceMemory vk_memory;
   result = anv_AllocateMemory(vk_device,
      &(VkMemoryAllocateInfo) {
         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
         .allocationSize = image->image->size,
         .memoryTypeIndex = 0,
      },
      pAllocator,
      &vk_memory);

   if (result != VK_SUCCESS)
      goto fail_image;

   image->memory = anv_device_memory_from_handle(vk_memory);
   image->memory->bo.is_winsys_bo = true;

   result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0);

   if (result != VK_SUCCESS)
      goto fail_mem;

   int ret = anv_gem_set_tiling(chain->base.device,
                                image->memory->bo.gem_handle,
                                surface->isl.row_pitch, I915_TILING_X);
   if (ret) {
      /* FINISHME: Choose a better error. */
      result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto fail_mem;
   }

   int fd = anv_gem_handle_to_fd(chain->base.device,
                                 image->memory->bo.gem_handle);
   if (fd == -1) {
      /* FINISHME: Choose a better error. */
      result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto fail_mem;
   }

   image->buffer = wl_drm_create_prime_buffer(chain->display->drm,
                                              fd, /* name */
                                              chain->extent.width,
                                              chain->extent.height,
                                              chain->drm_format,
                                              surface->offset,
                                              surface->isl.row_pitch,
                                              0, 0, 0, 0 /* unused */);
   wl_display_roundtrip(chain->display->display);
   close(fd);

   wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue);
   wl_buffer_add_listener(image->buffer, &buffer_listener, image);

   return VK_SUCCESS;

fail_mem:
   anv_FreeMemory(vk_device, vk_memory, pAllocator);
fail_image:
   anv_DestroyImage(vk_device, vk_image, pAllocator);

   return result;
}
static VkResult
wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain,
                         const VkAllocationCallbacks *pAllocator)
{
   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain;

   for (uint32_t i = 0; i < chain->image_count; i++) {
      if (chain->images[i].buffer)
         wsi_wl_image_finish(chain, &chain->images[i], pAllocator);
   }

   anv_free2(&chain->base.device->alloc, pAllocator, chain);

   return VK_SUCCESS;
}
static VkResult
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
                                struct anv_device *device,
                                const VkSwapchainCreateInfoKHR* pCreateInfo,
                                const VkAllocationCallbacks* pAllocator,
                                struct anv_swapchain **swapchain_out)
{
   VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
   struct wsi_wl_swapchain *chain;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR);

   int num_images = pCreateInfo->minImageCount;

   assert(num_images >= MIN_NUM_IMAGES);

   /* For true mailbox mode, we need at least 4 images:
    *  1) One to scan out from
    *  2) One to have queued for scan-out
    *  3) One to be currently held by the Wayland compositor
    *  4) One to render to
    */
   if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR)
      num_images = MAX2(num_images, 4);

   size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]);
   chain = anv_alloc2(&device->alloc, pAllocator, size, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (chain == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   chain->base.device = device;
   chain->base.destroy = wsi_wl_swapchain_destroy;
   chain->base.get_images = wsi_wl_swapchain_get_images;
   chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image;
   chain->base.queue_present = wsi_wl_swapchain_queue_present;

   chain->surface = surface->surface;
   chain->extent = pCreateInfo->imageExtent;
   chain->vk_format = pCreateInfo->imageFormat;
   chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false);

   chain->present_mode = pCreateInfo->presentMode;
   chain->fifo_ready = true;

   chain->image_count = num_images;

   /* Mark a bunch of stuff as NULL.  This way we can just call
    * destroy_swapchain for cleanup.
    */
   for (uint32_t i = 0; i < chain->image_count; i++)
      chain->images[i].buffer = NULL;
   chain->queue = NULL;

   chain->display = wsi_wl_get_display(device->instance, surface->display);
   if (!chain->display) {
      /* FINISHME: Choose a better error. */
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail;
   }

   chain->queue = wl_display_create_queue(chain->display->display);
   if (!chain->queue) {
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail;
   }

   for (uint32_t i = 0; i < chain->image_count; i++) {
      result = wsi_wl_image_init(chain, &chain->images[i], pAllocator);
      if (result != VK_SUCCESS)
         goto fail;
      chain->images[i].busy = false;
   }

   *swapchain_out = &chain->base;

   return VK_SUCCESS;

fail:
   wsi_wl_swapchain_destroy(&chain->base, pAllocator);

   return result;
}
VkResult
anv_wl_init_wsi(struct anv_instance *instance)
{
   struct wsi_wayland *wsi;
   VkResult result;

   wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!wsi) {
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail;
   }

   wsi->instance = instance;

   int ret = pthread_mutex_init(&wsi->mutex, NULL);
   if (ret != 0) {
      if (ret == ENOMEM) {
         result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      } else {
         /* FINISHME: Choose a better error. */
         result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      goto fail_alloc;
   }

   wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                           _mesa_key_pointer_equal);
   if (!wsi->displays) {
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_mutex;
   }

   wsi->base.get_support = wsi_wl_surface_get_support;
   wsi->base.get_capabilities = wsi_wl_surface_get_capabilities;
   wsi->base.get_formats = wsi_wl_surface_get_formats;
   wsi->base.get_present_modes = wsi_wl_surface_get_present_modes;
   wsi->base.create_swapchain = wsi_wl_surface_create_swapchain;

   instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base;

   return VK_SUCCESS;

fail_mutex:
   pthread_mutex_destroy(&wsi->mutex);

fail_alloc:
   anv_free(&instance->alloc, wsi);
fail:
   instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL;

   return result;
}

void
anv_wl_finish_wsi(struct anv_instance *instance)
{
   struct wsi_wayland *wsi =
      (struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];

   if (wsi) {
      _mesa_hash_table_destroy(wsi->displays, NULL);

      pthread_mutex_destroy(&wsi->mutex);

      anv_free(&instance->alloc, wsi);
   }
}
src/intel/vulkan/anv_wsi_x11.c (new file, 902 lines)
@@ -0,0 +1,902 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <X11/xshmfence.h>
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>

#include "anv_wsi.h"

#include "util/hash_table.h"

struct wsi_x11_connection {
   bool has_dri3;
   bool has_present;
};

struct wsi_x11 {
   struct anv_wsi_interface base;

   pthread_mutex_t                              mutex;
   /* Hash table of xcb_connection -> wsi_x11_connection mappings */
   struct hash_table *connections;
};
static struct wsi_x11_connection *
wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn)
{
   xcb_query_extension_cookie_t dri3_cookie, pres_cookie;
   xcb_query_extension_reply_t *dri3_reply, *pres_reply;

   struct wsi_x11_connection *wsi_conn =
      anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!wsi_conn)
      return NULL;

   dri3_cookie = xcb_query_extension(conn, 4, "DRI3");
   /* The extension registers itself as "Present", not "PRESENT". */
   pres_cookie = xcb_query_extension(conn, 7, "Present");

   dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL);
   pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL);
   if (dri3_reply == NULL || pres_reply == NULL) {
      free(dri3_reply);
      free(pres_reply);
      anv_free(&instance->alloc, wsi_conn);
      return NULL;
   }

   wsi_conn->has_dri3 = dri3_reply->present != 0;
   wsi_conn->has_present = pres_reply->present != 0;

   free(dri3_reply);
   free(pres_reply);

   return wsi_conn;
}

static void
wsi_x11_connection_destroy(struct anv_instance *instance,
                           struct wsi_x11_connection *conn)
{
   anv_free(&instance->alloc, conn);
}

static struct wsi_x11_connection *
wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn)
{
   struct wsi_x11 *wsi =
      (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB];

   pthread_mutex_lock(&wsi->mutex);

   struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn);
   if (!entry) {
      /* We're about to make a bunch of blocking calls.  Let's drop the
       * mutex for now so we don't block up too badly.
       */
      pthread_mutex_unlock(&wsi->mutex);

      struct wsi_x11_connection *wsi_conn =
         wsi_x11_connection_create(instance, conn);

      pthread_mutex_lock(&wsi->mutex);

      entry = _mesa_hash_table_search(wsi->connections, conn);
      if (entry) {
         /* Oops, someone raced us to it */
         wsi_x11_connection_destroy(instance, wsi_conn);
      } else {
         entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn);
      }
   }

   pthread_mutex_unlock(&wsi->mutex);

   return entry->data;
}
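This connection cache and the earlier Wayland display cache use the same optimistic locking shape. Stripped of driver details it looks like the sketch below; object_cache, create_object, and destroy_object are hypothetical stand-ins, not names from this commit:

/* Hypothetical generic form of the lock-drop-and-recheck caching used
 * above: build the expensive object unlocked, then discard it if another
 * thread won the race. */
struct object_cache {
   pthread_mutex_t mutex;
   struct hash_table *table;            /* key -> object */
};

static void *create_object(void *key);  /* hypothetical; may block */
static void destroy_object(void *obj);  /* hypothetical */

static void *
cache_get_or_create(struct object_cache *cache, void *key)
{
   pthread_mutex_lock(&cache->mutex);

   struct hash_entry *entry = _mesa_hash_table_search(cache->table, key);
   if (!entry) {
      /* Creation may block, so do it without the lock held. */
      pthread_mutex_unlock(&cache->mutex);
      void *obj = create_object(key);
      pthread_mutex_lock(&cache->mutex);

      /* Another thread may have populated the entry meanwhile; if so,
       * throw ours away and keep the winner's. */
      entry = _mesa_hash_table_search(cache->table, key);
      if (entry)
         destroy_object(obj);
      else
         entry = _mesa_hash_table_insert(cache->table, key, obj);
   }

   pthread_mutex_unlock(&cache->mutex);
   return entry->data;
}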
static const VkSurfaceFormatKHR formats[] = {
   { .format = VK_FORMAT_B8G8R8A8_SRGB, },
};

static const VkPresentModeKHR present_modes[] = {
   VK_PRESENT_MODE_MAILBOX_KHR,
};

static xcb_screen_t *
get_screen_for_root(xcb_connection_t *conn, xcb_window_t root)
{
   xcb_screen_iterator_t screen_iter =
      xcb_setup_roots_iterator(xcb_get_setup(conn));

   for (; screen_iter.rem; xcb_screen_next (&screen_iter)) {
      if (screen_iter.data->root == root)
         return screen_iter.data;
   }

   return NULL;
}

static xcb_visualtype_t *
screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id,
                      unsigned *depth)
{
   xcb_depth_iterator_t depth_iter =
      xcb_screen_allowed_depths_iterator(screen);

   for (; depth_iter.rem; xcb_depth_next (&depth_iter)) {
      xcb_visualtype_iterator_t visual_iter =
         xcb_depth_visuals_iterator (depth_iter.data);

      for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) {
         if (visual_iter.data->visual_id == visual_id) {
            if (depth)
               *depth = depth_iter.data->depth;
            return visual_iter.data;
         }
      }
   }

   return NULL;
}

static xcb_visualtype_t *
connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id,
                          unsigned *depth)
{
   xcb_screen_iterator_t screen_iter =
      xcb_setup_roots_iterator(xcb_get_setup(conn));

   /* For this we have to iterate over all of the screens which is rather
    * annoying.  Fortunately, there is probably only 1.
    */
   for (; screen_iter.rem; xcb_screen_next (&screen_iter)) {
      xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data,
                                                       visual_id, depth);
      if (visual)
         return visual;
   }

   return NULL;
}

static xcb_visualtype_t *
get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window,
                          unsigned *depth)
{
   xcb_query_tree_cookie_t tree_cookie;
   xcb_get_window_attributes_cookie_t attrib_cookie;
   xcb_query_tree_reply_t *tree;
   xcb_get_window_attributes_reply_t *attrib;

   tree_cookie = xcb_query_tree(conn, window);
   attrib_cookie = xcb_get_window_attributes(conn, window);

   tree = xcb_query_tree_reply(conn, tree_cookie, NULL);
   attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL);
   if (attrib == NULL || tree == NULL) {
      free(attrib);
      free(tree);
      return NULL;
   }

   xcb_window_t root = tree->root;
   xcb_visualid_t visual_id = attrib->visual;
   free(attrib);
   free(tree);

   xcb_screen_t *screen = get_screen_for_root(conn, root);
   if (screen == NULL)
      return NULL;

   return screen_get_visualtype(screen, visual_id, depth);
}

static bool
visual_has_alpha(xcb_visualtype_t *visual, unsigned depth)
{
   uint32_t rgb_mask = visual->red_mask |
                       visual->green_mask |
                       visual->blue_mask;

   uint32_t all_mask = 0xffffffff >> (32 - depth);

   /* Do we have bits left over after RGB? */
   return (all_mask & ~rgb_mask) != 0;
}
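A worked instance of the visual_has_alpha() arithmetic, with illustrative mask values for a typical 32-bit TrueColor visual:

/* depth = 32, red_mask = 0x00ff0000, green_mask = 0x0000ff00,
 * blue_mask = 0x000000ff:
 *
 *    rgb_mask = 0x00ffffff
 *    all_mask = 0xffffffff >> (32 - 32) = 0xffffffff
 *    all_mask & ~rgb_mask = 0xff000000 != 0   => alpha bits exist
 *
 * With the same masks at depth = 24, all_mask is 0x00ffffff, the
 * leftover is 0, and the function reports no alpha.
 */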
VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    queueFamilyIndex,
    xcb_connection_t*                           connection,
    xcb_visualid_t                              visual_id)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   struct wsi_x11_connection *wsi_conn =
      wsi_x11_get_connection(device->instance, connection);

   if (!wsi_conn->has_dri3) {
      fprintf(stderr, "vulkan: No DRI3 support\n");
      return false;
   }

   unsigned visual_depth;
   if (!connection_get_visualtype(connection, visual_id, &visual_depth))
      return false;

   if (visual_depth != 24 && visual_depth != 32)
      return false;

   return true;
}

static VkResult
x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
                        struct anv_physical_device *device,
                        uint32_t queueFamilyIndex,
                        VkBool32* pSupported)
{
   VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface;

   struct wsi_x11_connection *wsi_conn =
      wsi_x11_get_connection(device->instance, surface->connection);
   if (!wsi_conn)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   if (!wsi_conn->has_dri3) {
      fprintf(stderr, "vulkan: No DRI3 support\n");
      *pSupported = false;
      return VK_SUCCESS;
   }

   unsigned visual_depth;
   if (!get_visualtype_for_window(surface->connection, surface->window,
                                  &visual_depth)) {
      *pSupported = false;
      return VK_SUCCESS;
   }

   if (visual_depth != 24 && visual_depth != 32) {
      *pSupported = false;
      return VK_SUCCESS;
   }

   *pSupported = true;
   return VK_SUCCESS;
}
static VkResult
x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
                             struct anv_physical_device *device,
                             VkSurfaceCapabilitiesKHR *caps)
{
   VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface;
   xcb_get_geometry_cookie_t geom_cookie;
   xcb_generic_error_t *err;
   xcb_get_geometry_reply_t *geom;
   unsigned visual_depth;

   geom_cookie = xcb_get_geometry(surface->connection, surface->window);

   /* This does a round-trip.  This is why we do get_geometry first and
    * wait to read the reply until after we have a visual.
    */
   xcb_visualtype_t *visual =
      get_visualtype_for_window(surface->connection, surface->window,
                                &visual_depth);

   geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err);
   if (geom) {
      VkExtent2D extent = { geom->width, geom->height };
      caps->currentExtent = extent;
      caps->minImageExtent = extent;
      caps->maxImageExtent = extent;
   } else {
      /* This can happen if the client didn't wait for the configure event
       * to come back from the compositor.  In that case, we don't know the
       * size of the window so we just return valid "I don't know" stuff.
       */
      caps->currentExtent = (VkExtent2D) { -1, -1 };
      caps->minImageExtent = (VkExtent2D) { 1, 1 };
      caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX };
   }
   free(err);
   free(geom);

   /* Guard against a failed visual lookup; treat the window as opaque
    * rather than dereferencing a NULL visual.
    */
   if (visual && visual_has_alpha(visual, visual_depth)) {
      caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR |
                                      VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR;
   } else {
      caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR |
                                      VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
   }

   caps->minImageCount = 2;
   caps->maxImageCount = 4;
   caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
   caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
   caps->maxImageArrayLayers = 1;
   caps->supportedUsageFlags =
      VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
      VK_IMAGE_USAGE_SAMPLED_BIT |
      VK_IMAGE_USAGE_TRANSFER_DST_BIT |
      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;

   return VK_SUCCESS;
}
static VkResult
x11_surface_get_formats(VkIcdSurfaceBase *surface,
                        struct anv_physical_device *device,
                        uint32_t *pSurfaceFormatCount,
                        VkSurfaceFormatKHR *pSurfaceFormats)
{
   if (pSurfaceFormats == NULL) {
      *pSurfaceFormatCount = ARRAY_SIZE(formats);
      return VK_SUCCESS;
   }

   assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats));
   /* Copy only the entries that exist, not *pSurfaceFormatCount of them. */
   typed_memcpy(pSurfaceFormats, formats, ARRAY_SIZE(formats));
   *pSurfaceFormatCount = ARRAY_SIZE(formats);

   return VK_SUCCESS;
}

static VkResult
x11_surface_get_present_modes(VkIcdSurfaceBase *surface,
                              struct anv_physical_device *device,
                              uint32_t *pPresentModeCount,
                              VkPresentModeKHR *pPresentModes)
{
   if (pPresentModes == NULL) {
      *pPresentModeCount = ARRAY_SIZE(present_modes);
      return VK_SUCCESS;
   }

   assert(*pPresentModeCount >= ARRAY_SIZE(present_modes));
   typed_memcpy(pPresentModes, present_modes, ARRAY_SIZE(present_modes));
   *pPresentModeCount = ARRAY_SIZE(present_modes);

   return VK_SUCCESS;
}
static VkResult
x11_surface_create_swapchain(VkIcdSurfaceBase *surface,
                             struct anv_device *device,
                             const VkSwapchainCreateInfoKHR* pCreateInfo,
                             const VkAllocationCallbacks* pAllocator,
                             struct anv_swapchain **swapchain);

VkResult anv_CreateXcbSurfaceKHR(
    VkInstance                                  _instance,
    const VkXcbSurfaceCreateInfoKHR*            pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkSurfaceKHR*                               pSurface)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR);

   VkIcdSurfaceXcb *surface;

   surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (surface == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   surface->base.platform = VK_ICD_WSI_PLATFORM_XCB;
   surface->connection = pCreateInfo->connection;
   surface->window = pCreateInfo->window;

   *pSurface = _VkIcdSurfaceBase_to_handle(&surface->base);

   return VK_SUCCESS;
}
struct x11_image {
   struct anv_image *                           image;
   struct anv_device_memory *                   memory;
   xcb_pixmap_t                                 pixmap;
   bool                                         busy;
   struct xshmfence *                           shm_fence;
   uint32_t                                     sync_fence;
};

struct x11_swapchain {
   struct anv_swapchain                         base;

   xcb_connection_t *                           conn;
   xcb_window_t                                 window;
   xcb_gc_t                                     gc;
   VkExtent2D                                   extent;
   uint32_t                                     image_count;

   xcb_present_event_t                          event_id;
   xcb_special_event_t *                        special_event;
   uint64_t                                     send_sbc;
   uint32_t                                     stamp;

   struct x11_image                             images[0];
};

static VkResult
x11_get_images(struct anv_swapchain *anv_chain,
               uint32_t* pCount, VkImage *pSwapchainImages)
{
   struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;

   if (pSwapchainImages == NULL) {
      *pCount = chain->image_count;
      return VK_SUCCESS;
   }

   assert(chain->image_count <= *pCount);
   for (uint32_t i = 0; i < chain->image_count; i++)
      pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image);

   *pCount = chain->image_count;

   return VK_SUCCESS;
}
static VkResult
x11_handle_dri3_present_event(struct x11_swapchain *chain,
                              xcb_present_generic_event_t *event)
{
   switch (event->evtype) {
   case XCB_PRESENT_CONFIGURE_NOTIFY: {
      xcb_present_configure_notify_event_t *config = (void *) event;

      if (config->width != chain->extent.width ||
          config->height != chain->extent.height)
         return vk_error(VK_ERROR_OUT_OF_DATE_KHR);

      break;
   }

   case XCB_PRESENT_EVENT_IDLE_NOTIFY: {
      xcb_present_idle_notify_event_t *idle = (void *) event;

      for (unsigned i = 0; i < chain->image_count; i++) {
         if (chain->images[i].pixmap == idle->pixmap) {
            chain->images[i].busy = false;
            break;
         }
      }

      break;
   }

   case XCB_PRESENT_COMPLETE_NOTIFY:
   default:
      break;
   }

   return VK_SUCCESS;
}

static VkResult
x11_acquire_next_image(struct anv_swapchain *anv_chain,
                       uint64_t timeout,
                       VkSemaphore semaphore,
                       uint32_t *image_index)
{
   struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;

   while (1) {
      for (uint32_t i = 0; i < chain->image_count; i++) {
         if (!chain->images[i].busy) {
            /* We found a non-busy image */
            xshmfence_await(chain->images[i].shm_fence);
            *image_index = i;
            return VK_SUCCESS;
         }
      }

      xcb_flush(chain->conn);
      xcb_generic_event_t *event =
         xcb_wait_for_special_event(chain->conn, chain->special_event);
      if (!event)
         return vk_error(VK_ERROR_OUT_OF_DATE_KHR);

      VkResult result = x11_handle_dri3_present_event(chain, (void *)event);
      free(event);
      if (result != VK_SUCCESS)
         return result;
   }
}
static VkResult
x11_queue_present(struct anv_swapchain *anv_chain,
                  struct anv_queue *queue,
                  uint32_t image_index)
{
   struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
   struct x11_image *image = &chain->images[image_index];

   assert(image_index < chain->image_count);

   uint32_t options = XCB_PRESENT_OPTION_NONE;

   int64_t target_msc = 0;
   int64_t divisor = 0;
   int64_t remainder = 0;

   options |= XCB_PRESENT_OPTION_ASYNC;

   xshmfence_reset(image->shm_fence);

   xcb_void_cookie_t cookie =
      xcb_present_pixmap(chain->conn,
                         chain->window,
                         image->pixmap,
                         (uint32_t) chain->send_sbc,
                         0,                            /* valid */
                         0,                            /* update */
                         0,                            /* x_off */
                         0,                            /* y_off */
                         XCB_NONE,                     /* target_crtc */
                         XCB_NONE,
                         image->sync_fence,
                         options,
                         target_msc,
                         divisor,
                         remainder, 0, NULL);
   xcb_discard_reply(chain->conn, cookie.sequence);
   image->busy = true;

   xcb_flush(chain->conn);

   return VK_SUCCESS;
}
static VkResult
x11_image_init(struct anv_device *device, struct x11_swapchain *chain,
               const VkSwapchainCreateInfoKHR *pCreateInfo,
               const VkAllocationCallbacks* pAllocator,
               struct x11_image *image)
{
   xcb_void_cookie_t cookie;
   VkResult result;

   VkImage image_h;
   result = anv_image_create(anv_device_to_handle(device),
      &(struct anv_image_create_info) {
         .isl_tiling_flags = ISL_TILING_X_BIT,
         .stride = 0,
         .vk_info =
      &(VkImageCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = pCreateInfo->imageFormat,
         .extent = {
            .width = pCreateInfo->imageExtent.width,
            .height = pCreateInfo->imageExtent.height,
            .depth = 1
         },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = 1,
         /* FIXME: Need a way to use X tiling to allow scanout */
         .tiling = VK_IMAGE_TILING_OPTIMAL,
         .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
         .flags = 0,
      }},
      NULL,
      &image_h);
   if (result != VK_SUCCESS)
      return result;

   image->image = anv_image_from_handle(image_h);
   assert(anv_format_is_color(image->image->format));

   VkDeviceMemory memory_h;
   result = anv_AllocateMemory(anv_device_to_handle(device),
      &(VkMemoryAllocateInfo) {
         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
         .allocationSize = image->image->size,
         .memoryTypeIndex = 0,
      },
      NULL /* XXX: pAllocator */,
      &memory_h);
   if (result != VK_SUCCESS)
      goto fail_create_image;

   image->memory = anv_device_memory_from_handle(memory_h);
   image->memory->bo.is_winsys_bo = true;

   anv_BindImageMemory(VK_NULL_HANDLE, image_h, memory_h, 0);

   struct anv_surface *surface = &image->image->color_surface;
   assert(surface->isl.tiling == ISL_TILING_X);

   int ret = anv_gem_set_tiling(device, image->memory->bo.gem_handle,
                                surface->isl.row_pitch, I915_TILING_X);
   if (ret) {
      /* FINISHME: Choose a better error. */
      result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
                         "set_tiling failed: %m");
      goto fail_alloc_memory;
   }

   int fd = anv_gem_handle_to_fd(device, image->memory->bo.gem_handle);
   if (fd == -1) {
      /* FINISHME: Choose a better error. */
      result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
                         "handle_to_fd failed: %m");
      goto fail_alloc_memory;
   }

   uint32_t bpp = 32;
   uint32_t depth = 24;
   image->pixmap = xcb_generate_id(chain->conn);

   cookie =
      xcb_dri3_pixmap_from_buffer_checked(chain->conn,
                                          image->pixmap,
                                          chain->window,
                                          image->image->size,
                                          pCreateInfo->imageExtent.width,
                                          pCreateInfo->imageExtent.height,
                                          surface->isl.row_pitch,
                                          depth, bpp, fd);
   xcb_discard_reply(chain->conn, cookie.sequence);

   int fence_fd = xshmfence_alloc_shm();
   if (fence_fd < 0) {
      /* FINISHME: Choose a better error. */
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_pixmap;
   }

   image->shm_fence = xshmfence_map_shm(fence_fd);
   if (image->shm_fence == NULL) {
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_shmfence_alloc;
   }

   image->sync_fence = xcb_generate_id(chain->conn);
   xcb_dri3_fence_from_fd(chain->conn,
                          image->pixmap,
                          image->sync_fence,
                          false,
                          fence_fd);

   image->busy = false;
   xshmfence_trigger(image->shm_fence);

   return VK_SUCCESS;

fail_shmfence_alloc:
   close(fence_fd);

fail_pixmap:
   cookie = xcb_free_pixmap(chain->conn, image->pixmap);
   xcb_discard_reply(chain->conn, cookie.sequence);

fail_alloc_memory:
   anv_FreeMemory(anv_device_to_handle(chain->base.device),
                  anv_device_memory_to_handle(image->memory), pAllocator);

fail_create_image:
   anv_DestroyImage(anv_device_to_handle(chain->base.device),
                    anv_image_to_handle(image->image), pAllocator);

   return result;
}
static void
x11_image_finish(struct x11_swapchain *chain,
                 const VkAllocationCallbacks* pAllocator,
                 struct x11_image *image)
{
   xcb_void_cookie_t cookie;

   cookie = xcb_sync_destroy_fence(chain->conn, image->sync_fence);
   xcb_discard_reply(chain->conn, cookie.sequence);
   xshmfence_unmap_shm(image->shm_fence);

   cookie = xcb_free_pixmap(chain->conn, image->pixmap);
   xcb_discard_reply(chain->conn, cookie.sequence);

   anv_DestroyImage(anv_device_to_handle(chain->base.device),
                    anv_image_to_handle(image->image), pAllocator);

   anv_FreeMemory(anv_device_to_handle(chain->base.device),
                  anv_device_memory_to_handle(image->memory), pAllocator);
}

static VkResult
x11_swapchain_destroy(struct anv_swapchain *anv_chain,
                      const VkAllocationCallbacks *pAllocator)
{
   struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;

   for (uint32_t i = 0; i < chain->image_count; i++)
      x11_image_finish(chain, pAllocator, &chain->images[i]);

   xcb_unregister_for_special_event(chain->conn, chain->special_event);

   anv_free2(&chain->base.device->alloc, pAllocator, chain);

   return VK_SUCCESS;
}
static VkResult
x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
                             struct anv_device *device,
                             const VkSwapchainCreateInfoKHR *pCreateInfo,
                             const VkAllocationCallbacks* pAllocator,
                             struct anv_swapchain **swapchain_out)
{
   VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface;
   struct x11_swapchain *chain;
   xcb_void_cookie_t cookie;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR);

   int num_images = pCreateInfo->minImageCount;

   /* For true mailbox mode, we need at least 4 images:
    *  1) One to scan out from
    *  2) One to have queued for scan-out
    *  3) One to be currently held by the X server
    *  4) One to render to
    */
   if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR)
      num_images = MAX2(num_images, 4);

   size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]);
   chain = anv_alloc2(&device->alloc, pAllocator, size, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (chain == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   chain->base.device = device;
   chain->base.destroy = x11_swapchain_destroy;
   chain->base.get_images = x11_get_images;
   chain->base.acquire_next_image = x11_acquire_next_image;
   chain->base.queue_present = x11_queue_present;

   chain->conn = surface->connection;
   chain->window = surface->window;
   chain->extent = pCreateInfo->imageExtent;
   chain->image_count = num_images;
   chain->send_sbc = 0;

   chain->event_id = xcb_generate_id(chain->conn);
   xcb_present_select_input(chain->conn, chain->event_id, chain->window,
                            XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
                            XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY |
                            XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY);

   /* Create an XCB event queue to hold present events outside of the usual
    * application event queue
    */
   chain->special_event =
      xcb_register_for_special_xge(chain->conn, &xcb_present_id,
                                   chain->event_id, NULL);

   chain->gc = xcb_generate_id(chain->conn);
   if (!chain->gc) {
      /* FINISHME: Choose a better error. */
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_register;
   }

   cookie = xcb_create_gc(chain->conn,
                          chain->gc,
                          chain->window,
                          XCB_GC_GRAPHICS_EXPOSURES,
                          (uint32_t []) { 0 });
   xcb_discard_reply(chain->conn, cookie.sequence);

   uint32_t image = 0;
   for (; image < chain->image_count; image++) {
      result = x11_image_init(device, chain, pCreateInfo, pAllocator,
                              &chain->images[image]);
      if (result != VK_SUCCESS)
         goto fail_init_images;
   }

   *swapchain_out = &chain->base;

   return VK_SUCCESS;

fail_init_images:
   for (uint32_t j = 0; j < image; j++)
      x11_image_finish(chain, pAllocator, &chain->images[j]);

fail_register:
   xcb_unregister_for_special_event(chain->conn, chain->special_event);

   anv_free2(&device->alloc, pAllocator, chain);

   return result;
}
VkResult
|
||||
anv_x11_init_wsi(struct anv_instance *instance)
|
||||
{
|
||||
struct wsi_x11 *wsi;
|
||||
VkResult result;
|
||||
|
||||
wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
|
||||
if (!wsi) {
|
||||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
int ret = pthread_mutex_init(&wsi->mutex, NULL);
|
||||
if (ret != 0) {
|
||||
if (ret == ENOMEM) {
|
||||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
} else {
|
||||
/* FINISHME: Choose a better error. */
|
||||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
if (!wsi->connections) {
|
||||
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
goto fail_mutex;
|
||||
}
|
||||
|
||||
wsi->base.get_support = x11_surface_get_support;
|
||||
wsi->base.get_capabilities = x11_surface_get_capabilities;
|
||||
wsi->base.get_formats = x11_surface_get_formats;
|
||||
wsi->base.get_present_modes = x11_surface_get_present_modes;
|
||||
wsi->base.create_swapchain = x11_surface_create_swapchain;
|
||||
|
||||
instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_mutex:
|
||||
pthread_mutex_destroy(&wsi->mutex);
|
||||
fail_alloc:
|
||||
anv_free(&instance->alloc, wsi);
|
||||
fail:
|
||||
instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL;
|
||||
|
||||
return result;
|
||||
}
|
||||
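
/* Editor's note (assumption, not stated in the patch): wsi->connections
 * appears to be a per-xcb_connection_t cache, so that connection-level
 * queries (for example, whether the server supports DRI3/Present) only have
 * to be made once; the mutex guards it since WSI entry points may be called
 * from multiple threads.
 */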

void
anv_x11_finish_wsi(struct anv_instance *instance)
{
   struct wsi_x11 *wsi =
      (struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB];

   if (wsi) {
      _mesa_hash_table_destroy(wsi->connections, NULL);

      pthread_mutex_destroy(&wsi->mutex);

      anv_free(&instance->alloc, wsi);
   }
}
7
src/intel/vulkan/dev_icd.json.in
Normal file
7
src/intel/vulkan/dev_icd.json.in
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
{
    "file_format_version": "1.0.0",
    "ICD": {
        "library_path": "@build_libdir@/libvulkan_intel.so",
        "abi_versions": "1.0.3"
    }
}
533
src/intel/vulkan/gen7_cmd_buffer.c
Normal file
533
src/intel/vulkan/gen7_cmd_buffer.c
Normal file
|
|
@ -0,0 +1,533 @@
|
|||
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#if GEN_GEN == 7 && !GEN_IS_HASWELL
void
gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t stages)
{
   static const uint32_t sampler_state_opcodes[] = {
      [MESA_SHADER_VERTEX]    = 43,
      [MESA_SHADER_TESS_CTRL] = 44, /* HS */
      [MESA_SHADER_TESS_EVAL] = 45, /* DS */
      [MESA_SHADER_GEOMETRY]  = 46,
      [MESA_SHADER_FRAGMENT]  = 47,
      [MESA_SHADER_COMPUTE]   = 0,
   };

   static const uint32_t binding_table_opcodes[] = {
      [MESA_SHADER_VERTEX]    = 38,
      [MESA_SHADER_TESS_CTRL] = 39,
      [MESA_SHADER_TESS_EVAL] = 40,
      [MESA_SHADER_GEOMETRY]  = 41,
      [MESA_SHADER_FRAGMENT]  = 42,
      [MESA_SHADER_COMPUTE]   = 0,
   };

   anv_foreach_stage(s, stages) {
      if (cmd_buffer->state.samplers[s].alloc_size > 0) {
         anv_batch_emit(&cmd_buffer->batch,
                        GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS),
                        ._3DCommandSubOpcode = sampler_state_opcodes[s],
                        .PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset);
      }

      /* Always emit binding table pointers if we're asked to, since on SKL
       * this is what flushes push constants. */
      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_BINDING_TABLE_POINTERS_VS),
                     ._3DCommandSubOpcode = binding_table_opcodes[s],
                     .PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset);
   }
}

uint32_t
gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
{
   VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
                              cmd_buffer->state.pipeline->active_stages;

   VkResult result = VK_SUCCESS;
   anv_foreach_stage(s, dirty) {
      result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
                                            &cmd_buffer->state.samplers[s]);
      if (result != VK_SUCCESS)
         break;
      result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
                                                 &cmd_buffer->state.binding_tables[s]);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);

      result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      assert(result == VK_SUCCESS);

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      /* Re-emit all active binding tables */
      dirty |= cmd_buffer->state.pipeline->active_stages;
      anv_foreach_stage(s, dirty) {
         result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
                                               &cmd_buffer->state.samplers[s]);
         if (result != VK_SUCCESS)
            return result;
         result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
                                                    &cmd_buffer->state.binding_tables[s]);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   cmd_buffer->state.descriptors_dirty &= ~dirty;

   return dirty;
}
#endif /* GEN_GEN == 7 && !GEN_IS_HASWELL */

static inline int64_t
clamp_int64(int64_t x, int64_t min, int64_t max)
{
   if (x < min)
      return min;
   else if (x < max)
      return x;
   else
      return max;
}
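
/* Worked example (editor's illustration, not part of the original patch):
 * the scissor code below computes xmax as offset.x + extent.width - 1 in
 * 64-bit arithmetic, so an offset near INT32_MAX plus a large width cannot
 * wrap around; clamp_int64() then pins the result to the 16-bit [0, 0xffff]
 * range the hardware fields accept.
 */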

#if GEN_GEN == 7 && !GEN_IS_HASWELL
void
gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t count = cmd_buffer->state.dynamic.scissor.count;
   const VkRect2D *scissors = cmd_buffer->state.dynamic.scissor.scissors;
   struct anv_state scissor_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);

   for (uint32_t i = 0; i < count; i++) {
      const VkRect2D *s = &scissors[i];

      /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
       * ymax < ymin for empty clips. In case clip x, y, width, and height
       * are all 0, the clamps below produce 0 for xmin, ymin, xmax, ymax,
       * which isn't what we want. Just special case empty clips and
       * produce a canonical empty clip. */
      static const struct GEN7_SCISSOR_RECT empty_scissor = {
         .ScissorRectangleYMin = 1,
         .ScissorRectangleXMin = 1,
         .ScissorRectangleYMax = 0,
         .ScissorRectangleXMax = 0
      };

      const int max = 0xffff;
      struct GEN7_SCISSOR_RECT scissor = {
         /* Do this math using int64_t so overflow gets clamped correctly. */
         .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
         .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
         .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
         .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
      };

      if (s->extent.width <= 0 || s->extent.height <= 0) {
         GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8,
                                &empty_scissor);
      } else {
         GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor);
      }
   }

   anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS,
                  .ScissorRectPointer = scissor_state.offset);

   if (!cmd_buffer->device->info.has_llc)
      anv_state_clflush(scissor_state);
}
#endif
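
/* Concrete case (illustrative, added by the editor): a VkRect2D with
 * offset = (0, 0) and extent = (0, 0) would clamp to xmin = ymin = 0 and
 * xmax = ymax = 0, which the inclusive hardware rectangle reads as a 1x1
 * scissor rather than an empty one -- hence the canonical empty_scissor
 * with min > max above.
 */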

static const uint32_t vk_to_gen_index_type[] = {
   [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
   [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
};

static const uint32_t restart_index_for_type[] = {
   [VK_INDEX_TYPE_UINT16] = UINT16_MAX,
   [VK_INDEX_TYPE_UINT32] = UINT32_MAX,
};
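
/* Illustrative note (editor's addition): with primitive restart enabled, an
 * index equal to the restart value (0xffff for 16-bit indices, 0xffffffff
 * for 32-bit) cuts the current strip or fan and starts a new one; these
 * tables simply pair each VkIndexType with its hardware format and its
 * all-ones cut index.
 */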

void genX(CmdBindIndexBuffer)(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
   if (GEN_IS_HASWELL)
      cmd_buffer->state.restart_index = restart_index_for_type[indexType];
   cmd_buffer->state.gen7.index_buffer = buffer;
   cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType];
   cmd_buffer->state.gen7.index_offset = offset;
}

static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
                                         MESA_SHADER_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
                                              MESA_SHADER_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
   unsigned push_constant_data_size =
      (prog_data->nr_params + local_id_dwords) * 4;
   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
   unsigned push_constant_regs = reg_aligned_constant_size / 32;
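
   /* Worked example (editor's illustration, not in the original patch): with
    * nr_params = 10 uniform dwords and local_invocation_id_regs = 3 (i.e. 24
    * dwords), the push data is 136 bytes; ALIGN(136, 32) = 160 bytes, which
    * is push_constant_regs = 5 full 32-byte GRF registers.
    */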

   if (push_state.alloc_size) {
      anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
                     .CURBETotalDataLength = push_state.alloc_size,
                     .CURBEDataStartAddress = push_state.offset);
   }

   assert(prog_data->total_shared <= 64 * 1024);
   uint32_t slm_size = 0;
   if (prog_data->total_shared > 0) {
      /* slm_size is in 4k increments, but must be a power of 2. */
      slm_size = 4 * 1024;
      while (slm_size < prog_data->total_shared)
         slm_size <<= 1;
      slm_size /= 4 * 1024;
   }
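
   /* Worked example (editor's illustration): for total_shared = 9 KiB the
    * loop rounds up to the next power of two, 16 KiB, and the final divide
    * encodes it in 4 KiB units: slm_size = 16384 / 4096 = 4.
    */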

   struct anv_state state =
      anv_state_pool_emit(&device->dynamic_state_pool,
                          GENX(INTERFACE_DESCRIPTOR_DATA), 64,
                          .KernelStartPointer = pipeline->cs_simd,
                          .BindingTablePointer = surfaces.offset,
                          .SamplerStatePointer = samplers.offset,
                          .ConstantURBEntryReadLength =
                             push_constant_regs,
#if !GEN_IS_HASWELL
                          .ConstantURBEntryReadOffset = 0,
#endif
                          .BarrierEnable = cs_prog_data->uses_barrier,
                          .SharedLocalMemorySize = slm_size,
                          .NumberofThreadsinGPGPUThreadGroup =
                             pipeline->cs_thread_width_max);

   const uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}

#define emit_lri(batch, reg, imm)                       \
   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),    \
                  .RegisterOffset = __anv_reg_num(reg), \
                  .DataDWord = imm)

void
genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
{
   /* References for GL state:
    *
    * - commits e307cfa..228d5a3
    * - src/mesa/drivers/dri/i965/gen7_l3_state.c
    */

   uint32_t l3cr2_slm, l3cr2_noslm;
   anv_pack_struct(&l3cr2_noslm, GENX(L3CNTLREG2),
                   .URBAllocation = 24,
                   .ROAllocation = 0,
                   .DCAllocation = 16);
   anv_pack_struct(&l3cr2_slm, GENX(L3CNTLREG2),
                   .SLMEnable = 1,
                   .URBAllocation = 16,
                   .URBLowBandwidth = 1,
                   .ROAllocation = 0,
                   .DCAllocation = 8);
   const uint32_t l3cr2_val = enable_slm ? l3cr2_slm : l3cr2_noslm;
   bool changed = cmd_buffer->state.current_l3_config != l3cr2_val;

   if (changed) {
      /* According to the hardware docs, the L3 partitioning can only be
       * changed while the pipeline is completely drained and the caches are
       * flushed, which involves a first PIPE_CONTROL flush which stalls the
       * pipeline...
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .DCFlushEnable = true,
                     .PostSyncOperation = NoWrite,
                     .CommandStreamerStallEnable = true);

      /* ...followed by a second pipelined PIPE_CONTROL that initiates
       * invalidation of the relevant caches. Note that because RO
       * invalidation happens at the top of the pipeline (i.e. right away as
       * the PIPE_CONTROL command is processed by the CS) we cannot combine it
       * with the previous stalling flush as the hardware documentation
       * suggests, because that would cause the CS to stall on previous
       * rendering *after* RO invalidation and wouldn't prevent the RO caches
       * from being polluted by concurrent rendering before the stall
       * completes. This intentionally doesn't implement the SKL+ hardware
       * workaround suggesting to enable CS stall on PIPE_CONTROLs with the
       * texture cache invalidation bit set for GPGPU workloads because the
       * previous and subsequent PIPE_CONTROLs already guarantee that there is
       * no concurrent GPGPU kernel execution (see SKL HSD 2132585).
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .TextureCacheInvalidationEnable = true,
                     .ConstantCacheInvalidationEnable = true,
                     .InstructionCacheInvalidateEnable = true,
                     .StateCacheInvalidationEnable = true,
                     .PostSyncOperation = NoWrite);

      /* Now send a third stalling flush to make sure that invalidation is
       * complete when the L3 configuration registers are modified.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .DCFlushEnable = true,
                     .PostSyncOperation = NoWrite,
                     .CommandStreamerStallEnable = true);

      anv_finishme("write GEN7_L3SQCREG1");
      emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2_val);

      uint32_t l3cr3_slm, l3cr3_noslm;
      anv_pack_struct(&l3cr3_noslm, GENX(L3CNTLREG3),
                      .ISAllocation = 8,
                      .CAllocation = 4,
                      .TAllocation = 8);
      anv_pack_struct(&l3cr3_slm, GENX(L3CNTLREG3),
                      .ISAllocation = 8,
                      .CAllocation = 8,
                      .TAllocation = 8);
      const uint32_t l3cr3_val = enable_slm ? l3cr3_slm : l3cr3_noslm;
      emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3_val);

      cmd_buffer->state.current_l3_config = l3cr2_val;
   }
}

void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   bool needs_slm = cs_prog_data->base.total_shared > 0;
   genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm);

   genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
      /* FIXME: figure out descriptors for gen7 */
      result = flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}

void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_RENDER_TARGETS |
                                  ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
                                  ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {

      const struct anv_image_view *iview =
         anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
      const struct anv_image *image = iview ? iview->image : NULL;
      const struct anv_format *anv_format =
         iview ? anv_format_for_vk_format(iview->vk_format) : NULL;
      const bool has_depth = iview && anv_format->has_depth;
      const uint32_t depth_format = has_depth ?
         isl_surf_get_depth_format(&cmd_buffer->device->isl_dev,
                                   &image->depth_surface.isl) : D16_UNORM;

      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
      struct GENX(3DSTATE_SF) sf = {
         GENX(3DSTATE_SF_header),
         .DepthBufferSurfaceFormat = depth_format,
         .LineWidth = cmd_buffer->state.dynamic.line_width,
         .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
         .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
         .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
      };
      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);

      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
      struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
         .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
         .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
         .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
         .StencilReferenceValue = d->stencil_reference.front & 0xff,
         .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
      if (!cmd_buffer->device->info.has_llc)
         anv_state_clflush(cc_state);

      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_CC_STATE_POINTERS),
                     .ColorCalcStatePointer = cc_state.offset);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_RENDER_TARGETS |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
      uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
      struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;

      struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
         .StencilTestMask = d->stencil_compare_mask.front & 0xff,
         .StencilWriteMask = d->stencil_write_mask.front & 0xff,

         .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
         .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
      };
      GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);

      struct anv_state ds_state =
         anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw,
                                      pipeline->gen7.depth_stencil_state,
                                      GENX(DEPTH_STENCIL_STATE_length), 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS),
                     .PointertoDEPTH_STENCIL_STATE = ds_state.offset);
   }

   if (cmd_buffer->state.gen7.index_buffer &&
       cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_INDEX_BUFFER)) {
      struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer;
      uint32_t offset = cmd_buffer->state.gen7.index_offset;

#if GEN_IS_HASWELL
      anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF,
                     .IndexedDrawCutIndexEnable = pipeline->primitive_restart,
                     .CutIndex = cmd_buffer->state.restart_index);
#endif

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER),
#if !GEN_IS_HASWELL
                     .CutIndexEnable = pipeline->primitive_restart,
#endif
                     .IndexFormat = cmd_buffer->state.gen7.index_type,
                     .MemoryObjectControlState = GENX(MOCS),
                     .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                     .BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size });
   }

   cmd_buffer->state.dirty = 0;
}

void genX(CmdSetEvent)(
    VkCommandBuffer                             commandBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}

void genX(CmdResetEvent)(
    VkCommandBuffer                             commandBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}

void genX(CmdWaitEvents)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    eventCount,
    const VkEvent*                              pEvents,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    uint32_t                                    memoryBarrierCount,
    const VkMemoryBarrier*                      pMemoryBarriers,
    uint32_t                                    bufferMemoryBarrierCount,
    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
    uint32_t                                    imageMemoryBarrierCount,
    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
{
   stub();
}
402
src/intel/vulkan/gen7_pipeline.c
Normal file
402
src/intel/vulkan/gen7_pipeline.c
Normal file
|
|
@ -0,0 +1,402 @@
|
|||
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#include "genX_pipeline_util.h"

static void
gen7_emit_rs_state(struct anv_pipeline *pipeline,
                   const VkPipelineRasterizationStateCreateInfo *info,
                   const struct anv_graphics_pipeline_create_info *extra)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),

      /* LegacyGlobalDepthBiasEnable */

      .StatisticsEnable = true,
      .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
      .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
      .ViewTransformEnable = !(extra && extra->use_rectlist),
      .FrontWinding = vk_to_gen_front_face[info->frontFace],
      /* bool AntiAliasingEnable; */

      .CullMode = vk_to_gen_cullmode[info->cullMode],

      /* uint32_t LineEndCapAntialiasingRegionWidth; */
      .ScissorRectangleEnable = !(extra && extra->use_rectlist),

      /* uint32_t MultisampleRasterizationMode; */
      /* bool LastPixelEnable; */

      .TriangleStripListProvokingVertexSelect = 0,
      .LineStripListProvokingVertexSelect = 0,
      .TriangleFanProvokingVertexSelect = 1,

      /* uint32_t AALineDistanceMode; */
      /* uint32_t VertexSubPixelPrecisionSelect; */
      .UsePointWidthState = false,
      .PointWidth = 1.0,
      .GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
      .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
      .GlobalDepthOffsetEnablePoint = info->depthBiasEnable,
   };

   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
}

static void
gen7_emit_ds_state(struct anv_pipeline *pipeline,
                   const VkPipelineDepthStencilStateCreateInfo *info)
{
   if (info == NULL) {
      /* We're going to OR this together with the dynamic state. We need
       * to make sure it's initialized to something useful.
       */
      memset(pipeline->gen7.depth_stencil_state, 0,
             sizeof(pipeline->gen7.depth_stencil_state));
      return;
   }

   struct GENX(DEPTH_STENCIL_STATE) state = {
      .DepthTestEnable = info->depthTestEnable,
      .DepthBufferWriteEnable = info->depthWriteEnable,
      .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp],
      .DoubleSidedStencilEnable = true,

      .StencilTestEnable = info->stencilTestEnable,
      .StencilBufferWriteEnable = info->stencilTestEnable,
      .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp],
      .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp],
      .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp],
      .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp],

      .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp],
      .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp],
      .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp],
      .BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp],
   };

   GENX(DEPTH_STENCIL_STATE_pack)(NULL, &pipeline->gen7.depth_stencil_state, &state);
}

static void
gen7_emit_cb_state(struct anv_pipeline *pipeline,
                   const VkPipelineColorBlendStateCreateInfo *info,
                   const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   struct anv_device *device = pipeline->device;

   if (info == NULL || info->attachmentCount == 0) {
      pipeline->blend_state =
         anv_state_pool_emit(&device->dynamic_state_pool,
                             GENX(BLEND_STATE), 64,
                             .ColorBufferBlendEnable = false,
                             .WriteDisableAlpha = true,
                             .WriteDisableRed = true,
                             .WriteDisableGreen = true,
                             .WriteDisableBlue = true);
   } else {
      const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0];
      struct GENX(BLEND_STATE) blend = {
         .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
         .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,

         .LogicOpEnable = info->logicOpEnable,
         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
         .ColorBufferBlendEnable = a->blendEnable,
         .ColorClampRange = COLORCLAMP_RTFORMAT,
         .PreBlendColorClampEnable = true,
         .PostBlendColorClampEnable = true,
         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
      };

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used. Technically, this means the
       * hardware can do MORE than GL or Vulkan specify. However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op.
       */
      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
          a->colorBlendOp == VK_BLEND_OP_MAX) {
         blend.SourceBlendFactor = BLENDFACTOR_ONE;
         blend.DestinationBlendFactor = BLENDFACTOR_ONE;
      }
      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
          a->alphaBlendOp == VK_BLEND_OP_MAX) {
         blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      }
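
      /* Illustrative check (editor's addition, not in the original patch):
       * with both factors stomped to ONE, the hardware computes e.g.
       * min(src * 1, dst * 1) = min(src, dst), which is exactly the
       * factor-free behavior VK_BLEND_OP_MIN/MAX require.
       */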

      pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool,
                                                   GENX(BLEND_STATE_length) * 4,
                                                   64);
      GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend);
      if (!pipeline->device->info.has_llc)
         anv_state_clflush(pipeline->blend_state);
   }

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS),
                  .BlendStatePointer = pipeline->blend_state.offset);
}

VkResult
genX(graphics_pipeline_create)(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkGraphicsPipelineCreateInfo*         pCreateInfo,
    const struct anv_graphics_pipeline_create_info *extra,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_pipeline_init(pipeline, device, cache,
                              pCreateInfo, extra, pAllocator);
   if (result != VK_SUCCESS) {
      anv_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   assert(pCreateInfo->pVertexInputState);
   emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra);

   assert(pCreateInfo->pRasterizationState);
   gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra);

   gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState);

   gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
                      pCreateInfo->pMultisampleState);

   emit_urb_setup(pipeline);

   const VkPipelineRasterizationStateCreateInfo *rs_info =
      pCreateInfo->pRasterizationState;

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP),
                  .FrontWinding = vk_to_gen_front_face[rs_info->frontFace],
                  .CullMode = vk_to_gen_cullmode[rs_info->cullMode],
                  .ClipEnable = !(extra && extra->use_rectlist),
                  .APIMode = APIMODE_OGL,
                  .ViewportXYClipTestEnable = true,
                  .ClipMode = CLIPMODE_NORMAL,
                  .TriangleStripListProvokingVertexSelect = 0,
                  .LineStripListProvokingVertexSelect = 0,
                  .TriangleFanProvokingVertexSelect = 1,
                  .MinimumPointWidth = 0.125,
                  .MaximumPointWidth = 255.875,
                  .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1);

   if (pCreateInfo->pMultisampleState &&
       pCreateInfo->pMultisampleState->rasterizationSamples > 1)
      anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO");

   uint32_t samples = 1;
   uint32_t log2_samples = __builtin_ffs(samples) - 1;

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE),
                  .PixelLocation = PIXLOC_CENTER,
                  .NumberofMultisamples = log2_samples);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK),
                  .SampleMask = 0xff);

   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

#if 0
   /* From gen7_vs_state.c */

   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    *     "Note: Because of corruption in IVB:GT2, software needs to flush the
    *     whole fixed function pipeline when the GS enable changes value in
    *     the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
   if (!brw->is_haswell && !brw->is_baytrail)
      gen7_emit_vs_workaround_flush(brw);
#endif

   if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs))
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false);
   else
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS),
                     .KernelStartPointer = pipeline->vs_vec4,
                     .ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX],
                     .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base),

                     .DispatchGRFStartRegisterforURBData =
                        vs_prog_data->base.base.dispatch_grf_start_reg,
                     .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length,
                     .VertexURBEntryReadOffset = 0,

                     .MaximumNumberofThreads = device->info.max_vs_threads - 1,
                     .StatisticsEnable = true,
                     .VSFunctionEnable = true);

   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);

   if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false);
   } else {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS),
                     .KernelStartPointer = pipeline->gs_kernel,
                     .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
                     .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base),

                     .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1,
                     .OutputTopology = gs_prog_data->output_topology,
                     .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length,
                     .IncludeVertexHandles = gs_prog_data->base.include_vue_handles,
                     .DispatchGRFStartRegisterforURBData =
                        gs_prog_data->base.base.dispatch_grf_start_reg,

                     .MaximumNumberofThreads = device->info.max_gs_threads - 1,
                     /* This is in the next dword on HSW. */
                     .ControlDataFormat = gs_prog_data->control_data_format,
                     .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords,
                     .InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1,
                     .DispatchMode = gs_prog_data->base.dispatch_mode,
                     .GSStatisticsEnable = true,
                     .IncludePrimitiveID = gs_prog_data->include_primitive_id,
#if GEN_IS_HASWELL
                     .ReorderMode = REORDER_TRAILING,
#else
                     .ReorderEnable = true,
#endif
                     .GSEnable = true);
   }

   if (pipeline->ps_ksp0 == NO_KERNEL) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE));

      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM),
                     .StatisticsEnable = true,
                     .ThreadDispatchEnable = false,
                     .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */
                     .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */
                     .EarlyDepthStencilControl = EDSC_NORMAL,
                     .PointRasterizationRule = RASTRULE_UPPER_RIGHT);

      /* Even if no fragments are ever dispatched, the hardware hangs if we
       * don't at least set the maximum number of threads.
       */
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
                     .MaximumNumberofThreads = device->info.max_wm_threads - 1);
   } else {
      const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
      if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 ||
          wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1)
         anv_finishme("two-sided color needs sbe swizzling setup");
      if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1)
         anv_finishme("primitive_id needs sbe swizzling setup");

      emit_3dstate_sbe(pipeline);

      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
                     .KernelStartPointer0 = pipeline->ps_ksp0,
                     .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
                     .PerThreadScratchSpace = scratch_space(&wm_prog_data->base),

                     .MaximumNumberofThreads = device->info.max_wm_threads - 1,
                     .PushConstantEnable = wm_prog_data->base.nr_params > 0,
                     .AttributeEnable = wm_prog_data->num_varying_inputs > 0,
                     .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,

                     .RenderTargetFastClearEnable = false,
                     .DualSourceBlendEnable = false,
                     .RenderTargetResolveEnable = false,

                     .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
                        POSOFFSET_SAMPLE : POSOFFSET_NONE,

                     ._32PixelDispatchEnable = false,
                     ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
                     ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,

                     .DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0,
                     .DispatchGRFStartRegisterforConstantSetupData1 = 0,
                     .DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2,

#if 0
                     /* Haswell requires the sample mask to be set in this packet as well as
                      * in 3DSTATE_SAMPLE_MASK; the values should match. */
                     /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
#endif

                     .KernelStartPointer1 = 0,
                     .KernelStartPointer2 = pipeline->ps_ksp2);

      /* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM),
                     .StatisticsEnable = true,
                     .ThreadDispatchEnable = true,
                     .LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */
                     .LineAntialiasingRegionWidth = 1, /* 1.0 pixels */
                     .EarlyDepthStencilControl = EDSC_NORMAL,
                     .PointRasterizationRule = RASTRULE_UPPER_RIGHT,
                     .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
                     .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth,
                     .PixelShaderUsesSourceW = wm_prog_data->uses_src_w,
                     .PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask,
                     .BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes);
   }

   *pPipeline = anv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}
529
src/intel/vulkan/gen8_cmd_buffer.c
Normal file
529
src/intel/vulkan/gen8_cmd_buffer.c
Normal file
|
|
@ -0,0 +1,529 @@
|
|||
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#if GEN_GEN == 8
void
gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t count = cmd_buffer->state.dynamic.viewport.count;
   const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
   struct anv_state sf_clip_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
   struct anv_state cc_state =
      anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);

   for (uint32_t i = 0; i < count; i++) {
      const VkViewport *vp = &viewports[i];

      /* The gen7 state struct has just the matrix and guardband fields, the
       * gen8 struct adds the min/max viewport fields. */
      struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
         .ViewportMatrixElementm00 = vp->width / 2,
         .ViewportMatrixElementm11 = vp->height / 2,
         .ViewportMatrixElementm22 = 1.0,
         .ViewportMatrixElementm30 = vp->x + vp->width / 2,
         .ViewportMatrixElementm31 = vp->y + vp->height / 2,
         .ViewportMatrixElementm32 = 0.0,
         .XMinClipGuardband = -1.0f,
         .XMaxClipGuardband = 1.0f,
         .YMinClipGuardband = -1.0f,
         .YMaxClipGuardband = 1.0f,
         .XMinViewPort = vp->x,
         .XMaxViewPort = vp->x + vp->width - 1,
         .YMinViewPort = vp->y,
         .YMaxViewPort = vp->y + vp->height - 1,
      };

      struct GENX(CC_VIEWPORT) cc_viewport = {
         .MinimumDepth = vp->minDepth,
         .MaximumDepth = vp->maxDepth
      };

      GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
                                  &sf_clip_viewport);
      GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
   }

   if (!cmd_buffer->device->info.has_llc) {
      anv_state_clflush(sf_clip_state);
      anv_state_clflush(cc_state);
   }

   anv_batch_emit(&cmd_buffer->batch,
                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC),
                  .CCViewportPointer = cc_state.offset);
   anv_batch_emit(&cmd_buffer->batch,
                  GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP),
                  .SFClipViewportPointer = sf_clip_state.offset);
}
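
/* Worked example (editor's illustration, not in the original patch): for
 * NDC x = -1 the matrix above yields m00 * (-1) + m30 = -(w/2) + x + w/2
 * = vp->x, and NDC x = +1 maps to vp->x + vp->width, so the elements encode
 * the usual viewport scale-and-bias transform.
 */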
#endif

#define emit_lri(batch, reg, imm)                       \
   anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),    \
                  .RegisterOffset = __anv_reg_num(reg), \
                  .DataDWord = imm)

void
genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
{
   /* References for GL state:
    *
    * - commits e307cfa..228d5a3
    * - src/mesa/drivers/dri/i965/gen7_l3_state.c
    */

   uint32_t l3cr_slm, l3cr_noslm;
   anv_pack_struct(&l3cr_noslm, GENX(L3CNTLREG),
                   .URBAllocation = 48,
                   .AllAllocation = 48);
   anv_pack_struct(&l3cr_slm, GENX(L3CNTLREG),
                   .SLMEnable = 1,
                   .URBAllocation = 16,
                   .AllAllocation = 48);
   const uint32_t l3cr_val = enable_slm ? l3cr_slm : l3cr_noslm;
   bool changed = cmd_buffer->state.current_l3_config != l3cr_val;

   if (changed) {
      /* According to the hardware docs, the L3 partitioning can only be
       * changed while the pipeline is completely drained and the caches are
       * flushed, which involves a first PIPE_CONTROL flush which stalls the
       * pipeline...
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .DCFlushEnable = true,
                     .PostSyncOperation = NoWrite,
                     .CommandStreamerStallEnable = true);

      /* ...followed by a second pipelined PIPE_CONTROL that initiates
       * invalidation of the relevant caches. Note that because RO
       * invalidation happens at the top of the pipeline (i.e. right away as
       * the PIPE_CONTROL command is processed by the CS) we cannot combine it
       * with the previous stalling flush as the hardware documentation
       * suggests, because that would cause the CS to stall on previous
       * rendering *after* RO invalidation and wouldn't prevent the RO caches
       * from being polluted by concurrent rendering before the stall
       * completes. This intentionally doesn't implement the SKL+ hardware
       * workaround suggesting to enable CS stall on PIPE_CONTROLs with the
       * texture cache invalidation bit set for GPGPU workloads because the
       * previous and subsequent PIPE_CONTROLs already guarantee that there is
       * no concurrent GPGPU kernel execution (see SKL HSD 2132585).
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .TextureCacheInvalidationEnable = true,
                     .ConstantCacheInvalidationEnable = true,
                     .InstructionCacheInvalidateEnable = true,
                     .StateCacheInvalidationEnable = true,
                     .PostSyncOperation = NoWrite);

      /* Now send a third stalling flush to make sure that invalidation is
       * complete when the L3 configuration registers are modified.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                     .DCFlushEnable = true,
                     .PostSyncOperation = NoWrite,
                     .CommandStreamerStallEnable = true);

      emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr_val);
      cmd_buffer->state.current_l3_config = l3cr_val;
   }
}

static void
__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t sf_dw[GENX(3DSTATE_SF_length)];
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
      .LineWidth = cmd_buffer->state.dynamic.line_width,
   };
   GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
   /* FIXME: gen9.fs */
   anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
                        cmd_buffer->state.pipeline->gen8.sf);
}

#include "genxml/gen9_pack.h"
static void
__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t sf_dw[GENX(3DSTATE_SF_length)];
   struct GEN9_3DSTATE_SF sf = {
      GEN9_3DSTATE_SF_header,
      .LineWidth = cmd_buffer->state.dynamic.line_width,
   };
   GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf);
   /* FIXME: gen9.fs */
   anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
                        cmd_buffer->state.pipeline->gen8.sf);
}

static void
__emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
{
   if (cmd_buffer->device->info.is_cherryview)
      __emit_gen9_sf_state(cmd_buffer);
   else
      __emit_genx_sf_state(cmd_buffer);
}
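
/* Editor's note (assumption, not stated in the patch): Cherryview presumably
 * takes the gen9 path because its 3DSTATE_SF uses the gen9 layout of the
 * LineWidth field; the two helpers above are otherwise identical.
 */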

void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
      __emit_sf_state(cmd_buffer);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
      uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
      struct GENX(3DSTATE_RASTER) raster = {
         GENX(3DSTATE_RASTER_header),
         .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
         .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
         .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
      };
      GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
                           pipeline->gen8.raster);
   }

   /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to
    * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits get split
    * across different state packets for gen8 and gen9. We handle that by
    * using a big old #if switch here.
    */
#if GEN_GEN == 8
   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
      struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
         .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
         .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
         .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
         .StencilReferenceValue = d->stencil_reference.front & 0xff,
         .BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);

      if (!cmd_buffer->device->info.has_llc)
         anv_state_clflush(cc_state);

      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_CC_STATE_POINTERS),
                     .ColorCalcStatePointer = cc_state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
      uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
      struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;

      struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
GENX(3DSTATE_WM_DEPTH_STENCIL_header),
|
||||
|
||||
.StencilTestMask = d->stencil_compare_mask.front & 0xff,
|
||||
.StencilWriteMask = d->stencil_write_mask.front & 0xff,
|
||||
|
||||
.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
|
||||
.BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
|
||||
};
|
||||
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,
|
||||
&wm_depth_stencil);
|
||||
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
|
||||
pipeline->gen8.wm_depth_stencil);
|
||||
}
|
||||
#else
|
||||
if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
|
||||
struct anv_state cc_state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
GEN9_COLOR_CALC_STATE_length * 4,
|
||||
64);
|
||||
struct GEN9_COLOR_CALC_STATE cc = {
|
||||
.BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
|
||||
.BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
|
||||
.BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
|
||||
.BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
|
||||
};
|
||||
GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
|
||||
|
||||
if (!cmd_buffer->device->info.has_llc)
|
||||
anv_state_clflush(cc_state);
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch,
|
||||
GEN9_3DSTATE_CC_STATE_POINTERS,
|
||||
.ColorCalcStatePointer = cc_state.offset,
|
||||
.ColorCalcStatePointerValid = true);
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
|
||||
ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
|
||||
ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
|
||||
uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length];
|
||||
struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
||||
struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
|
||||
GEN9_3DSTATE_WM_DEPTH_STENCIL_header,
|
||||
|
||||
.StencilTestMask = d->stencil_compare_mask.front & 0xff,
|
||||
.StencilWriteMask = d->stencil_write_mask.front & 0xff,
|
||||
|
||||
.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
|
||||
.BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
|
||||
|
||||
.StencilReferenceValue = d->stencil_reference.front & 0xff,
|
||||
.BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
|
||||
};
|
||||
GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil);
|
||||
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, dwords,
|
||||
pipeline->gen9.wm_depth_stencil);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_INDEX_BUFFER)) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF),
|
||||
.IndexedDrawCutIndexEnable = pipeline->primitive_restart,
|
||||
.CutIndex = cmd_buffer->state.restart_index,
|
||||
);
|
||||
}
|
||||
|
||||
cmd_buffer->state.dirty = 0;
|
||||
}
|
||||
|
||||
void genX(CmdBindIndexBuffer)(
|
||||
VkCommandBuffer commandBuffer,
|
||||
VkBuffer _buffer,
|
||||
VkDeviceSize offset,
|
||||
VkIndexType indexType)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
|
||||
|
||||
static const uint32_t vk_to_gen_index_type[] = {
|
||||
[VK_INDEX_TYPE_UINT16] = INDEX_WORD,
|
||||
[VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
|
||||
};
|
||||
|
||||
static const uint32_t restart_index_for_type[] = {
|
||||
[VK_INDEX_TYPE_UINT16] = UINT16_MAX,
|
||||
[VK_INDEX_TYPE_UINT32] = UINT32_MAX,
|
||||
};
|
||||
|
||||
cmd_buffer->state.restart_index = restart_index_for_type[indexType];
|
||||
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER),
|
||||
.IndexFormat = vk_to_gen_index_type[indexType],
|
||||
.MemoryObjectControlState = GENX(MOCS),
|
||||
.BufferStartingAddress = { buffer->bo, buffer->offset + offset },
|
||||
.BufferSize = buffer->size - offset);
|
||||
|
||||
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
|
||||
}
|
||||
|
||||
static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
                                         MESA_SHADER_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
                                              MESA_SHADER_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
   unsigned push_constant_data_size =
      (prog_data->nr_params + local_id_dwords) * 4;
   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
   unsigned push_constant_regs = reg_aligned_constant_size / 32;

   if (push_state.alloc_size) {
      anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
                     .CURBETotalDataLength = push_state.alloc_size,
                     .CURBEDataStartAddress = push_state.offset);
   }

   assert(prog_data->total_shared <= 64 * 1024);
   uint32_t slm_size = 0;
   if (prog_data->total_shared > 0) {
      /* slm_size is in 4k increments, but must be a power of 2. */
      slm_size = 4 * 1024;
      while (slm_size < prog_data->total_shared)
         slm_size <<= 1;
      slm_size /= 4 * 1024;
   }

   struct anv_state state =
      anv_state_pool_emit(&device->dynamic_state_pool,
                          GENX(INTERFACE_DESCRIPTOR_DATA), 64,
                          .KernelStartPointer = pipeline->cs_simd,
                          .KernelStartPointerHigh = 0,
                          .BindingTablePointer = surfaces.offset,
                          .BindingTableEntryCount = 0,
                          .SamplerStatePointer = samplers.offset,
                          .SamplerCount = 0,
                          .ConstantIndirectURBEntryReadLength = push_constant_regs,
                          .ConstantURBEntryReadOffset = 0,
                          .BarrierEnable = cs_prog_data->uses_barrier,
                          .SharedLocalMemorySize = slm_size,
                          .NumberofThreadsinGPGPUThreadGroup =
                             pipeline->cs_thread_width_max);

   uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
   anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}
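/* Editor's note: a worked example of the SLM size computation above,
 * assuming total_shared = 9 KiB (9216 bytes). The loop rounds that up to
 * the next power of two (16384), then divides by 4096, so
 * SharedLocalMemorySize is programmed as 4, i.e. four 4k units. A
 * standalone sketch of the same arithmetic, not part of the driver:
 */
#include <stdint.h>

static uint32_t
slm_size_units(uint32_t total_shared)
{
   if (total_shared == 0)
      return 0;
   uint32_t slm_size = 4 * 1024;
   while (slm_size < total_shared)   /* round up to a power of two */
      slm_size <<= 1;
   return slm_size / (4 * 1024);     /* express in 4k increments */
}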
void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   bool needs_slm = cs_prog_data->base.total_shared > 0;
   genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm);

   genX(flush_pipeline_select_gpgpu)(cmd_buffer);

   if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
      result = flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}
void genX(CmdSetEvent)(
    VkCommandBuffer                             commandBuffer,
    VkEvent                                     _event,
    VkPipelineStageFlags                        stageMask)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_event, event, _event);

   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WriteImmediateData,
                  .Address = {
                     &cmd_buffer->device->dynamic_state_block_pool.bo,
                     event->state.offset
                  },
                  .ImmediateData = VK_EVENT_SET);
}

void genX(CmdResetEvent)(
    VkCommandBuffer                             commandBuffer,
    VkEvent                                     _event,
    VkPipelineStageFlags                        stageMask)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_event, event, _event);

   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WriteImmediateData,
                  .Address = {
                     &cmd_buffer->device->dynamic_state_block_pool.bo,
                     event->state.offset
                  },
                  .ImmediateData = VK_EVENT_RESET);
}

void genX(CmdWaitEvents)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    eventCount,
    const VkEvent*                              pEvents,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    uint32_t                                    memoryBarrierCount,
    const VkMemoryBarrier*                      pMemoryBarriers,
    uint32_t                                    bufferMemoryBarrierCount,
    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
    uint32_t                                    imageMemoryBarrierCount,
    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   for (uint32_t i = 0; i < eventCount; i++) {
      ANV_FROM_HANDLE(anv_event, event, pEvents[i]);

      anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT),
                     .WaitMode = PollingMode,
                     .CompareOperation = COMPARE_SAD_EQUAL_SDD,
                     .SemaphoreDataDword = VK_EVENT_SET,
                     .SemaphoreAddress = {
                        &cmd_buffer->device->dynamic_state_block_pool.bo,
                        event->state.offset
                     });
   }

   genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
                            false, /* byRegion */
                            memoryBarrierCount, pMemoryBarriers,
                            bufferMemoryBarrierCount, pBufferMemoryBarriers,
                            imageMemoryBarrierCount, pImageMemoryBarriers);
}
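/* Editor's note: for context, the application-side pairing these entry
 * points implement. The set is a post-sync PIPE_CONTROL write of
 * VK_EVENT_SET to the event's dword; the wait is an MI_SEMAPHORE_WAIT
 * that polls the same dword until it equals VK_EVENT_SET. A minimal usage
 * sketch against the standard Vulkan 1.0 API (cmd_buffer and event are
 * assumed to have been created elsewhere):
 *
 *    vkCmdSetEvent(cmd_buffer, event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
 *    ...
 *    vkCmdWaitEvents(cmd_buffer, 1, &event,
 *                    VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
 *                    VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
 *                    0, NULL, 0, NULL, 0, NULL);
 */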
538
src/intel/vulkan/gen8_pipeline.c
Normal file

@@ -0,0 +1,538 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#include "genX_pipeline_util.h"

static void
emit_ia_state(struct anv_pipeline *pipeline,
              const VkPipelineInputAssemblyStateCreateInfo *info,
              const struct anv_graphics_pipeline_create_info *extra)
{
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY),
                  .PrimitiveTopologyType = pipeline->topology);
}

static void
emit_rs_state(struct anv_pipeline *pipeline,
              const VkPipelineRasterizationStateCreateInfo *info,
              const VkPipelineMultisampleStateCreateInfo *ms_info,
              const struct anv_graphics_pipeline_create_info *extra)
{
   uint32_t samples = 1;

   if (ms_info)
      samples = ms_info->rasterizationSamples;

   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
      .ViewportTransformEnable = !(extra && extra->use_rectlist),
      .TriangleStripListProvokingVertexSelect = 0,
      .LineStripListProvokingVertexSelect = 0,
      .TriangleFanProvokingVertexSelect = 1,
      .PointWidthSource = Vertex,
      .PointWidth = 1.0,
   };

   /* FINISHME: VkBool32 rasterizerDiscardEnable; */

   GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);

   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),

      /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
       * "Multisample Modes State".
       */
      .DXMultisampleRasterizationEnable = samples > 1,
      .ForcedSampleCount = FSC_NUMRASTSAMPLES_0,
      .ForceMultisampling = false,

      .FrontWinding = vk_to_gen_front_face[info->frontFace],
      .CullMode = vk_to_gen_cullmode[info->cullMode],
      .FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
      .BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
      .ScissorRectangleEnable = !(extra && extra->use_rectlist),
#if GEN_GEN == 8
      .ViewportZClipTestEnable = true,
#else
      /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
      .ViewportZFarClipTestEnable = true,
      .ViewportZNearClipTestEnable = true,
#endif
      .GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
      .GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
      .GlobalDepthOffsetEnablePoint = info->depthBiasEnable,
   };

   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
}

static void
emit_cb_state(struct anv_pipeline *pipeline,
              const VkPipelineColorBlendStateCreateInfo *info,
              const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   struct anv_device *device = pipeline->device;

   uint32_t num_dwords = GENX(BLEND_STATE_length);
   pipeline->blend_state =
      anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);

   struct GENX(BLEND_STATE) blend_state = {
      .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
      .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
   };

   /* Default everything to disabled */
   for (uint32_t i = 0; i < 8; i++) {
      blend_state.Entry[i].WriteDisableAlpha = true;
      blend_state.Entry[i].WriteDisableRed = true;
      blend_state.Entry[i].WriteDisableGreen = true;
      blend_state.Entry[i].WriteDisableBlue = true;
   }

   struct anv_pipeline_bind_map *map =
      &pipeline->bindings[MESA_SHADER_FRAGMENT];

   bool has_writeable_rt = false;
   for (unsigned i = 0; i < map->surface_count; i++) {
      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];

      /* All color attachments are at the beginning of the binding table */
      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         break;

      /* We can have at most 8 attachments */
      assert(i < 8);

      if (binding->offset >= info->attachmentCount)
         continue;

      const VkPipelineColorBlendAttachmentState *a =
         &info->pAttachments[binding->offset];

      if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
          a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
          a->colorBlendOp != a->alphaBlendOp) {
         blend_state.IndependentAlphaBlendEnable = true;
      }

      blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
         .LogicOpEnable = info->logicOpEnable,
         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
         .ColorBufferBlendEnable = a->blendEnable,
         .PreBlendSourceOnlyClampEnable = false,
         .ColorClampRange = COLORCLAMP_RTFORMAT,
         .PreBlendColorClampEnable = true,
         .PostBlendColorClampEnable = true,
         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
      };

      if (a->colorWriteMask != 0)
         has_writeable_rt = true;

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used.  Technically, this means the
       * hardware can do MORE than GL or Vulkan specify.  However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op.
       */
      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
          a->colorBlendOp == VK_BLEND_OP_MAX) {
         blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
         blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
      }
      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
          a->alphaBlendOp == VK_BLEND_OP_MAX) {
         blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      }
   }

   struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND),
                  .AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable,
                  .HasWriteableRT = has_writeable_rt,
                  .ColorBufferBlendEnable = bs0->ColorBufferBlendEnable,
                  .SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor,
                  .DestinationAlphaBlendFactor =
                     bs0->DestinationAlphaBlendFactor,
                  .SourceBlendFactor = bs0->SourceBlendFactor,
                  .DestinationBlendFactor = bs0->DestinationBlendFactor,
                  .AlphaTestEnable = false,
                  .IndependentAlphaBlendEnable =
                     blend_state.IndependentAlphaBlendEnable);

   GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
   if (!device->info.has_llc)
      anv_state_clflush(pipeline->blend_state);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS),
                  .BlendStatePointer = pipeline->blend_state.offset,
                  .BlendStatePointerValid = true);
}
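/* Editor's note: a worked example of why the MIN/MAX stomp above is needed.
 * The hardware always computes func(src * srcFactor, dst * dstFactor),
 * whereas Vulkan defines VK_BLEND_OP_MIN/MAX as min(src, dst)/max(src, dst)
 * with the factors ignored. Forcing both factors to BLENDFACTOR_ONE makes
 * the two agree:
 *
 *    hw:     min(src * 1, dst * 1) = min(src, dst)
 *    vulkan: min(src, dst)
 *
 * Any other factor (e.g. SRC_ALPHA) would still be applied by the hardware
 * and give a non-conformant result.
 */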
static void
emit_ds_state(struct anv_pipeline *pipeline,
              const VkPipelineDepthStencilStateCreateInfo *info)
{
   uint32_t *dw = GEN_GEN == 8 ?
      pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil;

   if (info == NULL) {
      /* We're going to OR this together with the dynamic state.  We need
       * to make sure it's initialized to something useful.
       */
      memset(pipeline->gen8.wm_depth_stencil, 0,
             sizeof(pipeline->gen8.wm_depth_stencil));
      memset(pipeline->gen9.wm_depth_stencil, 0,
             sizeof(pipeline->gen9.wm_depth_stencil));
      return;
   }

   /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */

   struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
      .DepthTestEnable = info->depthTestEnable,
      .DepthBufferWriteEnable = info->depthWriteEnable,
      .DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp],
      .DoubleSidedStencilEnable = true,

      .StencilTestEnable = info->stencilTestEnable,
      .StencilBufferWriteEnable = info->stencilTestEnable,
      .StencilFailOp = vk_to_gen_stencil_op[info->front.failOp],
      .StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp],
      .StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp],
      .StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp],
      .BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp],
      .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp],
      .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp],
      .BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp],
   };

   /* From the Broadwell PRM:
    *
    *    "If Depth_Test_Enable = 1 AND Depth_Test_func = EQUAL, the
    *    Depth_Write_Enable must be set to 0."
    */
   if (info->depthTestEnable && info->depthCompareOp == VK_COMPARE_OP_EQUAL)
      wm_depth_stencil.DepthBufferWriteEnable = false;

   GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil);
}

static void
emit_ms_state(struct anv_pipeline *pipeline,
              const VkPipelineMultisampleStateCreateInfo *info)
{
   uint32_t samples = 1;
   uint32_t log2_samples = 0;

   /* From the Vulkan 1.0 spec:
    *    If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments.
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
    */
   uint32_t sample_mask = 0xffff;

   if (info) {
      samples = info->rasterizationSamples;
      log2_samples = __builtin_ffs(samples) - 1;
   }

   if (info && info->pSampleMask)
      sample_mask &= info->pSampleMask[0];

   if (info && info->sampleShadingEnable)
      anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable");

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE),

                  /* The PRM says that this bit is valid only for DX9:
                   *
                   *    SW can choose to set this bit only for DX9 API. DX10/OGL API's
                   *    should not have any effect by setting or not setting this bit.
                   */
                  .PixelPositionOffsetEnable = false,

                  .PixelLocation = CENTER,
                  .NumberofMultisamples = log2_samples);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK),
                  .SampleMask = sample_mask);
}
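/* Editor's note: NumberofMultisamples takes the log2 of the sample count,
 * and __builtin_ffs(n) - 1 is exactly log2(n) for the power-of-two counts
 * Vulkan allows. A standalone sketch of the mapping, not part of the
 * driver:
 */
#include <stdint.h>

static uint32_t
log2_sample_count(uint32_t samples)
{
   /* 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3, 16 -> 4 */
   return __builtin_ffs(samples) - 1;
}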
VkResult
genX(graphics_pipeline_create)(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkGraphicsPipelineCreateInfo*         pCreateInfo,
    const struct anv_graphics_pipeline_create_info *extra,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline *pipeline;
   VkResult result;
   uint32_t offset, length;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_pipeline_init(pipeline, device, cache,
                              pCreateInfo, extra, pAllocator);
   if (result != VK_SUCCESS) {
      anv_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   assert(pCreateInfo->pVertexInputState);
   emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra);
   assert(pCreateInfo->pInputAssemblyState);
   emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra);
   assert(pCreateInfo->pRasterizationState);
   emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
                 pCreateInfo->pMultisampleState, extra);
   emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
   emit_ds_state(pipeline, pCreateInfo->pDepthStencilState);
   emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
                 pCreateInfo->pMultisampleState);

   emit_urb_setup(pipeline);

   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP),
                  .ClipEnable = !(extra && extra->use_rectlist),
                  .EarlyCullEnable = true,
                  .APIMode = 1, /* D3D */
                  .ViewportXYClipTestEnable = true,

                  .ClipMode =
                     pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
                        REJECT_ALL : NORMAL,

                  .NonPerspectiveBarycentricEnable = wm_prog_data ?
                     (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0,

                  .TriangleStripListProvokingVertexSelect = 0,
                  .LineStripListProvokingVertexSelect = 0,
                  .TriangleFanProvokingVertexSelect = 1,

                  .MinimumPointWidth = 0.125,
                  .MaximumPointWidth = 255.875,
                  .MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM),
                  .StatisticsEnable = true,
                  .LineEndCapAntialiasingRegionWidth = _05pixels,
                  .LineAntialiasingRegionWidth = _10pixels,
                  .EarlyDepthStencilControl = NORMAL,
                  .ForceThreadDispatchEnable = NORMAL,
                  .PointRasterizationRule = RASTRULE_UPPER_RIGHT,
                  .BarycentricInterpolationMode =
                     pipeline->ps_ksp0 == NO_KERNEL ?
                        0 : wm_prog_data->barycentric_interp_modes);

   if (pipeline->gs_kernel == NO_KERNEL) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false);
   } else {
      const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
      offset = 1;
      length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;

      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS),
                     .SingleProgramFlow = false,
                     .KernelStartPointer = pipeline->gs_kernel,
                     .VectorMaskEnable = false,
                     .SamplerCount = 0,
                     .BindingTableEntryCount = 0,
                     .ExpectedVertexCount = gs_prog_data->vertices_in,

                     .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
                     .PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base),

                     .OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1,
                     .OutputTopology = gs_prog_data->output_topology,
                     .VertexURBEntryReadLength = gs_prog_data->base.urb_read_length,
                     .IncludeVertexHandles = gs_prog_data->base.include_vue_handles,
                     .DispatchGRFStartRegisterForURBData =
                        gs_prog_data->base.base.dispatch_grf_start_reg,

                     .MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1,
                     .ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords,
                     .DispatchMode = gs_prog_data->base.dispatch_mode,
                     .StatisticsEnable = true,
                     .IncludePrimitiveID = gs_prog_data->include_primitive_id,
                     .ReorderMode = TRAILING,
                     .Enable = true,

                     .ControlDataFormat = gs_prog_data->control_data_format,

                     .StaticOutput = gs_prog_data->static_vertex_count >= 0,
                     .StaticOutputVertexCount =
                        gs_prog_data->static_vertex_count >= 0 ?
                           gs_prog_data->static_vertex_count : 0,

                     /* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
                      * UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
                      * UserClipDistanceCullTestEnableBitmask(v)
                      */

                     .VertexURBEntryOutputReadOffset = offset,
                     .VertexURBEntryOutputLength = length);
   }

   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   /* Skip the VUE header and position slots */
   offset = 1;
   length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;

   uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 :
                                                         pipeline->vs_vec4;

   if (vs_start == NO_KERNEL || (extra && extra->disable_vs))
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS),
                     .FunctionEnable = false,
                     /* Even if VS is disabled, SBE still gets the amount of
                      * vertex data to read from this field. */
                     .VertexURBEntryOutputReadOffset = offset,
                     .VertexURBEntryOutputLength = length);
   else
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS),
                     .KernelStartPointer = vs_start,
                     .SingleVertexDispatch = false,
                     .VectorMaskEnable = false,
                     .SamplerCount = 0,
                     .BindingTableEntryCount =
                        vs_prog_data->base.base.binding_table.size_bytes / 4,
                     .ThreadDispatchPriority = false,
                     .FloatingPointMode = IEEE754,
                     .IllegalOpcodeExceptionEnable = false,
                     .AccessesUAV = false,
                     .SoftwareExceptionEnable = false,

                     .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX],
                     .PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base),

                     .DispatchGRFStartRegisterForURBData =
                        vs_prog_data->base.base.dispatch_grf_start_reg,
                     .VertexURBEntryReadLength = vs_prog_data->base.urb_read_length,
                     .VertexURBEntryReadOffset = 0,

                     .MaximumNumberofThreads = device->info.max_vs_threads - 1,
                     .StatisticsEnable = false,
                     .SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL,
                     .VertexCacheDisable = false,
                     .FunctionEnable = true,

                     .VertexURBEntryOutputReadOffset = offset,
                     .VertexURBEntryOutputLength = length,
                     .UserClipDistanceClipTestEnableBitmask = 0,
                     .UserClipDistanceCullTestEnableBitmask = 0);

   const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
   if (pipeline->ps_ksp0 == NO_KERNEL) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS));
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
                     .PixelShaderValid = false);
   } else {
      emit_3dstate_sbe(pipeline);

      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
                     .KernelStartPointer0 = pipeline->ps_ksp0,

                     .SingleProgramFlow = false,
                     .VectorMaskEnable = true,
                     .SamplerCount = 1,

                     .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
                     .PerThreadScratchSpace = scratch_space(&wm_prog_data->base),

                     .MaximumNumberofThreadsPerPSD = 64 - num_thread_bias,
                     .PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
                        POSOFFSET_SAMPLE : POSOFFSET_NONE,
                     .PushConstantEnable = wm_prog_data->base.nr_params > 0,
                     ._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
                     ._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
                     ._32PixelDispatchEnable = false,

                     .DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0,
                     .DispatchGRFStartRegisterForConstantSetupData1 = 0,
                     .DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2,

                     .KernelStartPointer1 = 0,
                     .KernelStartPointer2 = pipeline->ps_ksp2);

      bool per_sample_ps = pCreateInfo->pMultisampleState &&
                           pCreateInfo->pMultisampleState->sampleShadingEnable;

      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
                     .PixelShaderValid = true,
                     .PixelShaderKillsPixel = wm_prog_data->uses_kill,
                     .PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
                     .AttributeEnable = wm_prog_data->num_varying_inputs > 0,
                     .oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
                     .PixelShaderIsPerSample = per_sample_ps,
                     .PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth,
                     .PixelShaderUsesSourceW = wm_prog_data->uses_src_w,
#if GEN_GEN >= 9
                     .PixelShaderPullsBary = wm_prog_data->pulls_bary,
                     .InputCoverageMaskState = wm_prog_data->uses_sample_mask ?
                        ICMS_INNER_CONSERVATIVE : ICMS_NONE,
#else
                     .PixelShaderUsesInputCoverageMask =
                        wm_prog_data->uses_sample_mask,
#endif
                     );
   }

   *pPipeline = anv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}
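/* Editor's note: a worked example of the VertexURBEntryOutputReadOffset /
 * Length arithmetic used for 3DSTATE_VS and 3DSTATE_GS above. VUE slots
 * are 128 bits and are counted in pairs (256-bit rows); offset 1 skips the
 * row holding the VUE header and position. With num_slots = 7, for
 * instance, length = (7 + 1) / 2 - 1 = 3, i.e. three rows after the
 * header. A standalone sketch, assuming only that num_slots counts 128-bit
 * slots:
 */
#include <stdint.h>

static uint32_t
vue_output_read_length(uint32_t num_slots, uint32_t read_offset)
{
   /* round the slot count up to whole 256-bit rows, then skip the offset */
   return (num_slots + 1) / 2 - read_offset;
}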
1304
src/intel/vulkan/genX_cmd_buffer.c
Normal file
File diff suppressed because it is too large

129
src/intel/vulkan/genX_pipeline.c
Normal file

@@ -0,0 +1,129 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

VkResult
genX(compute_pipeline_create)(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkComputePipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      anv_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));

   pipeline->vs_simd8 = NO_KERNEL;
   pipeline->vs_vec4 = NO_KERNEL;
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;
   pipeline->total_scratch = 0;

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
   ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
   anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
                           pCreateInfo->stage.pName,
                           pCreateInfo->stage.pSpecializationInfo);

   pipeline->use_repclear = false;

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
   unsigned push_constant_data_size =
      (prog_data->nr_params + local_id_dwords) * 4;
   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
   unsigned push_constant_regs = reg_aligned_constant_size / 32;

   uint32_t group_size = cs_prog_data->local_size[0] *
      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
   pipeline->cs_thread_width_max =
      DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);

   if (remainder > 0)
      pipeline->cs_right_mask = ~0u >> (32 - remainder);
   else
      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);

   const uint32_t vfe_curbe_allocation =
      push_constant_regs * pipeline->cs_thread_width_max;

   anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE),
                  .ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE],
                  .PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048),
#if GEN_GEN > 7
                  .ScratchSpaceBasePointerHigh = 0,
                  .StackSize = 0,
#else
                  .GPGPUMode = true,
#endif
                  .MaximumNumberofThreads = device->info.max_cs_threads - 1,
                  .NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2,
                  .ResetGatewayTimer = true,
#if GEN_GEN <= 8
                  .BypassGatewayControl = true,
#endif
                  .URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2,
                  .CURBEAllocationSize = vfe_curbe_allocation);

   *pPipeline = anv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}
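/* Editor's note: a worked example of the cs_right_mask computation above,
 * assuming local_size = (7, 1, 1) and simd_size = 8. group_size = 7, so
 * cs_thread_width_max = DIV_ROUND_UP(7, 8) = 1 and remainder = 7 & 7 = 7;
 * the right mask becomes ~0u >> (32 - 7) = 0x7f, enabling only the 7 live
 * channels of the final (here, only) thread. A standalone sketch, not part
 * of the driver:
 */
#include <stdint.h>

static uint32_t
cs_right_mask(uint32_t group_size, uint32_t simd_size)
{
   uint32_t remainder = group_size & (simd_size - 1);
   if (remainder > 0)
      return ~0u >> (32 - remainder);   /* partial final thread */
   else
      return ~0u >> (32 - simd_size);   /* all channels live */
}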
433
src/intel/vulkan/genX_pipeline_util.h
Normal file

@@ -0,0 +1,433 @@
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp < 3) {
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}
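/* Editor's note: a worked example of the component-control rules above for
 * ISL_FORMAT_R32G32_FLOAT. Components 0 and 1 have nonzero bits, so they
 * take VFCOMP_STORE_SRC; component 2 is missing and is below w, so it gets
 * VFCOMP_STORE_0; component 3 is missing on a float (non-UINT/SINT) format,
 * so it gets VFCOMP_STORE_1_FP. The fetched attribute therefore reads back
 * as (x, y, 0.0, 1.0), matching the GL/Vulkan default for missing
 * components.
 */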
static void
emit_vertex_input(struct anv_pipeline *pipeline,
                  const VkPipelineVertexInputStateCreateInfo *info,
                  const struct anv_graphics_pipeline_create_info *extra)
{
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   uint32_t elements;
   if (extra && extra->disable_vs) {
      /* If the VS is disabled, just assume the user knows what they're
       * doing and apply the layout blindly.  This can only come from
       * meta, so this *should* be safe.
       */
      elements = 0;
      for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++)
         elements |= (1 << info->pVertexAttributeDescriptions[i].location);
   } else {
      /* Pull inputs_read out of the VS prog data */
      uint64_t inputs_read = vs_prog_data->inputs_read;
      assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
      elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   }

#if GEN_GEN >= 8
   /* On BDW+, we only need to allocate space for base ids.  Setting up
    * the actual vertex and instance id is a separate packet.
    */
   const bool needs_svgs_elem = vs_prog_data->uses_basevertex ||
                                vs_prog_data->uses_baseinstance;
#else
   /* On Haswell and prior, vertex and instance id are created by using the
    * ComponentControl fields, so we need an element for any of them.
    */
   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                vs_prog_data->uses_instanceid ||
                                vs_prog_data->uses_basevertex ||
                                vs_prog_data->uses_baseinstance;
#endif

   uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem;
   if (elem_count == 0)
      return;

   uint32_t *p;

   const uint32_t num_dwords = 1 + elem_count * 2;
   p = anv_batch_emitn(&pipeline->batch, num_dwords,
                       GENX(3DSTATE_VERTEX_ELEMENTS));
   memset(p + 1, 0, (num_dwords - 1) * 4);

   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &info->pVertexAttributeDescriptions[i];
      enum isl_format format = anv_get_isl_format(desc->format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR,
                                                  NULL);

      assert(desc->binding < 32);

      if ((elements & (1 << desc->location)) == 0)
         continue; /* Binding unused */

      uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1));

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = desc->binding,
         .Valid = true,
         .SourceElementFormat = format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = desc->offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);

#if GEN_GEN >= 8
      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing.  On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING),
                     .InstancingEnable = pipeline->instancing_enable[desc->binding],
                     .VertexElementIndex = slot,
                     /* Vulkan so far doesn't have an instance divisor, so
                      * this is always 1 (ignored if not instancing). */
                     .InstanceDataStepRate = 1);
#endif
   }

   const uint32_t id_slot = __builtin_popcount(elements);
   if (needs_svgs_elem) {
      /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
       *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
       *    Control field is set to something other than VFCOMP_STORE_SRC,
       *    no higher-numbered Component Control fields may be set to
       *    VFCOMP_STORE_SRC"
       *
       * This means, that if we have BaseInstance, we need BaseVertex as
       * well.  Just do all or nothing.
       */
      uint32_t base_ctrl = (vs_prog_data->uses_basevertex ||
                            vs_prog_data->uses_baseinstance) ?
                           VFCOMP_STORE_SRC : VFCOMP_STORE_0;

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = 32, /* Reserved for this */
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
         .Component0Control = base_ctrl,
         .Component1Control = base_ctrl,
#if GEN_GEN >= 8
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#else
         .Component2Control = VFCOMP_STORE_VID,
         .Component3Control = VFCOMP_STORE_IID,
#endif
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);
   }

#if GEN_GEN >= 8
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS),
                  .VertexIDEnable = vs_prog_data->uses_vertexid,
                  .VertexIDComponentNumber = 2,
                  .VertexIDElementOffset = id_slot,
                  .InstanceIDEnable = vs_prog_data->uses_instanceid,
                  .InstanceIDComponentNumber = 3,
                  .InstanceIDElementOffset = id_slot);
#endif
}
static inline void
emit_urb_setup(struct anv_pipeline *pipeline)
{
#if GEN_GEN == 7 && !GEN_IS_HASWELL
   struct anv_device *device = pipeline->device;

   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
    *
    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
    *    needs to be sent before any combination of VS associated 3DSTATE."
    */
   anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL,
                  .DepthStallEnable = true,
                  .PostSyncOperation = WriteImmediateData,
                  .Address = { &device->workaround_bo, 0 });
#endif

   unsigned push_start = 0;
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
      unsigned push_size = pipeline->urb.push_size[i];
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS),
                     ._3DCommandSubOpcode = 18 + i,
                     .ConstantBufferOffset = (push_size > 0) ? push_start : 0,
                     .ConstantBufferSize = push_size);
      push_start += pipeline->urb.push_size[i];
   }

   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS),
                     ._3DCommandSubOpcode = 48 + i,
                     .VSURBStartingAddress = pipeline->urb.start[i],
                     .VSURBEntryAllocationSize = pipeline->urb.size[i] - 1,
                     .VSNumberofURBEntries = pipeline->urb.entries[i]);
   }
}
static void
emit_3dstate_sbe(struct anv_pipeline *pipeline)
{
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   const struct brw_vue_map *fs_input_map;

   if (pipeline->gs_kernel == NO_KERNEL)
      fs_input_map = &vs_prog_data->base.vue_map;
   else
      fs_input_map = &gs_prog_data->base.vue_map;

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      .AttributeSwizzleEnable = true,
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,

#if GEN_GEN >= 9
      .Attribute0ActiveComponentFormat = ACF_XYZW,
      .Attribute1ActiveComponentFormat = ACF_XYZW,
      .Attribute2ActiveComponentFormat = ACF_XYZW,
      .Attribute3ActiveComponentFormat = ACF_XYZW,
      .Attribute4ActiveComponentFormat = ACF_XYZW,
      .Attribute5ActiveComponentFormat = ACF_XYZW,
      .Attribute6ActiveComponentFormat = ACF_XYZW,
      .Attribute7ActiveComponentFormat = ACF_XYZW,
      .Attribute8ActiveComponentFormat = ACF_XYZW,
      .Attribute9ActiveComponentFormat = ACF_XYZW,
      .Attribute10ActiveComponentFormat = ACF_XYZW,
      .Attribute11ActiveComponentFormat = ACF_XYZW,
      .Attribute12ActiveComponentFormat = ACF_XYZW,
      .Attribute13ActiveComponentFormat = ACF_XYZW,
      .Attribute14ActiveComponentFormat = ACF_XYZW,
      .Attribute15ActiveComponentFormat = ACF_XYZW,
      /* wow, much field, very attribute */
      .Attribute16ActiveComponentFormat = ACF_XYZW,
      .Attribute17ActiveComponentFormat = ACF_XYZW,
      .Attribute18ActiveComponentFormat = ACF_XYZW,
      .Attribute19ActiveComponentFormat = ACF_XYZW,
      .Attribute20ActiveComponentFormat = ACF_XYZW,
      .Attribute21ActiveComponentFormat = ACF_XYZW,
      .Attribute22ActiveComponentFormat = ACF_XYZW,
      .Attribute23ActiveComponentFormat = ACF_XYZW,
      .Attribute24ActiveComponentFormat = ACF_XYZW,
      .Attribute25ActiveComponentFormat = ACF_XYZW,
      .Attribute26ActiveComponentFormat = ACF_XYZW,
      .Attribute27ActiveComponentFormat = ACF_XYZW,
      .Attribute28ActiveComponentFormat = ACF_XYZW,
      .Attribute29ActiveComponentFormat = ACF_XYZW,
      .Attribute30ActiveComponentFormat = ACF_XYZW,
#endif
   };

#if GEN_GEN >= 8
   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };
#else
#  define swiz sbe
#endif

   int max_source_attr = 0;
   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      int input_index = wm_prog_data->urb_setup[attr];

      if (input_index < 0)
         continue;

      const int slot = fs_input_map->varying_to_slot[attr];

      if (input_index >= 16)
         continue;

      if (slot == -1) {
         /* This attribute does not exist in the VUE--that means that the
          * vertex shader did not write to it.  It could be that it's a
          * regular varying read by the fragment shader but not written by
          * the vertex shader or it's gl_PrimitiveID.  In the first case the
          * value is undefined, in the second it needs to be
          * gl_PrimitiveID.
          */
         swiz.Attribute[input_index].ConstantSource = PRIM_ID;
         swiz.Attribute[input_index].ComponentOverrideX = true;
         swiz.Attribute[input_index].ComponentOverrideY = true;
         swiz.Attribute[input_index].ComponentOverrideZ = true;
         swiz.Attribute[input_index].ComponentOverrideW = true;
      } else {
         assert(slot >= 2);
         const int source_attr = slot - 2;
         max_source_attr = MAX2(max_source_attr, source_attr);
         /* We have to subtract two slots to account for the URB entry output
          * read offset in the VS and GS stages.
          */
         swiz.Attribute[input_index].SourceAttribute = source_attr;
      }
   }

   sbe.VertexURBEntryReadOffset = 1; /* Skip the VUE header and position slots */
   sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
                                        GENX(3DSTATE_SBE_length));
   GENX(3DSTATE_SBE_pack)(&pipeline->batch, dw, &sbe);

#if GEN_GEN >= 8
   dw = anv_batch_emit_dwords(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ_length));
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
#endif
}
static inline uint32_t
scratch_space(const struct brw_stage_prog_data *prog_data)
{
   return ffs(prog_data->total_scratch / 2048);
}
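/* Editor's note: a worked example of the encoding above. ffs() turns the
 * power-of-two count of 2kB scratch blocks into a log2-style field value:
 * total_scratch = 0 gives ffs(0) = 0 (no scratch), 2048 gives ffs(1) = 1,
 * and 8192 gives ffs(4) = 3. This assumes total_scratch is zero or a
 * power-of-two multiple of 2kB, which the compiler's scratch allocation
 * is expected to guarantee.
 */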
static const uint32_t vk_to_gen_cullmode[] = {
   [VK_CULL_MODE_NONE] = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};

static const uint32_t vk_to_gen_fillmode[] = {
   [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};

static const uint32_t vk_to_gen_front_face[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
   [VK_FRONT_FACE_CLOCKWISE] = 0
};

static const uint32_t vk_to_gen_logic_op[] = {
   [VK_LOGIC_OP_COPY] = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = LOGICOP_SET,
};

static const uint32_t vk_to_gen_blend[] = {
   [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_gen_blend_op[] = {
   [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
};

static const uint32_t vk_to_gen_compare_op[] = {
   [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
   [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
   [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL,
   [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
   [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL,
   [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
};

static const uint32_t vk_to_gen_stencil_op[] = {
   [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
};
270
src/intel/vulkan/genX_state.c
Normal file

@@ -0,0 +1,270 @@
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "genxml/genX_pack.h"
|
||||
|
||||
VkResult
|
||||
genX(init_device_state)(struct anv_device *device)
|
||||
{
|
||||
GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
|
||||
&GENX(MOCS));
|
||||
|
||||
struct anv_batch batch;
|
||||
|
||||
uint32_t cmds[64];
|
||||
batch.start = batch.next = cmds;
|
||||
batch.end = (void *) cmds + sizeof(cmds);
|
||||
|
||||
anv_batch_emit(&batch, GENX(PIPELINE_SELECT),
|
||||
#if GEN_GEN >= 9
|
||||
.MaskBits = 3,
|
||||
#endif
|
||||
.PipelineSelection = _3D);
|
||||
|
||||
anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS),
|
||||
.StatisticsEnable = true);
|
||||
anv_batch_emit(&batch, GENX(3DSTATE_HS));
|
||||
anv_batch_emit(&batch, GENX(3DSTATE_TE));
|
||||
anv_batch_emit(&batch, GENX(3DSTATE_DS));
|
||||
|
||||
anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false);
|
||||
anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS));
|
||||
|
||||
#if GEN_GEN >= 8
|
||||
anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY),
|
||||
.ChromaKeyKillEnable = false);
|
||||
|
||||
/* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
|
||||
* VkPhysicalDeviceFeatures::standardSampleLocations.
|
||||
*/
   anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN),
                  ._1xSample0XOffset = 0.5,
                  ._1xSample0YOffset = 0.5,
                  ._2xSample0XOffset = 0.25,
                  ._2xSample0YOffset = 0.25,
                  ._2xSample1XOffset = 0.75,
                  ._2xSample1YOffset = 0.75,
                  ._4xSample0XOffset = 0.375,
                  ._4xSample0YOffset = 0.125,
                  ._4xSample1XOffset = 0.875,
                  ._4xSample1YOffset = 0.375,
                  ._4xSample2XOffset = 0.125,
                  ._4xSample2YOffset = 0.625,
                  ._4xSample3XOffset = 0.625,
                  ._4xSample3YOffset = 0.875,
                  ._8xSample0XOffset = 0.5625,
                  ._8xSample0YOffset = 0.3125,
                  ._8xSample1XOffset = 0.4375,
                  ._8xSample1YOffset = 0.6875,
                  ._8xSample2XOffset = 0.8125,
                  ._8xSample2YOffset = 0.5625,
                  ._8xSample3XOffset = 0.3125,
                  ._8xSample3YOffset = 0.1875,
                  ._8xSample4XOffset = 0.1875,
                  ._8xSample4YOffset = 0.8125,
                  ._8xSample5XOffset = 0.0625,
                  ._8xSample5YOffset = 0.4375,
                  ._8xSample6XOffset = 0.6875,
                  ._8xSample6YOffset = 0.9375,
                  ._8xSample7XOffset = 0.9375,
                  ._8xSample7YOffset = 0.0625,
#if GEN_GEN >= 9
                  ._16xSample0XOffset = 0.5625,
                  ._16xSample0YOffset = 0.5625,
                  ._16xSample1XOffset = 0.4375,
                  ._16xSample1YOffset = 0.3125,
                  ._16xSample2XOffset = 0.3125,
                  ._16xSample2YOffset = 0.6250,
                  ._16xSample3XOffset = 0.7500,
                  ._16xSample3YOffset = 0.4375,
                  ._16xSample4XOffset = 0.1875,
                  ._16xSample4YOffset = 0.3750,
                  ._16xSample5XOffset = 0.6250,
                  ._16xSample5YOffset = 0.8125,
                  ._16xSample6XOffset = 0.8125,
                  ._16xSample6YOffset = 0.6875,
                  ._16xSample7XOffset = 0.6875,
                  ._16xSample7YOffset = 0.1875,
                  ._16xSample8XOffset = 0.3750,
                  ._16xSample8YOffset = 0.8750,
                  ._16xSample9XOffset = 0.5000,
                  ._16xSample9YOffset = 0.0625,
                  ._16xSample10XOffset = 0.2500,
                  ._16xSample10YOffset = 0.1250,
                  ._16xSample11XOffset = 0.1250,
                  ._16xSample11YOffset = 0.7500,
                  ._16xSample12XOffset = 0.0000,
                  ._16xSample12YOffset = 0.5000,
                  ._16xSample13XOffset = 0.9375,
                  ._16xSample13YOffset = 0.2500,
                  ._16xSample14XOffset = 0.8750,
                  ._16xSample14YOffset = 0.9375,
                  ._16xSample15XOffset = 0.0625,
                  ._16xSample15YOffset = 0.0000,
#endif
   );
#endif

   anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END));

   assert(batch.next <= batch.end);

   return anv_device_submit_simple_batch(device, &batch);
}

static inline uint32_t
vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable)
{
   switch (filter) {
   default:
      assert(!"Invalid filter");
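      /* Fall through: with asserts compiled out, an unknown filter is
       * treated as NEAREST.
       */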
   case VK_FILTER_NEAREST:
      return MAPFILTER_NEAREST;
   case VK_FILTER_LINEAR:
      return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR;
   }
}
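/* The hardware encodes the maximum anisotropy ratio in steps of 2,
 * starting at 2:1, so the formula below maps:
 *   ratio  2.0 -> (2  - 2) / 2 = 0   (2:1)
 *   ratio  4.0 -> (4  - 2) / 2 = 1   (4:1)
 *   ratio 16.0 -> (16 - 2) / 2 = 7   (16:1)
 * Ratios outside [2, 16] are clamped first.
 */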
static inline uint32_t
vk_to_gen_max_anisotropy(float ratio)
{
   return (anv_clamp_f(ratio, 2, 16) - 2) / 2;
}

static const uint32_t vk_to_gen_mipmap_mode[] = {
   [VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
   [VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
};

static const uint32_t vk_to_gen_tex_address[] = {
   [VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP,
   [VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR,
   [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP,
   [VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
   [VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
};

/* Vulkan specifies the result of shadow comparisons as:
 *    1 if ref <op> texel,
 *    0 otherwise.
 *
 * The hardware does:
 *    0 if texel <op> ref,
 *    1 otherwise.
 *
 * So these mappings look a bit strange because there is both a negation
 * and a swapping of the arguments involved.
 */
static const uint32_t vk_to_gen_shadow_compare_op[] = {
   [VK_COMPARE_OP_NEVER] = PREFILTEROPALWAYS,
   [VK_COMPARE_OP_LESS] = PREFILTEROPLEQUAL,
   [VK_COMPARE_OP_EQUAL] = PREFILTEROPNOTEQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLESS,
   [VK_COMPARE_OP_GREATER] = PREFILTEROPGEQUAL,
   [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGREATER,
   [VK_COMPARE_OP_ALWAYS] = PREFILTEROPNEVER,
};
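
/* Worked example for VK_COMPARE_OP_LESS: Vulkan wants
 *    result = (ref < texel) ? 1 : 0,
 * while the hardware computes
 *    result = (texel <op> ref) ? 0 : 1.
 * Choosing <op> = LEQUAL gives
 *    (texel <= ref) ? 0 : 1  ==  (ref < texel) ? 1 : 0,
 * which is why VK_COMPARE_OP_LESS maps to PREFILTEROPLEQUAL above.
 */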

VkResult genX(CreateSampler)(
    VkDevice                                    _device,
    const VkSamplerCreateInfo*                  pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkSampler*                                  pSampler)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_sampler *sampler;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sampler)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   uint32_t border_color_offset = device->border_colors.offset +
                                  pCreateInfo->borderColor * 64;
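   /* Each pre-baked border color occupies a 64-byte slot in the device's
    * border-color pool, so the VkBorderColor enum value can be used
    * directly as an index.
    */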

   struct GENX(SAMPLER_STATE) sampler_state = {
      .SamplerDisable = false,
      .TextureBorderColorMode = DX10OGL,

#if GEN_GEN >= 8
      .LODPreClampMode = CLAMP_MODE_OGL,
#else
      .LODPreClampEnable = CLAMP_ENABLE_OGL,
#endif

#if GEN_GEN == 8
      .BaseMipLevel = 0.0,
#endif
      .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode],
      .MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter,
                                            pCreateInfo->anisotropyEnable),
      .MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter,
                                            pCreateInfo->anisotropyEnable),
      .TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996),
      .AnisotropicAlgorithm = EWAApproximation,
      .MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14),
      .MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14),
      .ChromaKeyEnable = 0,
      .ChromaKeyIndex = 0,
      .ChromaKeyMode = 0,
      .ShadowFunction = vk_to_gen_shadow_compare_op[pCreateInfo->compareOp],
      .CubeSurfaceControlMode = OVERRIDE,

      .BorderColorPointer = border_color_offset,

#if GEN_GEN >= 8
      .LODClampMagnificationMode = MIPNONE,
#endif

      .MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy),
      .RAddressMinFilterRoundingEnable = 0,
      .RAddressMagFilterRoundingEnable = 0,
      .VAddressMinFilterRoundingEnable = 0,
      .VAddressMagFilterRoundingEnable = 0,
      .UAddressMinFilterRoundingEnable = 0,
      .UAddressMagFilterRoundingEnable = 0,
      .TrilinearFilterQuality = 0,
      .NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates,
      .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU],
      .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV],
      .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW],
   };

   GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state);

   *pSampler = anv_sampler_to_handle(sampler);

   return VK_SUCCESS;
}
7
src/intel/vulkan/intel_icd.json
Normal file
@ -0,0 +1,7 @@
{
    "file_format_version": "1.0.0",
    "ICD": {
        "library_path": "libvulkan_intel.so",
        "abi_versions": "1.0.3"
    }
}
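(This manifest is what the Vulkan loader reads to locate the driver: library_path names the ICD shared object, and abi_versions appears to advertise the API version the ICD was built against.)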
5
src/intel/vulkan/tests/.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
block_pool
block_pool_no_free
state_pool
state_pool_free_list_only
state_pool_no_free
Some files were not shown because too many files have changed in this diff