Merge branch 'vulkan'

Author: Jason Ekstrand
Date:   2016-04-15 13:52:34 -07:00
Commit: cab30cc5f9
106 changed files with 57524 additions and 0 deletions

@@ -2621,6 +2621,11 @@ AC_CONFIG_FILES([Makefile
src/glx/apple/Makefile
src/glx/tests/Makefile
src/gtest/Makefile
src/intel/Makefile
src/intel/genxml/Makefile
src/intel/isl/Makefile
src/intel/vulkan/Makefile
src/intel/vulkan/tests/Makefile
src/loader/Makefile
src/mapi/Makefile
src/mapi/es1api/glesv1_cm.pc

include/vulkan/vk_icd.h (new file, 85 lines)

@@ -0,0 +1,85 @@
#ifndef VKICD_H
#define VKICD_H
#include "vk_platform.h"
/*
* The ICD must reserve space for a pointer for the loader's dispatch
* table, at the start of <each object>.
* The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro.
*/
#define ICD_LOADER_MAGIC 0x01CDC0DE
typedef union _VK_LOADER_DATA {
uintptr_t loaderMagic;
void *loaderData;
} VK_LOADER_DATA;
static inline void set_loader_magic_value(void* pNewObject) {
VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
loader_info->loaderMagic = ICD_LOADER_MAGIC;
}
static inline bool valid_loader_magic_value(void* pNewObject) {
const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC;
}
/*
* Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that
* contains the platform-specific connection and surface information.
*/
typedef enum _VkIcdWsiPlatform {
VK_ICD_WSI_PLATFORM_MIR,
VK_ICD_WSI_PLATFORM_WAYLAND,
VK_ICD_WSI_PLATFORM_WIN32,
VK_ICD_WSI_PLATFORM_XCB,
VK_ICD_WSI_PLATFORM_XLIB,
} VkIcdWsiPlatform;
typedef struct _VkIcdSurfaceBase {
VkIcdWsiPlatform platform;
} VkIcdSurfaceBase;
#ifdef VK_USE_PLATFORM_MIR_KHR
typedef struct _VkIcdSurfaceMir {
VkIcdSurfaceBase base;
MirConnection* connection;
MirSurface* mirSurface;
} VkIcdSurfaceMir;
#endif // VK_USE_PLATFORM_MIR_KHR
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
typedef struct _VkIcdSurfaceWayland {
VkIcdSurfaceBase base;
struct wl_display* display;
struct wl_surface* surface;
} VkIcdSurfaceWayland;
#endif // VK_USE_PLATFORM_WAYLAND_KHR
#ifdef VK_USE_PLATFORM_WIN32_KHR
typedef struct _VkIcdSurfaceWin32 {
VkIcdSurfaceBase base;
HINSTANCE hinstance;
HWND hwnd;
} VkIcdSurfaceWin32;
#endif // VK_USE_PLATFORM_WIN32_KHR
#ifdef VK_USE_PLATFORM_XCB_KHR
typedef struct _VkIcdSurfaceXcb {
VkIcdSurfaceBase base;
xcb_connection_t* connection;
xcb_window_t window;
} VkIcdSurfaceXcb;
#endif // VK_USE_PLATFORM_XCB_KHR
#ifdef VK_USE_PLATFORM_XLIB_KHR
typedef struct _VkIcdSurfaceXlib {
VkIcdSurfaceBase base;
Display* dpy;
Window window;
} VkIcdSurfaceXlib;
#endif // VK_USE_PLATFORM_XLIB_KHR
#endif // VKICD_H
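
To make the loader-magic contract described at the top of this header concrete, here is a minimal sketch from the ICD side; the my_icd_queue object and my_icd_create_queue function are hypothetical and not part of this header:

#include <stdbool.h>   /* for the bool used by vk_icd.h's inline helpers */
#include <stdlib.h>
#include "vk_icd.h"

/* Sketch only: a dispatchable driver object.  The VK_LOADER_DATA member
 * must be the very first field so the loader can overwrite it with its
 * dispatch-table pointer. */
struct my_icd_queue {
   VK_LOADER_DATA base;          /* reserved for the loader */
   /* ...driver-private state follows... */
};

static struct my_icd_queue *
my_icd_create_queue(void)
{
   struct my_icd_queue *queue = calloc(1, sizeof(*queue));
   if (queue == NULL)
      return NULL;

   /* Stamp the magic so the loader's valid_loader_magic_value() check
    * passes on the object we hand back. */
   set_loader_magic_value(queue);
   return queue;
}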

@@ -0,0 +1,127 @@
//
// File: vk_platform.h
//
/*
** Copyright (c) 2014-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
#ifndef VK_PLATFORM_H_
#define VK_PLATFORM_H_
#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus
/*
***************************************************************************************************
* Platform-specific directives and type declarations
***************************************************************************************************
*/
/* Platform-specific calling convention macros.
*
* Platforms should define these so that Vulkan clients call Vulkan commands
* with the same calling conventions that the Vulkan implementation expects.
*
* VKAPI_ATTR - Placed before the return type in function declarations.
* Useful for C++11 and GCC/Clang-style function attribute syntax.
* VKAPI_CALL - Placed after the return type in function declarations.
* Useful for MSVC-style calling convention syntax.
* VKAPI_PTR - Placed between the '(' and '*' in function pointer types.
*
* Function declaration: VKAPI_ATTR void VKAPI_CALL vkCommand(void);
* Function pointer type: typedef void (VKAPI_PTR *PFN_vkCommand)(void);
*/
#if defined(_WIN32)
// On Windows, Vulkan commands use the stdcall convention
#define VKAPI_ATTR
#define VKAPI_CALL __stdcall
#define VKAPI_PTR VKAPI_CALL
#elif defined(__ANDROID__) && defined(__ARM_EABI__) && !defined(__ARM_ARCH_7A__)
// Android does not support Vulkan in native code using the "armeabi" ABI.
#error "Vulkan requires the 'armeabi-v7a' or 'armeabi-v7a-hard' ABI on 32-bit ARM CPUs"
#elif defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
// On Android/ARMv7a, Vulkan functions use the armeabi-v7a-hard calling
// convention, even if the application's native code is compiled with the
// armeabi-v7a calling convention.
#define VKAPI_ATTR __attribute__((pcs("aapcs-vfp")))
#define VKAPI_CALL
#define VKAPI_PTR VKAPI_ATTR
#else
// On other platforms, use the default calling convention
#define VKAPI_ATTR
#define VKAPI_CALL
#define VKAPI_PTR
#endif
#include <stddef.h>
#if !defined(VK_NO_STDINT_H)
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef signed __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef signed __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef signed __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif
#endif // !defined(VK_NO_STDINT_H)
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
// Platform-specific headers required by platform window system extensions.
// These are enabled prior to #including "vulkan.h". The same enable then
// controls inclusion of the extension interfaces in vulkan.h.
#ifdef VK_USE_PLATFORM_ANDROID_KHR
#include <android/native_window.h>
#endif
#ifdef VK_USE_PLATFORM_MIR_KHR
#include <mir_toolkit/client_types.h>
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
#include <wayland-client.h>
#endif
#ifdef VK_USE_PLATFORM_WIN32_KHR
#include <windows.h>
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
#include <X11/Xlib.h>
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
#include <xcb/xcb.h>
#endif
#endif
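
The calling-convention comment above pairs naturally with a short usage sketch; the names myDebugHook, PFN_myDebugHook, and load_enumerate_extensions are hypothetical, and the loader entry points are assumed to come from vulkan.h:

#include <vulkan/vulkan.h>   /* pulls in vk_platform.h and the VKAPI_* macros */

/* A declaration follows the "VKAPI_ATTR <return type> VKAPI_CALL" pattern... */
VKAPI_ATTR void VKAPI_CALL myDebugHook(void);

/* ...while a function-pointer type places VKAPI_PTR between '(' and '*'. */
typedef void (VKAPI_PTR *PFN_myDebugHook)(void);

/* Entry points fetched at run time are cast from PFN_vkVoidFunction to the
 * properly annotated pointer type, so the calling convention still matches. */
static PFN_vkEnumerateInstanceExtensionProperties
load_enumerate_extensions(PFN_vkGetInstanceProcAddr gpa, VkInstance instance)
{
   return (PFN_vkEnumerateInstanceExtensionProperties)
      gpa(instance, "vkEnumerateInstanceExtensionProperties");
}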

include/vulkan/vulkan.h (new file, 3800 lines): diff suppressed because it is too large.

@@ -0,0 +1,62 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef __VULKAN_INTEL_H__
#define __VULKAN_INTEL_H__
#include "vulkan.h"
#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus
#define VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL 1024
typedef struct VkDmaBufImageCreateInfo_
{
VkStructureType sType; // Must be VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL
const void* pNext; // Pointer to next structure.
int fd;
VkFormat format;
VkExtent3D extent; // Depth must be 1
uint32_t strideInBytes;
} VkDmaBufImageCreateInfo;
typedef VkResult (VKAPI_PTR *PFN_vkCreateDmaBufImageINTEL)(VkDevice device, const VkDmaBufImageCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDeviceMemory* pMem, VkImage* pImage);
#ifdef VK_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkCreateDmaBufImageINTEL(
VkDevice _device,
const VkDmaBufImageCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDeviceMemory* pMem,
VkImage* pImage);
#endif
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // __VULKAN_INTEL_H__
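
A hedged usage sketch for the extension declared above; the dma-buf fd, format, and dimensions are placeholders, and the entry point is fetched by name through vkGetDeviceProcAddr since it is not part of core Vulkan:

#include <vulkan/vulkan_intel.h>

/* Sketch only: 'device' is an existing VkDevice, 'gdpa' is the device's
 * vkGetDeviceProcAddr, and 'fd' is a dma-buf file descriptor obtained
 * elsewhere (e.g. from GBM or KMS). */
static VkResult
import_dma_buf_image(VkDevice device, PFN_vkGetDeviceProcAddr gdpa,
                     int fd, uint32_t width, uint32_t height, uint32_t stride,
                     VkDeviceMemory *mem, VkImage *image)
{
   PFN_vkCreateDmaBufImageINTEL create_dma_buf_image =
      (PFN_vkCreateDmaBufImageINTEL)
         gdpa(device, "vkCreateDmaBufImageINTEL");
   if (create_dma_buf_image == NULL)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   const VkDmaBufImageCreateInfo info = {
      .sType = (VkStructureType)VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL,
      .pNext = NULL,
      .fd = fd,
      .format = VK_FORMAT_B8G8R8A8_UNORM,
      .extent = { width, height, 1 },   /* depth must be 1 */
      .strideInBytes = stride,
   };

   return create_dma_buf_image(device, &info, NULL /* pAllocator */, mem, image);
}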

@@ -56,6 +56,10 @@ EXTRA_DIST = \
AM_CFLAGS = $(VISIBILITY_CFLAGS)
AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
if HAVE_INTEL_DRIVERS
SUBDIRS += intel
endif
AM_CPPFLAGS = \
-I$(top_srcdir)/include/ \
-I$(top_srcdir)/src/mapi/ \

@@ -33,6 +33,7 @@ nir_libnir_la_LIBADD = \
nir_libnir_la_SOURCES = \
$(NIR_FILES) \
$(SPIRV_FILES) \
$(NIR_GENERATED_FILES)
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

@@ -233,3 +233,12 @@ NIR_FILES = \
nir/nir_vla.h \
nir/nir_worklist.c \
nir/nir_worklist.h
SPIRV_FILES = \
spirv/nir_spirv.h \
spirv/spirv_to_nir.c \
spirv/vtn_alu.c \
spirv/vtn_cfg.c \
spirv/vtn_glsl450.c \
spirv/vtn_private.h \
spirv/vtn_variables.c

@@ -345,6 +345,9 @@ LOAD(output, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
LOAD(per_vertex_output, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
/* src[] = { offset }. const_index[] = { base } */
LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
/* src[] = { offset }. const_index[] = { base, range } */
LOAD(push_constant, 1, 2, BASE, RANGE, xx,
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
/*
* Stores work the same way as loads, except now the first source is the value

@@ -0,0 +1,127 @@
/*
** Copyright (c) 2014-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
*/
#ifndef GLSLstd450_H
#define GLSLstd450_H
const int GLSLstd450Version = 99;
const int GLSLstd450Revision = 3;
enum GLSLstd450 {
GLSLstd450Bad = 0, // Don't use
GLSLstd450Round = 1,
GLSLstd450RoundEven = 2,
GLSLstd450Trunc = 3,
GLSLstd450FAbs = 4,
GLSLstd450SAbs = 5,
GLSLstd450FSign = 6,
GLSLstd450SSign = 7,
GLSLstd450Floor = 8,
GLSLstd450Ceil = 9,
GLSLstd450Fract = 10,
GLSLstd450Radians = 11,
GLSLstd450Degrees = 12,
GLSLstd450Sin = 13,
GLSLstd450Cos = 14,
GLSLstd450Tan = 15,
GLSLstd450Asin = 16,
GLSLstd450Acos = 17,
GLSLstd450Atan = 18,
GLSLstd450Sinh = 19,
GLSLstd450Cosh = 20,
GLSLstd450Tanh = 21,
GLSLstd450Asinh = 22,
GLSLstd450Acosh = 23,
GLSLstd450Atanh = 24,
GLSLstd450Atan2 = 25,
GLSLstd450Pow = 26,
GLSLstd450Exp = 27,
GLSLstd450Log = 28,
GLSLstd450Exp2 = 29,
GLSLstd450Log2 = 30,
GLSLstd450Sqrt = 31,
GLSLstd450InverseSqrt = 32,
GLSLstd450Determinant = 33,
GLSLstd450MatrixInverse = 34,
GLSLstd450Modf = 35, // second operand needs an OpVariable to write to
GLSLstd450ModfStruct = 36, // no OpVariable operand
GLSLstd450FMin = 37,
GLSLstd450UMin = 38,
GLSLstd450SMin = 39,
GLSLstd450FMax = 40,
GLSLstd450UMax = 41,
GLSLstd450SMax = 42,
GLSLstd450FClamp = 43,
GLSLstd450UClamp = 44,
GLSLstd450SClamp = 45,
GLSLstd450FMix = 46,
GLSLstd450IMix = 47,
GLSLstd450Step = 48,
GLSLstd450SmoothStep = 49,
GLSLstd450Fma = 50,
GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to
GLSLstd450FrexpStruct = 52, // no OpVariable operand
GLSLstd450Ldexp = 53,
GLSLstd450PackSnorm4x8 = 54,
GLSLstd450PackUnorm4x8 = 55,
GLSLstd450PackSnorm2x16 = 56,
GLSLstd450PackUnorm2x16 = 57,
GLSLstd450PackHalf2x16 = 58,
GLSLstd450PackDouble2x32 = 59,
GLSLstd450UnpackSnorm2x16 = 60,
GLSLstd450UnpackUnorm2x16 = 61,
GLSLstd450UnpackHalf2x16 = 62,
GLSLstd450UnpackSnorm4x8 = 63,
GLSLstd450UnpackUnorm4x8 = 64,
GLSLstd450UnpackDouble2x32 = 65,
GLSLstd450Length = 66,
GLSLstd450Distance = 67,
GLSLstd450Cross = 68,
GLSLstd450Normalize = 69,
GLSLstd450FaceForward = 70,
GLSLstd450Reflect = 71,
GLSLstd450Refract = 72,
GLSLstd450FindILsb = 73,
GLSLstd450FindSMsb = 74,
GLSLstd450FindUMsb = 75,
GLSLstd450InterpolateAtCentroid = 76,
GLSLstd450InterpolateAtSample = 77,
GLSLstd450InterpolateAtOffset = 78,
GLSLstd450Count
};
#endif // #ifndef GLSLstd450_H
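
For context, these enumerants appear as the "instruction" operand of a SPIR-V OpExtInst whose instruction set was imported with OpExtInstImport "GLSL.std.450". A minimal, hedged sketch of recognizing one (the helper name is made up; the word layout follows the SPIR-V binary form):

#include <stdbool.h>
#include <stdint.h>
#include "GLSLstd450.h"

/* Sketch only: 'insn' points at the first word of an OpExtInst, so
 *   insn[1] = result type id, insn[2] = result id,
 *   insn[3] = extended-instruction-set id, insn[4] = instruction number,
 * and 'glsl450_set_id' is the id produced by OpExtInstImport "GLSL.std.450". */
static bool
ext_inst_is_sqrt(const uint32_t *insn, uint32_t glsl450_set_id)
{
   return insn[3] == glsl450_set_id && insn[4] == (uint32_t)GLSLstd450Sqrt;
}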

@@ -0,0 +1,54 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jason Ekstrand (jason@jlekstrand.net)
*
*/
#pragma once
#ifndef _NIR_SPIRV_H_
#define _NIR_SPIRV_H_
#include "nir/nir.h"
#ifdef __cplusplus
extern "C" {
#endif
struct nir_spirv_specialization {
uint32_t id;
uint32_t data;
};
nir_function *spirv_to_nir(const uint32_t *words, size_t word_count,
struct nir_spirv_specialization *specializations,
unsigned num_specializations,
gl_shader_stage stage, const char *entry_point_name,
const nir_shader_compiler_options *options);
#ifdef __cplusplus
}
#endif
#endif /* _NIR_SPIRV_H_ */
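
A short usage sketch for the entry point declared above; words, word_count, and options are assumed to come from the caller (for example, the driver's compiler options), the specialization values are made up, and the include path depends on the build:

#include "nir_spirv.h"

/* Sketch only: lower a fragment-shader SPIR-V module to NIR, overriding
 * one specialization constant (SpecId 0 set to 1). */
static nir_function *
lower_spirv_to_nir(const uint32_t *words, size_t word_count,
                   const nir_shader_compiler_options *options)
{
   struct nir_spirv_specialization spec[] = {
      { .id = 0, .data = 1 },
   };

   return spirv_to_nir(words, word_count,
                       spec, 1 /* num_specializations */,
                       MESA_SHADER_FRAGMENT, "main", options);
}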

src/compiler/spirv/spirv.h (new file, 870 lines)

@@ -0,0 +1,870 @@
/*
** Copyright (c) 2014-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
** to deal in the Materials without restriction, including without limitation
** the rights to use, copy, modify, merge, publish, distribute, sublicense,
** and/or sell copies of the Materials, and to permit persons to whom the
** Materials are furnished to do so, subject to the following conditions:
**
** The above copyright notice and this permission notice shall be included in
** all copies or substantial portions of the Materials.
**
** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
** IN THE MATERIALS.
*/
/*
** This header is automatically generated by the same tool that creates
** the Binary Section of the SPIR-V specification.
*/
/*
** Enumeration tokens for SPIR-V, in various styles:
** C, C++, C++11, JSON, Lua, Python
**
** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL
** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL
** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
**
** Some tokens act like mask values, which can be OR'd together,
** while others are mutually exclusive. The mask-like ones have
** "Mask" in their name, and a parallel enum that has the shift
** amount (1 << x) for each corresponding enumerant.
*/
#ifndef spirv_H
#define spirv_H
typedef unsigned int SpvId;
#define SPV_VERSION 0x10000
#define SPV_REVISION 2
static const unsigned int SpvMagicNumber = 0x07230203;
static const unsigned int SpvVersion = 0x00010000;
static const unsigned int SpvRevision = 2;
static const unsigned int SpvOpCodeMask = 0xffff;
static const unsigned int SpvWordCountShift = 16;
typedef enum SpvSourceLanguage_ {
SpvSourceLanguageUnknown = 0,
SpvSourceLanguageESSL = 1,
SpvSourceLanguageGLSL = 2,
SpvSourceLanguageOpenCL_C = 3,
SpvSourceLanguageOpenCL_CPP = 4,
} SpvSourceLanguage;
typedef enum SpvExecutionModel_ {
SpvExecutionModelVertex = 0,
SpvExecutionModelTessellationControl = 1,
SpvExecutionModelTessellationEvaluation = 2,
SpvExecutionModelGeometry = 3,
SpvExecutionModelFragment = 4,
SpvExecutionModelGLCompute = 5,
SpvExecutionModelKernel = 6,
} SpvExecutionModel;
typedef enum SpvAddressingModel_ {
SpvAddressingModelLogical = 0,
SpvAddressingModelPhysical32 = 1,
SpvAddressingModelPhysical64 = 2,
} SpvAddressingModel;
typedef enum SpvMemoryModel_ {
SpvMemoryModelSimple = 0,
SpvMemoryModelGLSL450 = 1,
SpvMemoryModelOpenCL = 2,
} SpvMemoryModel;
typedef enum SpvExecutionMode_ {
SpvExecutionModeInvocations = 0,
SpvExecutionModeSpacingEqual = 1,
SpvExecutionModeSpacingFractionalEven = 2,
SpvExecutionModeSpacingFractionalOdd = 3,
SpvExecutionModeVertexOrderCw = 4,
SpvExecutionModeVertexOrderCcw = 5,
SpvExecutionModePixelCenterInteger = 6,
SpvExecutionModeOriginUpperLeft = 7,
SpvExecutionModeOriginLowerLeft = 8,
SpvExecutionModeEarlyFragmentTests = 9,
SpvExecutionModePointMode = 10,
SpvExecutionModeXfb = 11,
SpvExecutionModeDepthReplacing = 12,
SpvExecutionModeDepthGreater = 14,
SpvExecutionModeDepthLess = 15,
SpvExecutionModeDepthUnchanged = 16,
SpvExecutionModeLocalSize = 17,
SpvExecutionModeLocalSizeHint = 18,
SpvExecutionModeInputPoints = 19,
SpvExecutionModeInputLines = 20,
SpvExecutionModeInputLinesAdjacency = 21,
SpvExecutionModeTriangles = 22,
SpvExecutionModeInputTrianglesAdjacency = 23,
SpvExecutionModeQuads = 24,
SpvExecutionModeIsolines = 25,
SpvExecutionModeOutputVertices = 26,
SpvExecutionModeOutputPoints = 27,
SpvExecutionModeOutputLineStrip = 28,
SpvExecutionModeOutputTriangleStrip = 29,
SpvExecutionModeVecTypeHint = 30,
SpvExecutionModeContractionOff = 31,
} SpvExecutionMode;
typedef enum SpvStorageClass_ {
SpvStorageClassUniformConstant = 0,
SpvStorageClassInput = 1,
SpvStorageClassUniform = 2,
SpvStorageClassOutput = 3,
SpvStorageClassWorkgroup = 4,
SpvStorageClassCrossWorkgroup = 5,
SpvStorageClassPrivate = 6,
SpvStorageClassFunction = 7,
SpvStorageClassGeneric = 8,
SpvStorageClassPushConstant = 9,
SpvStorageClassAtomicCounter = 10,
SpvStorageClassImage = 11,
} SpvStorageClass;
typedef enum SpvDim_ {
SpvDim1D = 0,
SpvDim2D = 1,
SpvDim3D = 2,
SpvDimCube = 3,
SpvDimRect = 4,
SpvDimBuffer = 5,
SpvDimSubpassData = 6,
} SpvDim;
typedef enum SpvSamplerAddressingMode_ {
SpvSamplerAddressingModeNone = 0,
SpvSamplerAddressingModeClampToEdge = 1,
SpvSamplerAddressingModeClamp = 2,
SpvSamplerAddressingModeRepeat = 3,
SpvSamplerAddressingModeRepeatMirrored = 4,
} SpvSamplerAddressingMode;
typedef enum SpvSamplerFilterMode_ {
SpvSamplerFilterModeNearest = 0,
SpvSamplerFilterModeLinear = 1,
} SpvSamplerFilterMode;
typedef enum SpvImageFormat_ {
SpvImageFormatUnknown = 0,
SpvImageFormatRgba32f = 1,
SpvImageFormatRgba16f = 2,
SpvImageFormatR32f = 3,
SpvImageFormatRgba8 = 4,
SpvImageFormatRgba8Snorm = 5,
SpvImageFormatRg32f = 6,
SpvImageFormatRg16f = 7,
SpvImageFormatR11fG11fB10f = 8,
SpvImageFormatR16f = 9,
SpvImageFormatRgba16 = 10,
SpvImageFormatRgb10A2 = 11,
SpvImageFormatRg16 = 12,
SpvImageFormatRg8 = 13,
SpvImageFormatR16 = 14,
SpvImageFormatR8 = 15,
SpvImageFormatRgba16Snorm = 16,
SpvImageFormatRg16Snorm = 17,
SpvImageFormatRg8Snorm = 18,
SpvImageFormatR16Snorm = 19,
SpvImageFormatR8Snorm = 20,
SpvImageFormatRgba32i = 21,
SpvImageFormatRgba16i = 22,
SpvImageFormatRgba8i = 23,
SpvImageFormatR32i = 24,
SpvImageFormatRg32i = 25,
SpvImageFormatRg16i = 26,
SpvImageFormatRg8i = 27,
SpvImageFormatR16i = 28,
SpvImageFormatR8i = 29,
SpvImageFormatRgba32ui = 30,
SpvImageFormatRgba16ui = 31,
SpvImageFormatRgba8ui = 32,
SpvImageFormatR32ui = 33,
SpvImageFormatRgb10a2ui = 34,
SpvImageFormatRg32ui = 35,
SpvImageFormatRg16ui = 36,
SpvImageFormatRg8ui = 37,
SpvImageFormatR16ui = 38,
SpvImageFormatR8ui = 39,
} SpvImageFormat;
typedef enum SpvImageChannelOrder_ {
SpvImageChannelOrderR = 0,
SpvImageChannelOrderA = 1,
SpvImageChannelOrderRG = 2,
SpvImageChannelOrderRA = 3,
SpvImageChannelOrderRGB = 4,
SpvImageChannelOrderRGBA = 5,
SpvImageChannelOrderBGRA = 6,
SpvImageChannelOrderARGB = 7,
SpvImageChannelOrderIntensity = 8,
SpvImageChannelOrderLuminance = 9,
SpvImageChannelOrderRx = 10,
SpvImageChannelOrderRGx = 11,
SpvImageChannelOrderRGBx = 12,
SpvImageChannelOrderDepth = 13,
SpvImageChannelOrderDepthStencil = 14,
SpvImageChannelOrdersRGB = 15,
SpvImageChannelOrdersRGBx = 16,
SpvImageChannelOrdersRGBA = 17,
SpvImageChannelOrdersBGRA = 18,
} SpvImageChannelOrder;
typedef enum SpvImageChannelDataType_ {
SpvImageChannelDataTypeSnormInt8 = 0,
SpvImageChannelDataTypeSnormInt16 = 1,
SpvImageChannelDataTypeUnormInt8 = 2,
SpvImageChannelDataTypeUnormInt16 = 3,
SpvImageChannelDataTypeUnormShort565 = 4,
SpvImageChannelDataTypeUnormShort555 = 5,
SpvImageChannelDataTypeUnormInt101010 = 6,
SpvImageChannelDataTypeSignedInt8 = 7,
SpvImageChannelDataTypeSignedInt16 = 8,
SpvImageChannelDataTypeSignedInt32 = 9,
SpvImageChannelDataTypeUnsignedInt8 = 10,
SpvImageChannelDataTypeUnsignedInt16 = 11,
SpvImageChannelDataTypeUnsignedInt32 = 12,
SpvImageChannelDataTypeHalfFloat = 13,
SpvImageChannelDataTypeFloat = 14,
SpvImageChannelDataTypeUnormInt24 = 15,
SpvImageChannelDataTypeUnormInt101010_2 = 16,
} SpvImageChannelDataType;
typedef enum SpvImageOperandsShift_ {
SpvImageOperandsBiasShift = 0,
SpvImageOperandsLodShift = 1,
SpvImageOperandsGradShift = 2,
SpvImageOperandsConstOffsetShift = 3,
SpvImageOperandsOffsetShift = 4,
SpvImageOperandsConstOffsetsShift = 5,
SpvImageOperandsSampleShift = 6,
SpvImageOperandsMinLodShift = 7,
} SpvImageOperandsShift;
typedef enum SpvImageOperandsMask_ {
SpvImageOperandsMaskNone = 0,
SpvImageOperandsBiasMask = 0x00000001,
SpvImageOperandsLodMask = 0x00000002,
SpvImageOperandsGradMask = 0x00000004,
SpvImageOperandsConstOffsetMask = 0x00000008,
SpvImageOperandsOffsetMask = 0x00000010,
SpvImageOperandsConstOffsetsMask = 0x00000020,
SpvImageOperandsSampleMask = 0x00000040,
SpvImageOperandsMinLodMask = 0x00000080,
} SpvImageOperandsMask;
typedef enum SpvFPFastMathModeShift_ {
SpvFPFastMathModeNotNaNShift = 0,
SpvFPFastMathModeNotInfShift = 1,
SpvFPFastMathModeNSZShift = 2,
SpvFPFastMathModeAllowRecipShift = 3,
SpvFPFastMathModeFastShift = 4,
} SpvFPFastMathModeShift;
typedef enum SpvFPFastMathModeMask_ {
SpvFPFastMathModeMaskNone = 0,
SpvFPFastMathModeNotNaNMask = 0x00000001,
SpvFPFastMathModeNotInfMask = 0x00000002,
SpvFPFastMathModeNSZMask = 0x00000004,
SpvFPFastMathModeAllowRecipMask = 0x00000008,
SpvFPFastMathModeFastMask = 0x00000010,
} SpvFPFastMathModeMask;
typedef enum SpvFPRoundingMode_ {
SpvFPRoundingModeRTE = 0,
SpvFPRoundingModeRTZ = 1,
SpvFPRoundingModeRTP = 2,
SpvFPRoundingModeRTN = 3,
} SpvFPRoundingMode;
typedef enum SpvLinkageType_ {
SpvLinkageTypeExport = 0,
SpvLinkageTypeImport = 1,
} SpvLinkageType;
typedef enum SpvAccessQualifier_ {
SpvAccessQualifierReadOnly = 0,
SpvAccessQualifierWriteOnly = 1,
SpvAccessQualifierReadWrite = 2,
} SpvAccessQualifier;
typedef enum SpvFunctionParameterAttribute_ {
SpvFunctionParameterAttributeZext = 0,
SpvFunctionParameterAttributeSext = 1,
SpvFunctionParameterAttributeByVal = 2,
SpvFunctionParameterAttributeSret = 3,
SpvFunctionParameterAttributeNoAlias = 4,
SpvFunctionParameterAttributeNoCapture = 5,
SpvFunctionParameterAttributeNoWrite = 6,
SpvFunctionParameterAttributeNoReadWrite = 7,
} SpvFunctionParameterAttribute;
typedef enum SpvDecoration_ {
SpvDecorationRelaxedPrecision = 0,
SpvDecorationSpecId = 1,
SpvDecorationBlock = 2,
SpvDecorationBufferBlock = 3,
SpvDecorationRowMajor = 4,
SpvDecorationColMajor = 5,
SpvDecorationArrayStride = 6,
SpvDecorationMatrixStride = 7,
SpvDecorationGLSLShared = 8,
SpvDecorationGLSLPacked = 9,
SpvDecorationCPacked = 10,
SpvDecorationBuiltIn = 11,
SpvDecorationNoPerspective = 13,
SpvDecorationFlat = 14,
SpvDecorationPatch = 15,
SpvDecorationCentroid = 16,
SpvDecorationSample = 17,
SpvDecorationInvariant = 18,
SpvDecorationRestrict = 19,
SpvDecorationAliased = 20,
SpvDecorationVolatile = 21,
SpvDecorationConstant = 22,
SpvDecorationCoherent = 23,
SpvDecorationNonWritable = 24,
SpvDecorationNonReadable = 25,
SpvDecorationUniform = 26,
SpvDecorationSaturatedConversion = 28,
SpvDecorationStream = 29,
SpvDecorationLocation = 30,
SpvDecorationComponent = 31,
SpvDecorationIndex = 32,
SpvDecorationBinding = 33,
SpvDecorationDescriptorSet = 34,
SpvDecorationOffset = 35,
SpvDecorationXfbBuffer = 36,
SpvDecorationXfbStride = 37,
SpvDecorationFuncParamAttr = 38,
SpvDecorationFPRoundingMode = 39,
SpvDecorationFPFastMathMode = 40,
SpvDecorationLinkageAttributes = 41,
SpvDecorationNoContraction = 42,
SpvDecorationInputAttachmentIndex = 43,
SpvDecorationAlignment = 44,
} SpvDecoration;
typedef enum SpvBuiltIn_ {
SpvBuiltInPosition = 0,
SpvBuiltInPointSize = 1,
SpvBuiltInClipDistance = 3,
SpvBuiltInCullDistance = 4,
SpvBuiltInVertexId = 5,
SpvBuiltInInstanceId = 6,
SpvBuiltInPrimitiveId = 7,
SpvBuiltInInvocationId = 8,
SpvBuiltInLayer = 9,
SpvBuiltInViewportIndex = 10,
SpvBuiltInTessLevelOuter = 11,
SpvBuiltInTessLevelInner = 12,
SpvBuiltInTessCoord = 13,
SpvBuiltInPatchVertices = 14,
SpvBuiltInFragCoord = 15,
SpvBuiltInPointCoord = 16,
SpvBuiltInFrontFacing = 17,
SpvBuiltInSampleId = 18,
SpvBuiltInSamplePosition = 19,
SpvBuiltInSampleMask = 20,
SpvBuiltInFragDepth = 22,
SpvBuiltInHelperInvocation = 23,
SpvBuiltInNumWorkgroups = 24,
SpvBuiltInWorkgroupSize = 25,
SpvBuiltInWorkgroupId = 26,
SpvBuiltInLocalInvocationId = 27,
SpvBuiltInGlobalInvocationId = 28,
SpvBuiltInLocalInvocationIndex = 29,
SpvBuiltInWorkDim = 30,
SpvBuiltInGlobalSize = 31,
SpvBuiltInEnqueuedWorkgroupSize = 32,
SpvBuiltInGlobalOffset = 33,
SpvBuiltInGlobalLinearId = 34,
SpvBuiltInSubgroupSize = 36,
SpvBuiltInSubgroupMaxSize = 37,
SpvBuiltInNumSubgroups = 38,
SpvBuiltInNumEnqueuedSubgroups = 39,
SpvBuiltInSubgroupId = 40,
SpvBuiltInSubgroupLocalInvocationId = 41,
SpvBuiltInVertexIndex = 42,
SpvBuiltInInstanceIndex = 43,
} SpvBuiltIn;
typedef enum SpvSelectionControlShift_ {
SpvSelectionControlFlattenShift = 0,
SpvSelectionControlDontFlattenShift = 1,
} SpvSelectionControlShift;
typedef enum SpvSelectionControlMask_ {
SpvSelectionControlMaskNone = 0,
SpvSelectionControlFlattenMask = 0x00000001,
SpvSelectionControlDontFlattenMask = 0x00000002,
} SpvSelectionControlMask;
typedef enum SpvLoopControlShift_ {
SpvLoopControlUnrollShift = 0,
SpvLoopControlDontUnrollShift = 1,
} SpvLoopControlShift;
typedef enum SpvLoopControlMask_ {
SpvLoopControlMaskNone = 0,
SpvLoopControlUnrollMask = 0x00000001,
SpvLoopControlDontUnrollMask = 0x00000002,
} SpvLoopControlMask;
typedef enum SpvFunctionControlShift_ {
SpvFunctionControlInlineShift = 0,
SpvFunctionControlDontInlineShift = 1,
SpvFunctionControlPureShift = 2,
SpvFunctionControlConstShift = 3,
} SpvFunctionControlShift;
typedef enum SpvFunctionControlMask_ {
SpvFunctionControlMaskNone = 0,
SpvFunctionControlInlineMask = 0x00000001,
SpvFunctionControlDontInlineMask = 0x00000002,
SpvFunctionControlPureMask = 0x00000004,
SpvFunctionControlConstMask = 0x00000008,
} SpvFunctionControlMask;
typedef enum SpvMemorySemanticsShift_ {
SpvMemorySemanticsAcquireShift = 1,
SpvMemorySemanticsReleaseShift = 2,
SpvMemorySemanticsAcquireReleaseShift = 3,
SpvMemorySemanticsSequentiallyConsistentShift = 4,
SpvMemorySemanticsUniformMemoryShift = 6,
SpvMemorySemanticsSubgroupMemoryShift = 7,
SpvMemorySemanticsWorkgroupMemoryShift = 8,
SpvMemorySemanticsCrossWorkgroupMemoryShift = 9,
SpvMemorySemanticsAtomicCounterMemoryShift = 10,
SpvMemorySemanticsImageMemoryShift = 11,
} SpvMemorySemanticsShift;
typedef enum SpvMemorySemanticsMask_ {
SpvMemorySemanticsMaskNone = 0,
SpvMemorySemanticsAcquireMask = 0x00000002,
SpvMemorySemanticsReleaseMask = 0x00000004,
SpvMemorySemanticsAcquireReleaseMask = 0x00000008,
SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010,
SpvMemorySemanticsUniformMemoryMask = 0x00000040,
SpvMemorySemanticsSubgroupMemoryMask = 0x00000080,
SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100,
SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400,
SpvMemorySemanticsImageMemoryMask = 0x00000800,
} SpvMemorySemanticsMask;
typedef enum SpvMemoryAccessShift_ {
SpvMemoryAccessVolatileShift = 0,
SpvMemoryAccessAlignedShift = 1,
SpvMemoryAccessNontemporalShift = 2,
} SpvMemoryAccessShift;
typedef enum SpvMemoryAccessMask_ {
SpvMemoryAccessMaskNone = 0,
SpvMemoryAccessVolatileMask = 0x00000001,
SpvMemoryAccessAlignedMask = 0x00000002,
SpvMemoryAccessNontemporalMask = 0x00000004,
} SpvMemoryAccessMask;
typedef enum SpvScope_ {
SpvScopeCrossDevice = 0,
SpvScopeDevice = 1,
SpvScopeWorkgroup = 2,
SpvScopeSubgroup = 3,
SpvScopeInvocation = 4,
} SpvScope;
typedef enum SpvGroupOperation_ {
SpvGroupOperationReduce = 0,
SpvGroupOperationInclusiveScan = 1,
SpvGroupOperationExclusiveScan = 2,
} SpvGroupOperation;
typedef enum SpvKernelEnqueueFlags_ {
SpvKernelEnqueueFlagsNoWait = 0,
SpvKernelEnqueueFlagsWaitKernel = 1,
SpvKernelEnqueueFlagsWaitWorkGroup = 2,
} SpvKernelEnqueueFlags;
typedef enum SpvKernelProfilingInfoShift_ {
SpvKernelProfilingInfoCmdExecTimeShift = 0,
} SpvKernelProfilingInfoShift;
typedef enum SpvKernelProfilingInfoMask_ {
SpvKernelProfilingInfoMaskNone = 0,
SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001,
} SpvKernelProfilingInfoMask;
typedef enum SpvCapability_ {
SpvCapabilityMatrix = 0,
SpvCapabilityShader = 1,
SpvCapabilityGeometry = 2,
SpvCapabilityTessellation = 3,
SpvCapabilityAddresses = 4,
SpvCapabilityLinkage = 5,
SpvCapabilityKernel = 6,
SpvCapabilityVector16 = 7,
SpvCapabilityFloat16Buffer = 8,
SpvCapabilityFloat16 = 9,
SpvCapabilityFloat64 = 10,
SpvCapabilityInt64 = 11,
SpvCapabilityInt64Atomics = 12,
SpvCapabilityImageBasic = 13,
SpvCapabilityImageReadWrite = 14,
SpvCapabilityImageMipmap = 15,
SpvCapabilityPipes = 17,
SpvCapabilityGroups = 18,
SpvCapabilityDeviceEnqueue = 19,
SpvCapabilityLiteralSampler = 20,
SpvCapabilityAtomicStorage = 21,
SpvCapabilityInt16 = 22,
SpvCapabilityTessellationPointSize = 23,
SpvCapabilityGeometryPointSize = 24,
SpvCapabilityImageGatherExtended = 25,
SpvCapabilityStorageImageMultisample = 27,
SpvCapabilityUniformBufferArrayDynamicIndexing = 28,
SpvCapabilitySampledImageArrayDynamicIndexing = 29,
SpvCapabilityStorageBufferArrayDynamicIndexing = 30,
SpvCapabilityStorageImageArrayDynamicIndexing = 31,
SpvCapabilityClipDistance = 32,
SpvCapabilityCullDistance = 33,
SpvCapabilityImageCubeArray = 34,
SpvCapabilitySampleRateShading = 35,
SpvCapabilityImageRect = 36,
SpvCapabilitySampledRect = 37,
SpvCapabilityGenericPointer = 38,
SpvCapabilityInt8 = 39,
SpvCapabilityInputAttachment = 40,
SpvCapabilitySparseResidency = 41,
SpvCapabilityMinLod = 42,
SpvCapabilitySampled1D = 43,
SpvCapabilityImage1D = 44,
SpvCapabilitySampledCubeArray = 45,
SpvCapabilitySampledBuffer = 46,
SpvCapabilityImageBuffer = 47,
SpvCapabilityImageMSArray = 48,
SpvCapabilityStorageImageExtendedFormats = 49,
SpvCapabilityImageQuery = 50,
SpvCapabilityDerivativeControl = 51,
SpvCapabilityInterpolationFunction = 52,
SpvCapabilityTransformFeedback = 53,
SpvCapabilityGeometryStreams = 54,
SpvCapabilityStorageImageReadWithoutFormat = 55,
SpvCapabilityStorageImageWriteWithoutFormat = 56,
SpvCapabilityMultiViewport = 57,
} SpvCapability;
typedef enum SpvOp_ {
SpvOpNop = 0,
SpvOpUndef = 1,
SpvOpSourceContinued = 2,
SpvOpSource = 3,
SpvOpSourceExtension = 4,
SpvOpName = 5,
SpvOpMemberName = 6,
SpvOpString = 7,
SpvOpLine = 8,
SpvOpExtension = 10,
SpvOpExtInstImport = 11,
SpvOpExtInst = 12,
SpvOpMemoryModel = 14,
SpvOpEntryPoint = 15,
SpvOpExecutionMode = 16,
SpvOpCapability = 17,
SpvOpTypeVoid = 19,
SpvOpTypeBool = 20,
SpvOpTypeInt = 21,
SpvOpTypeFloat = 22,
SpvOpTypeVector = 23,
SpvOpTypeMatrix = 24,
SpvOpTypeImage = 25,
SpvOpTypeSampler = 26,
SpvOpTypeSampledImage = 27,
SpvOpTypeArray = 28,
SpvOpTypeRuntimeArray = 29,
SpvOpTypeStruct = 30,
SpvOpTypeOpaque = 31,
SpvOpTypePointer = 32,
SpvOpTypeFunction = 33,
SpvOpTypeEvent = 34,
SpvOpTypeDeviceEvent = 35,
SpvOpTypeReserveId = 36,
SpvOpTypeQueue = 37,
SpvOpTypePipe = 38,
SpvOpTypeForwardPointer = 39,
SpvOpConstantTrue = 41,
SpvOpConstantFalse = 42,
SpvOpConstant = 43,
SpvOpConstantComposite = 44,
SpvOpConstantSampler = 45,
SpvOpConstantNull = 46,
SpvOpSpecConstantTrue = 48,
SpvOpSpecConstantFalse = 49,
SpvOpSpecConstant = 50,
SpvOpSpecConstantComposite = 51,
SpvOpSpecConstantOp = 52,
SpvOpFunction = 54,
SpvOpFunctionParameter = 55,
SpvOpFunctionEnd = 56,
SpvOpFunctionCall = 57,
SpvOpVariable = 59,
SpvOpImageTexelPointer = 60,
SpvOpLoad = 61,
SpvOpStore = 62,
SpvOpCopyMemory = 63,
SpvOpCopyMemorySized = 64,
SpvOpAccessChain = 65,
SpvOpInBoundsAccessChain = 66,
SpvOpPtrAccessChain = 67,
SpvOpArrayLength = 68,
SpvOpGenericPtrMemSemantics = 69,
SpvOpInBoundsPtrAccessChain = 70,
SpvOpDecorate = 71,
SpvOpMemberDecorate = 72,
SpvOpDecorationGroup = 73,
SpvOpGroupDecorate = 74,
SpvOpGroupMemberDecorate = 75,
SpvOpVectorExtractDynamic = 77,
SpvOpVectorInsertDynamic = 78,
SpvOpVectorShuffle = 79,
SpvOpCompositeConstruct = 80,
SpvOpCompositeExtract = 81,
SpvOpCompositeInsert = 82,
SpvOpCopyObject = 83,
SpvOpTranspose = 84,
SpvOpSampledImage = 86,
SpvOpImageSampleImplicitLod = 87,
SpvOpImageSampleExplicitLod = 88,
SpvOpImageSampleDrefImplicitLod = 89,
SpvOpImageSampleDrefExplicitLod = 90,
SpvOpImageSampleProjImplicitLod = 91,
SpvOpImageSampleProjExplicitLod = 92,
SpvOpImageSampleProjDrefImplicitLod = 93,
SpvOpImageSampleProjDrefExplicitLod = 94,
SpvOpImageFetch = 95,
SpvOpImageGather = 96,
SpvOpImageDrefGather = 97,
SpvOpImageRead = 98,
SpvOpImageWrite = 99,
SpvOpImage = 100,
SpvOpImageQueryFormat = 101,
SpvOpImageQueryOrder = 102,
SpvOpImageQuerySizeLod = 103,
SpvOpImageQuerySize = 104,
SpvOpImageQueryLod = 105,
SpvOpImageQueryLevels = 106,
SpvOpImageQuerySamples = 107,
SpvOpConvertFToU = 109,
SpvOpConvertFToS = 110,
SpvOpConvertSToF = 111,
SpvOpConvertUToF = 112,
SpvOpUConvert = 113,
SpvOpSConvert = 114,
SpvOpFConvert = 115,
SpvOpQuantizeToF16 = 116,
SpvOpConvertPtrToU = 117,
SpvOpSatConvertSToU = 118,
SpvOpSatConvertUToS = 119,
SpvOpConvertUToPtr = 120,
SpvOpPtrCastToGeneric = 121,
SpvOpGenericCastToPtr = 122,
SpvOpGenericCastToPtrExplicit = 123,
SpvOpBitcast = 124,
SpvOpSNegate = 126,
SpvOpFNegate = 127,
SpvOpIAdd = 128,
SpvOpFAdd = 129,
SpvOpISub = 130,
SpvOpFSub = 131,
SpvOpIMul = 132,
SpvOpFMul = 133,
SpvOpUDiv = 134,
SpvOpSDiv = 135,
SpvOpFDiv = 136,
SpvOpUMod = 137,
SpvOpSRem = 138,
SpvOpSMod = 139,
SpvOpFRem = 140,
SpvOpFMod = 141,
SpvOpVectorTimesScalar = 142,
SpvOpMatrixTimesScalar = 143,
SpvOpVectorTimesMatrix = 144,
SpvOpMatrixTimesVector = 145,
SpvOpMatrixTimesMatrix = 146,
SpvOpOuterProduct = 147,
SpvOpDot = 148,
SpvOpIAddCarry = 149,
SpvOpISubBorrow = 150,
SpvOpUMulExtended = 151,
SpvOpSMulExtended = 152,
SpvOpAny = 154,
SpvOpAll = 155,
SpvOpIsNan = 156,
SpvOpIsInf = 157,
SpvOpIsFinite = 158,
SpvOpIsNormal = 159,
SpvOpSignBitSet = 160,
SpvOpLessOrGreater = 161,
SpvOpOrdered = 162,
SpvOpUnordered = 163,
SpvOpLogicalEqual = 164,
SpvOpLogicalNotEqual = 165,
SpvOpLogicalOr = 166,
SpvOpLogicalAnd = 167,
SpvOpLogicalNot = 168,
SpvOpSelect = 169,
SpvOpIEqual = 170,
SpvOpINotEqual = 171,
SpvOpUGreaterThan = 172,
SpvOpSGreaterThan = 173,
SpvOpUGreaterThanEqual = 174,
SpvOpSGreaterThanEqual = 175,
SpvOpULessThan = 176,
SpvOpSLessThan = 177,
SpvOpULessThanEqual = 178,
SpvOpSLessThanEqual = 179,
SpvOpFOrdEqual = 180,
SpvOpFUnordEqual = 181,
SpvOpFOrdNotEqual = 182,
SpvOpFUnordNotEqual = 183,
SpvOpFOrdLessThan = 184,
SpvOpFUnordLessThan = 185,
SpvOpFOrdGreaterThan = 186,
SpvOpFUnordGreaterThan = 187,
SpvOpFOrdLessThanEqual = 188,
SpvOpFUnordLessThanEqual = 189,
SpvOpFOrdGreaterThanEqual = 190,
SpvOpFUnordGreaterThanEqual = 191,
SpvOpShiftRightLogical = 194,
SpvOpShiftRightArithmetic = 195,
SpvOpShiftLeftLogical = 196,
SpvOpBitwiseOr = 197,
SpvOpBitwiseXor = 198,
SpvOpBitwiseAnd = 199,
SpvOpNot = 200,
SpvOpBitFieldInsert = 201,
SpvOpBitFieldSExtract = 202,
SpvOpBitFieldUExtract = 203,
SpvOpBitReverse = 204,
SpvOpBitCount = 205,
SpvOpDPdx = 207,
SpvOpDPdy = 208,
SpvOpFwidth = 209,
SpvOpDPdxFine = 210,
SpvOpDPdyFine = 211,
SpvOpFwidthFine = 212,
SpvOpDPdxCoarse = 213,
SpvOpDPdyCoarse = 214,
SpvOpFwidthCoarse = 215,
SpvOpEmitVertex = 218,
SpvOpEndPrimitive = 219,
SpvOpEmitStreamVertex = 220,
SpvOpEndStreamPrimitive = 221,
SpvOpControlBarrier = 224,
SpvOpMemoryBarrier = 225,
SpvOpAtomicLoad = 227,
SpvOpAtomicStore = 228,
SpvOpAtomicExchange = 229,
SpvOpAtomicCompareExchange = 230,
SpvOpAtomicCompareExchangeWeak = 231,
SpvOpAtomicIIncrement = 232,
SpvOpAtomicIDecrement = 233,
SpvOpAtomicIAdd = 234,
SpvOpAtomicISub = 235,
SpvOpAtomicSMin = 236,
SpvOpAtomicUMin = 237,
SpvOpAtomicSMax = 238,
SpvOpAtomicUMax = 239,
SpvOpAtomicAnd = 240,
SpvOpAtomicOr = 241,
SpvOpAtomicXor = 242,
SpvOpPhi = 245,
SpvOpLoopMerge = 246,
SpvOpSelectionMerge = 247,
SpvOpLabel = 248,
SpvOpBranch = 249,
SpvOpBranchConditional = 250,
SpvOpSwitch = 251,
SpvOpKill = 252,
SpvOpReturn = 253,
SpvOpReturnValue = 254,
SpvOpUnreachable = 255,
SpvOpLifetimeStart = 256,
SpvOpLifetimeStop = 257,
SpvOpGroupAsyncCopy = 259,
SpvOpGroupWaitEvents = 260,
SpvOpGroupAll = 261,
SpvOpGroupAny = 262,
SpvOpGroupBroadcast = 263,
SpvOpGroupIAdd = 264,
SpvOpGroupFAdd = 265,
SpvOpGroupFMin = 266,
SpvOpGroupUMin = 267,
SpvOpGroupSMin = 268,
SpvOpGroupFMax = 269,
SpvOpGroupUMax = 270,
SpvOpGroupSMax = 271,
SpvOpReadPipe = 274,
SpvOpWritePipe = 275,
SpvOpReservedReadPipe = 276,
SpvOpReservedWritePipe = 277,
SpvOpReserveReadPipePackets = 278,
SpvOpReserveWritePipePackets = 279,
SpvOpCommitReadPipe = 280,
SpvOpCommitWritePipe = 281,
SpvOpIsValidReserveId = 282,
SpvOpGetNumPipePackets = 283,
SpvOpGetMaxPipePackets = 284,
SpvOpGroupReserveReadPipePackets = 285,
SpvOpGroupReserveWritePipePackets = 286,
SpvOpGroupCommitReadPipe = 287,
SpvOpGroupCommitWritePipe = 288,
SpvOpEnqueueMarker = 291,
SpvOpEnqueueKernel = 292,
SpvOpGetKernelNDrangeSubGroupCount = 293,
SpvOpGetKernelNDrangeMaxSubGroupSize = 294,
SpvOpGetKernelWorkGroupSize = 295,
SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296,
SpvOpRetainEvent = 297,
SpvOpReleaseEvent = 298,
SpvOpCreateUserEvent = 299,
SpvOpIsValidEvent = 300,
SpvOpSetUserEventStatus = 301,
SpvOpCaptureEventProfilingInfo = 302,
SpvOpGetDefaultQueue = 303,
SpvOpBuildNDRange = 304,
SpvOpImageSparseSampleImplicitLod = 305,
SpvOpImageSparseSampleExplicitLod = 306,
SpvOpImageSparseSampleDrefImplicitLod = 307,
SpvOpImageSparseSampleDrefExplicitLod = 308,
SpvOpImageSparseSampleProjImplicitLod = 309,
SpvOpImageSparseSampleProjExplicitLod = 310,
SpvOpImageSparseSampleProjDrefImplicitLod = 311,
SpvOpImageSparseSampleProjDrefExplicitLod = 312,
SpvOpImageSparseFetch = 313,
SpvOpImageSparseGather = 314,
SpvOpImageSparseDrefGather = 315,
SpvOpImageSparseTexelsResident = 316,
SpvOpNoLine = 317,
SpvOpAtomicFlagTestAndSet = 318,
SpvOpAtomicFlagClear = 319,
} SpvOp;
#endif // #ifndef spirv_H
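
A small, hedged sketch of how the constants at the top of this header are typically used: a SPIR-V module starts with a five-word header (magic, version, generator, bound, schema), and every instruction after it packs its word count and opcode into a single word, which SpvOpCodeMask and SpvWordCountShift pull apart.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "spirv.h"

/* Sketch only: count the SpvOpEntryPoint instructions in a module. */
static bool
count_entry_points(const uint32_t *words, size_t word_count, unsigned *count)
{
   if (word_count < 5 || words[0] != SpvMagicNumber)
      return false;   /* not a SPIR-V module (or wrong endianness) */

   *count = 0;
   for (size_t i = 5; i < word_count;) {
      SpvOp op = (SpvOp)(words[i] & SpvOpCodeMask);
      uint32_t wc = words[i] >> SpvWordCountShift;
      if (wc == 0 || i + wc > word_count)
         return false;   /* malformed instruction */
      if (op == SpvOpEntryPoint)
         (*count)++;
      i += wc;
   }
   return true;
}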

File diff suppressed because it is too large.

@@ -0,0 +1,464 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "vtn_private.h"
/*
* Normally, column vectors in SPIR-V correspond to a single NIR SSA
* definition. But for matrix multiplies, we want to do one routine for
* multiplying a matrix by a matrix and then pretend that vectors are matrices
* with one column. So we "wrap" these things, and unwrap the result before we
* send it off.
*/
static struct vtn_ssa_value *
wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val)
{
if (val == NULL)
return NULL;
if (glsl_type_is_matrix(val->type))
return val;
struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value);
dest->type = val->type;
dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1);
dest->elems[0] = val;
return dest;
}
static struct vtn_ssa_value *
unwrap_matrix(struct vtn_ssa_value *val)
{
if (glsl_type_is_matrix(val->type))
return val;
return val->elems[0];
}
static struct vtn_ssa_value *
matrix_multiply(struct vtn_builder *b,
struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1)
{
struct vtn_ssa_value *src0 = wrap_matrix(b, _src0);
struct vtn_ssa_value *src1 = wrap_matrix(b, _src1);
struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed);
struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed);
unsigned src0_rows = glsl_get_vector_elements(src0->type);
unsigned src0_columns = glsl_get_matrix_columns(src0->type);
unsigned src1_columns = glsl_get_matrix_columns(src1->type);
const struct glsl_type *dest_type;
if (src1_columns > 1) {
dest_type = glsl_matrix_type(glsl_get_base_type(src0->type),
src0_rows, src1_columns);
} else {
dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows);
}
struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type);
dest = wrap_matrix(b, dest);
bool transpose_result = false;
if (src0_transpose && src1_transpose) {
/* transpose(A) * transpose(B) = transpose(B * A) */
src1 = src0_transpose;
src0 = src1_transpose;
src0_transpose = NULL;
src1_transpose = NULL;
transpose_result = true;
}
if (src0_transpose && !src1_transpose &&
glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) {
/* We already have the rows of src0 and the columns of src1 available,
* so we can just take the dot product of each row with each column to
* get the result.
*/
for (unsigned i = 0; i < src1_columns; i++) {
nir_ssa_def *vec_src[4];
for (unsigned j = 0; j < src0_rows; j++) {
vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def,
src1->elems[i]->def);
}
dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows);
}
} else {
/* We don't handle the case where src1 is transposed but not src0, since
* the general case only uses individual components of src1 so the
* optimizer should chew through the transpose we emitted for src1.
*/
for (unsigned i = 0; i < src1_columns; i++) {
/* dest[i] = sum(src0[j] * src1[i][j] for all j) */
dest->elems[i]->def =
nir_fmul(&b->nb, src0->elems[0]->def,
nir_channel(&b->nb, src1->elems[i]->def, 0));
for (unsigned j = 1; j < src0_columns; j++) {
dest->elems[i]->def =
nir_fadd(&b->nb, dest->elems[i]->def,
nir_fmul(&b->nb, src0->elems[j]->def,
nir_channel(&b->nb, src1->elems[i]->def, j)));
}
}
}
dest = unwrap_matrix(dest);
if (transpose_result)
dest = vtn_ssa_transpose(b, dest);
return dest;
}
static struct vtn_ssa_value *
mat_times_scalar(struct vtn_builder *b,
struct vtn_ssa_value *mat,
nir_ssa_def *scalar)
{
struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type);
for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) {
if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT)
dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar);
else
dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar);
}
return dest;
}
static void
vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode,
struct vtn_value *dest,
struct vtn_ssa_value *src0, struct vtn_ssa_value *src1)
{
switch (opcode) {
case SpvOpFNegate: {
dest->ssa = vtn_create_ssa_value(b, src0->type);
unsigned cols = glsl_get_matrix_columns(src0->type);
for (unsigned i = 0; i < cols; i++)
dest->ssa->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def);
break;
}
case SpvOpFAdd: {
dest->ssa = vtn_create_ssa_value(b, src0->type);
unsigned cols = glsl_get_matrix_columns(src0->type);
for (unsigned i = 0; i < cols; i++)
dest->ssa->elems[i]->def =
nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
break;
}
case SpvOpFSub: {
dest->ssa = vtn_create_ssa_value(b, src0->type);
unsigned cols = glsl_get_matrix_columns(src0->type);
for (unsigned i = 0; i < cols; i++)
dest->ssa->elems[i]->def =
nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def);
break;
}
case SpvOpTranspose:
dest->ssa = vtn_ssa_transpose(b, src0);
break;
case SpvOpMatrixTimesScalar:
if (src0->transposed) {
dest->ssa = vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed,
src1->def));
} else {
dest->ssa = mat_times_scalar(b, src0, src1->def);
}
break;
case SpvOpVectorTimesMatrix:
case SpvOpMatrixTimesVector:
case SpvOpMatrixTimesMatrix:
if (opcode == SpvOpVectorTimesMatrix) {
dest->ssa = matrix_multiply(b, vtn_ssa_transpose(b, src1), src0);
} else {
dest->ssa = matrix_multiply(b, src0, src1);
}
break;
default: unreachable("unknown matrix opcode");
}
}
nir_op
vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap)
{
/* Indicates that the first two arguments should be swapped. This is
* used for implementing greater-than and less-than-or-equal.
*/
*swap = false;
switch (opcode) {
case SpvOpSNegate: return nir_op_ineg;
case SpvOpFNegate: return nir_op_fneg;
case SpvOpNot: return nir_op_inot;
case SpvOpIAdd: return nir_op_iadd;
case SpvOpFAdd: return nir_op_fadd;
case SpvOpISub: return nir_op_isub;
case SpvOpFSub: return nir_op_fsub;
case SpvOpIMul: return nir_op_imul;
case SpvOpFMul: return nir_op_fmul;
case SpvOpUDiv: return nir_op_udiv;
case SpvOpSDiv: return nir_op_idiv;
case SpvOpFDiv: return nir_op_fdiv;
case SpvOpUMod: return nir_op_umod;
case SpvOpSMod: return nir_op_imod;
case SpvOpFMod: return nir_op_fmod;
case SpvOpSRem: return nir_op_irem;
case SpvOpFRem: return nir_op_frem;
case SpvOpShiftRightLogical: return nir_op_ushr;
case SpvOpShiftRightArithmetic: return nir_op_ishr;
case SpvOpShiftLeftLogical: return nir_op_ishl;
case SpvOpLogicalOr: return nir_op_ior;
case SpvOpLogicalEqual: return nir_op_ieq;
case SpvOpLogicalNotEqual: return nir_op_ine;
case SpvOpLogicalAnd: return nir_op_iand;
case SpvOpLogicalNot: return nir_op_inot;
case SpvOpBitwiseOr: return nir_op_ior;
case SpvOpBitwiseXor: return nir_op_ixor;
case SpvOpBitwiseAnd: return nir_op_iand;
case SpvOpSelect: return nir_op_bcsel;
case SpvOpIEqual: return nir_op_ieq;
case SpvOpBitFieldInsert: return nir_op_bitfield_insert;
case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract;
case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract;
case SpvOpBitReverse: return nir_op_bitfield_reverse;
case SpvOpBitCount: return nir_op_bit_count;
/* Comparisons: (TODO: How do we want to handle ordered/unordered?) */
case SpvOpFOrdEqual: return nir_op_feq;
case SpvOpFUnordEqual: return nir_op_feq;
case SpvOpINotEqual: return nir_op_ine;
case SpvOpFOrdNotEqual: return nir_op_fne;
case SpvOpFUnordNotEqual: return nir_op_fne;
case SpvOpULessThan: return nir_op_ult;
case SpvOpSLessThan: return nir_op_ilt;
case SpvOpFOrdLessThan: return nir_op_flt;
case SpvOpFUnordLessThan: return nir_op_flt;
case SpvOpUGreaterThan: *swap = true; return nir_op_ult;
case SpvOpSGreaterThan: *swap = true; return nir_op_ilt;
case SpvOpFOrdGreaterThan: *swap = true; return nir_op_flt;
case SpvOpFUnordGreaterThan: *swap = true; return nir_op_flt;
case SpvOpULessThanEqual: *swap = true; return nir_op_uge;
case SpvOpSLessThanEqual: *swap = true; return nir_op_ige;
case SpvOpFOrdLessThanEqual: *swap = true; return nir_op_fge;
case SpvOpFUnordLessThanEqual: *swap = true; return nir_op_fge;
case SpvOpUGreaterThanEqual: return nir_op_uge;
case SpvOpSGreaterThanEqual: return nir_op_ige;
case SpvOpFOrdGreaterThanEqual: return nir_op_fge;
case SpvOpFUnordGreaterThanEqual: return nir_op_fge;
/* Conversions: */
case SpvOpConvertFToU: return nir_op_f2u;
case SpvOpConvertFToS: return nir_op_f2i;
case SpvOpConvertSToF: return nir_op_i2f;
case SpvOpConvertUToF: return nir_op_u2f;
case SpvOpBitcast: return nir_op_imov;
case SpvOpUConvert:
case SpvOpQuantizeToF16: return nir_op_fquantize2f16;
/* TODO: NIR is 32-bit only; these are no-ops. */
case SpvOpSConvert: return nir_op_imov;
case SpvOpFConvert: return nir_op_fmov;
/* Derivatives: */
case SpvOpDPdx: return nir_op_fddx;
case SpvOpDPdy: return nir_op_fddy;
case SpvOpDPdxFine: return nir_op_fddx_fine;
case SpvOpDPdyFine: return nir_op_fddy_fine;
case SpvOpDPdxCoarse: return nir_op_fddx_coarse;
case SpvOpDPdyCoarse: return nir_op_fddy_coarse;
default:
unreachable("No NIR equivalent");
}
}
static void
handle_no_contraction(struct vtn_builder *b, struct vtn_value *val, int member,
const struct vtn_decoration *dec, void *_void)
{
assert(dec->scope == VTN_DEC_DECORATION);
if (dec->decoration != SpvDecorationNoContraction)
return;
b->nb.exact = true;
}
void
vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
const uint32_t *w, unsigned count)
{
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
const struct glsl_type *type =
vtn_value(b, w[1], vtn_value_type_type)->type->type;
vtn_foreach_decoration(b, val, handle_no_contraction, NULL);
/* Collect the various SSA sources */
const unsigned num_inputs = count - 3;
struct vtn_ssa_value *vtn_src[4] = { NULL, };
for (unsigned i = 0; i < num_inputs; i++)
vtn_src[i] = vtn_ssa_value(b, w[i + 3]);
if (glsl_type_is_matrix(vtn_src[0]->type) ||
(num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) {
vtn_handle_matrix_alu(b, opcode, val, vtn_src[0], vtn_src[1]);
b->nb.exact = false;
return;
}
val->ssa = vtn_create_ssa_value(b, type);
nir_ssa_def *src[4] = { NULL, };
for (unsigned i = 0; i < num_inputs; i++) {
assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type));
src[i] = vtn_src[i]->def;
}
switch (opcode) {
case SpvOpAny:
if (src[0]->num_components == 1) {
val->ssa->def = nir_imov(&b->nb, src[0]);
} else {
nir_op op;
switch (src[0]->num_components) {
case 2: op = nir_op_bany_inequal2; break;
case 3: op = nir_op_bany_inequal3; break;
case 4: op = nir_op_bany_inequal4; break;
}
val->ssa->def = nir_build_alu(&b->nb, op, src[0],
nir_imm_int(&b->nb, NIR_FALSE),
NULL, NULL);
}
break;
case SpvOpAll:
if (src[0]->num_components == 1) {
val->ssa->def = nir_imov(&b->nb, src[0]);
} else {
nir_op op;
switch (src[0]->num_components) {
case 2: op = nir_op_ball_iequal2; break;
case 3: op = nir_op_ball_iequal3; break;
case 4: op = nir_op_ball_iequal4; break;
}
val->ssa->def = nir_build_alu(&b->nb, op, src[0],
nir_imm_int(&b->nb, NIR_TRUE),
NULL, NULL);
}
break;
case SpvOpOuterProduct: {
for (unsigned i = 0; i < src[1]->num_components; i++) {
val->ssa->elems[i]->def =
nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i));
}
break;
}
case SpvOpDot:
val->ssa->def = nir_fdot(&b->nb, src[0], src[1]);
break;
case SpvOpIAddCarry:
assert(glsl_type_is_struct(val->ssa->type));
val->ssa->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]);
val->ssa->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]);
break;
case SpvOpISubBorrow:
assert(glsl_type_is_struct(val->ssa->type));
val->ssa->elems[0]->def = nir_isub(&b->nb, src[0], src[1]);
val->ssa->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]);
break;
case SpvOpUMulExtended:
assert(glsl_type_is_struct(val->ssa->type));
val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
val->ssa->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]);
break;
case SpvOpSMulExtended:
assert(glsl_type_is_struct(val->ssa->type));
val->ssa->elems[0]->def = nir_imul(&b->nb, src[0], src[1]);
val->ssa->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]);
break;
case SpvOpFwidth:
val->ssa->def = nir_fadd(&b->nb,
nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])),
nir_fabs(&b->nb, nir_fddy(&b->nb, src[0])));
break;
case SpvOpFwidthFine:
val->ssa->def = nir_fadd(&b->nb,
nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])),
nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0])));
break;
case SpvOpFwidthCoarse:
val->ssa->def = nir_fadd(&b->nb,
nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])),
nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0])));
break;
case SpvOpVectorTimesScalar:
/* The builder will take care of splatting for us. */
val->ssa->def = nir_fmul(&b->nb, src[0], src[1]);
break;
case SpvOpIsNan:
val->ssa->def = nir_fne(&b->nb, src[0], src[0]);
break;
case SpvOpIsInf:
val->ssa->def = nir_feq(&b->nb, nir_fabs(&b->nb, src[0]),
nir_imm_float(&b->nb, INFINITY));
break;
default: {
bool swap;
nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
if (swap) {
nir_ssa_def *tmp = src[0];
src[0] = src[1];
src[1] = tmp;
}
val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
break;
} /* default */
}
b->nb.exact = false;
}
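
The *swap out-parameter of vtn_nir_alu_op_for_spirv_opcode above is easiest to see with a small, hedged restatement of the default case: greater-than style comparisons have no direct NIR opcode in this table, so they are emitted as the corresponding less-than with the operands reversed (for example, SpvOpUGreaterThan maps to nir_op_ult and a > b is built as b < a):

#include "vtn_private.h"

/* Sketch only: condensed version of the default case in vtn_handle_alu(). */
static nir_ssa_def *
emit_binop(struct vtn_builder *b, SpvOp opcode,
           nir_ssa_def *src0, nir_ssa_def *src1)
{
   bool swap;
   nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
   if (swap) {
      nir_ssa_def *tmp = src0;
      src0 = src1;
      src1 = tmp;
   }
   return nir_build_alu(&b->nb, op, src0, src1, NULL, NULL);
}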

@@ -0,0 +1,778 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "vtn_private.h"
#include "nir/nir_vla.h"
static bool
vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode,
const uint32_t *w, unsigned count)
{
switch (opcode) {
case SpvOpFunction: {
assert(b->func == NULL);
b->func = rzalloc(b, struct vtn_function);
list_inithead(&b->func->body);
b->func->control = w[3];
const struct glsl_type *result_type =
vtn_value(b, w[1], vtn_value_type_type)->type->type;
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function);
val->func = b->func;
const struct glsl_type *func_type =
vtn_value(b, w[4], vtn_value_type_type)->type->type;
assert(glsl_get_function_return_type(func_type) == result_type);
nir_function *func =
nir_function_create(b->shader, ralloc_strdup(b->shader, val->name));
func->num_params = glsl_get_length(func_type);
func->params = ralloc_array(b->shader, nir_parameter, func->num_params);
for (unsigned i = 0; i < func->num_params; i++) {
const struct glsl_function_param *param =
glsl_get_function_param(func_type, i);
func->params[i].type = param->type;
if (param->in) {
if (param->out) {
func->params[i].param_type = nir_parameter_inout;
} else {
func->params[i].param_type = nir_parameter_in;
}
} else {
if (param->out) {
func->params[i].param_type = nir_parameter_out;
} else {
assert(!"Parameter is neither in nor out");
}
}
}
func->return_type = glsl_get_function_return_type(func_type);
b->func->impl = nir_function_impl_create(func);
b->func_param_idx = 0;
break;
}
case SpvOpFunctionEnd:
b->func->end = w;
b->func = NULL;
break;
case SpvOpFunctionParameter: {
struct vtn_value *val =
vtn_push_value(b, w[2], vtn_value_type_access_chain);
struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
assert(b->func_param_idx < b->func->impl->num_params);
nir_variable *param = b->func->impl->params[b->func_param_idx++];
assert(param->type == type->type);
/* Name the parameter so it shows up nicely in NIR */
param->name = ralloc_strdup(param, val->name);
struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable);
vtn_var->type = type;
vtn_var->var = param;
vtn_var->chain.var = vtn_var;
vtn_var->chain.length = 0;
struct vtn_type *without_array = type;
while (glsl_type_is_array(without_array->type))
without_array = without_array->array_element;
if (glsl_type_is_image(without_array->type)) {
vtn_var->mode = vtn_variable_mode_image;
param->interface_type = without_array->type;
} else if (glsl_type_is_sampler(without_array->type)) {
vtn_var->mode = vtn_variable_mode_sampler;
param->interface_type = without_array->type;
} else {
vtn_var->mode = vtn_variable_mode_param;
}
val->access_chain = &vtn_var->chain;
break;
}
case SpvOpLabel: {
assert(b->block == NULL);
b->block = rzalloc(b, struct vtn_block);
b->block->node.type = vtn_cf_node_type_block;
b->block->label = w;
vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block;
if (b->func->start_block == NULL) {
/* This is the first block encountered for this function. In this
* case, we set the start block and add it to the list of
* implemented functions that we'll walk later.
*/
b->func->start_block = b->block;
exec_list_push_tail(&b->functions, &b->func->node);
}
break;
}
case SpvOpSelectionMerge:
case SpvOpLoopMerge:
assert(b->block && b->block->merge == NULL);
b->block->merge = w;
break;
case SpvOpBranch:
case SpvOpBranchConditional:
case SpvOpSwitch:
case SpvOpKill:
case SpvOpReturn:
case SpvOpReturnValue:
case SpvOpUnreachable:
assert(b->block && b->block->branch == NULL);
b->block->branch = w;
b->block = NULL;
break;
default:
/* Continue on as per normal */
return true;
}
return true;
}
static void
vtn_add_case(struct vtn_builder *b, struct vtn_switch *swtch,
struct vtn_block *break_block,
uint32_t block_id, uint32_t val, bool is_default)
{
struct vtn_block *case_block =
vtn_value(b, block_id, vtn_value_type_block)->block;
/* Don't create dummy cases that just break */
if (case_block == break_block)
return;
if (case_block->switch_case == NULL) {
struct vtn_case *c = ralloc(b, struct vtn_case);
list_inithead(&c->body);
c->start_block = case_block;
c->fallthrough = NULL;
nir_array_init(&c->values, b);
c->is_default = false;
c->visited = false;
list_addtail(&c->link, &swtch->cases);
case_block->switch_case = c;
}
if (is_default) {
case_block->switch_case->is_default = true;
} else {
nir_array_add(&case_block->switch_case->values, uint32_t, val);
}
}
/* This function performs a depth-first search of the cases and puts them
* in fall-through order.
*/
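/* For example (illustrative): if case A falls through to case B, the DFS
* below always places A immediately before B, while a case with no
* fall-through is simply moved to the front of the list.
*/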
static void
vtn_order_case(struct vtn_switch *swtch, struct vtn_case *cse)
{
if (cse->visited)
return;
cse->visited = true;
list_del(&cse->link);
if (cse->fallthrough) {
vtn_order_case(swtch, cse->fallthrough);
/* If we have a fall-through, place this case right before the case it
* falls through to. This ensures that fallthroughs come one after
* the other. These two can never get separated because that would
* imply something else falling through to the same case. Also, this
* can't break ordering because the DFS ensures that this case is
* visited before anything that falls through to it.
*/
list_addtail(&cse->link, &cse->fallthrough->link);
} else {
list_add(&cse->link, &swtch->cases);
}
}
static enum vtn_branch_type
vtn_get_branch_type(struct vtn_block *block,
struct vtn_case *swcase, struct vtn_block *switch_break,
struct vtn_block *loop_break, struct vtn_block *loop_cont)
{
if (block->switch_case) {
/* This branch is actually a fallthrough */
assert(swcase->fallthrough == NULL ||
swcase->fallthrough == block->switch_case);
swcase->fallthrough = block->switch_case;
return vtn_branch_type_switch_fallthrough;
} else if (block == switch_break) {
return vtn_branch_type_switch_break;
} else if (block == loop_break) {
return vtn_branch_type_loop_break;
} else if (block == loop_cont) {
return vtn_branch_type_loop_continue;
} else {
return vtn_branch_type_none;
}
}
static void
vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list,
struct vtn_block *start, struct vtn_case *switch_case,
struct vtn_block *switch_break,
struct vtn_block *loop_break, struct vtn_block *loop_cont,
struct vtn_block *end)
{
struct vtn_block *block = start;
while (block != end) {
if (block->merge && (*block->merge & SpvOpCodeMask) == SpvOpLoopMerge &&
!block->loop) {
struct vtn_loop *loop = ralloc(b, struct vtn_loop);
loop->node.type = vtn_cf_node_type_loop;
list_inithead(&loop->body);
list_inithead(&loop->cont_body);
loop->control = block->merge[3];
list_addtail(&loop->node.link, cf_list);
block->loop = loop;
struct vtn_block *new_loop_break =
vtn_value(b, block->merge[1], vtn_value_type_block)->block;
struct vtn_block *new_loop_cont =
vtn_value(b, block->merge[2], vtn_value_type_block)->block;
/* Note: This recursive call will start with the current block as
* its start block. If we weren't careful, we would get here
* again and end up in infinite recursion. This is why we set
* block->loop above and check for it before creating one. This
* way, we only create the loop once and the second call that
* tries to handle this loop goes to the cases below and gets
* handled as a regular block.
*
* Note: When we make the recursive walk calls, we pass NULL for
* the switch break since you have to break out of the loop first.
* We do, however, still pass the current switch case because it's
* possible that the merge block for the loop is the start of
* another case.
*/
vtn_cfg_walk_blocks(b, &loop->body, block, switch_case, NULL,
new_loop_break, new_loop_cont, NULL);
vtn_cfg_walk_blocks(b, &loop->cont_body, new_loop_cont, NULL, NULL,
new_loop_break, NULL, block);
block = new_loop_break;
continue;
}
assert(block->node.link.next == NULL);
list_addtail(&block->node.link, cf_list);
switch (*block->branch & SpvOpCodeMask) {
case SpvOpBranch: {
struct vtn_block *branch_block =
vtn_value(b, block->branch[1], vtn_value_type_block)->block;
block->branch_type = vtn_get_branch_type(branch_block,
switch_case, switch_break,
loop_break, loop_cont);
if (block->branch_type != vtn_branch_type_none)
return;
block = branch_block;
continue;
}
case SpvOpReturn:
case SpvOpReturnValue:
block->branch_type = vtn_branch_type_return;
return;
case SpvOpKill:
block->branch_type = vtn_branch_type_discard;
return;
case SpvOpBranchConditional: {
struct vtn_block *then_block =
vtn_value(b, block->branch[2], vtn_value_type_block)->block;
struct vtn_block *else_block =
vtn_value(b, block->branch[3], vtn_value_type_block)->block;
struct vtn_if *if_stmt = ralloc(b, struct vtn_if);
if_stmt->node.type = vtn_cf_node_type_if;
if_stmt->condition = block->branch[1];
list_inithead(&if_stmt->then_body);
list_inithead(&if_stmt->else_body);
list_addtail(&if_stmt->node.link, cf_list);
if (block->merge &&
(*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge) {
if_stmt->control = block->merge[2];
}
if_stmt->then_type = vtn_get_branch_type(then_block,
switch_case, switch_break,
loop_break, loop_cont);
if_stmt->else_type = vtn_get_branch_type(else_block,
switch_case, switch_break,
loop_break, loop_cont);
if (if_stmt->then_type == vtn_branch_type_none &&
if_stmt->else_type == vtn_branch_type_none) {
/* Neither side of the if is something we can short-circuit. */
assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
struct vtn_block *merge_block =
vtn_value(b, block->merge[1], vtn_value_type_block)->block;
vtn_cfg_walk_blocks(b, &if_stmt->then_body, then_block,
switch_case, switch_break,
loop_break, loop_cont, merge_block);
vtn_cfg_walk_blocks(b, &if_stmt->else_body, else_block,
switch_case, switch_break,
loop_break, loop_cont, merge_block);
enum vtn_branch_type merge_type =
vtn_get_branch_type(merge_block, switch_case, switch_break,
loop_break, loop_cont);
if (merge_type == vtn_branch_type_none) {
block = merge_block;
continue;
} else {
return;
}
} else if (if_stmt->then_type != vtn_branch_type_none &&
if_stmt->else_type != vtn_branch_type_none) {
/* Both sides were short-circuited. We're done here. */
return;
} else {
/* Exactly one side of the branch could be short-circuited.
* We set the branch up as a predicated break/continue and we
* continue on with the other side as if it were what comes
* after the if.
*/
if (if_stmt->then_type == vtn_branch_type_none) {
block = then_block;
} else {
block = else_block;
}
continue;
}
unreachable("Should have returned or continued");
}
case SpvOpSwitch: {
assert((*block->merge & SpvOpCodeMask) == SpvOpSelectionMerge);
struct vtn_block *break_block =
vtn_value(b, block->merge[1], vtn_value_type_block)->block;
struct vtn_switch *swtch = ralloc(b, struct vtn_switch);
swtch->node.type = vtn_cf_node_type_switch;
swtch->selector = block->branch[1];
list_inithead(&swtch->cases);
list_addtail(&swtch->node.link, cf_list);
/* First, we go through and record all of the cases. */
const uint32_t *branch_end =
block->branch + (block->branch[0] >> SpvWordCountShift);
vtn_add_case(b, swtch, break_block, block->branch[2], 0, true);
for (const uint32_t *w = block->branch + 3; w < branch_end; w += 2)
vtn_add_case(b, swtch, break_block, w[1], w[0], false);
/* Now, we go through and walk the blocks. While we walk through
* the blocks, we also gather the much-needed fall-through
* information.
*/
list_for_each_entry(struct vtn_case, cse, &swtch->cases, link) {
assert(cse->start_block != break_block);
vtn_cfg_walk_blocks(b, &cse->body, cse->start_block, cse,
break_block, NULL, loop_cont, NULL);
}
/* Finally, we walk over all of the cases one more time and put
* them in fall-through order.
*/
for (const uint32_t *w = block->branch + 2; w < branch_end; w += 2) {
struct vtn_block *case_block =
vtn_value(b, *w, vtn_value_type_block)->block;
if (case_block == break_block)
continue;
assert(case_block->switch_case);
vtn_order_case(swtch, case_block->switch_case);
}
block = break_block;
continue;
}
case SpvOpUnreachable:
return;
default:
unreachable("Unhandled opcode");
}
}
}
void
vtn_build_cfg(struct vtn_builder *b, const uint32_t *words, const uint32_t *end)
{
vtn_foreach_instruction(b, words, end,
vtn_cfg_handle_prepass_instruction);
foreach_list_typed(struct vtn_function, func, node, &b->functions) {
vtn_cfg_walk_blocks(b, &func->body, func->start_block,
NULL, NULL, NULL, NULL, NULL);
}
}
static bool
vtn_handle_phis_first_pass(struct vtn_builder *b, SpvOp opcode,
const uint32_t *w, unsigned count)
{
if (opcode == SpvOpLabel)
return true; /* Nothing to do */
/* If this isn't a phi node, stop. */
if (opcode != SpvOpPhi)
return false;
/* For handling phi nodes, we do a poor-man's out-of-ssa on the spot.
* For each phi, we create a variable with the appropriate type and
* do a load from that variable. Then, in a second pass, we add
* stores to that variable in each of the predecessor blocks.
*
* We could do something more intelligent here. However, in order to
* handle loops and things properly, we really need dominance
* information. It would end up basically being the into-SSA
* algorithm all over again. It's easier if we just let
* lower_vars_to_ssa do that for us instead of repeating it here.
*/
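/* Illustrative example: a SPIR-V instruction
*
*    %x = OpPhi %float %a %blockA %b %blockB
*
* is handled by creating a local variable "phi", loading from it here in
* place of %x and, in the second pass, storing %a at the end of %blockA
* and %b at the end of %blockB. nir_lower_vars_to_ssa later turns the
* variable back into proper SSA/phi form.
*/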
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
nir_variable *phi_var =
nir_local_variable_create(b->nb.impl, type->type, "phi");
_mesa_hash_table_insert(b->phi_table, w, phi_var);
val->ssa = vtn_local_load(b, nir_deref_var_create(b, phi_var));
return true;
}
static bool
vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
const uint32_t *w, unsigned count)
{
if (opcode != SpvOpPhi)
return true;
struct hash_entry *phi_entry = _mesa_hash_table_search(b->phi_table, w);
assert(phi_entry);
nir_variable *phi_var = phi_entry->data;
for (unsigned i = 3; i < count; i += 2) {
struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
struct vtn_block *pred =
vtn_value(b, w[i + 1], vtn_value_type_block)->block;
b->nb.cursor = nir_after_block_before_jump(pred->end_block);
vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
}
return true;
}
static void
vtn_emit_branch(struct vtn_builder *b, enum vtn_branch_type branch_type,
nir_variable *switch_fall_var, bool *has_switch_break)
{
switch (branch_type) {
case vtn_branch_type_switch_break:
nir_store_var(&b->nb, switch_fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1);
*has_switch_break = true;
break;
case vtn_branch_type_switch_fallthrough:
break; /* Nothing to do */
case vtn_branch_type_loop_break:
nir_jump(&b->nb, nir_jump_break);
break;
case vtn_branch_type_loop_continue:
nir_jump(&b->nb, nir_jump_continue);
break;
case vtn_branch_type_return:
nir_jump(&b->nb, nir_jump_return);
break;
case vtn_branch_type_discard: {
nir_intrinsic_instr *discard =
nir_intrinsic_instr_create(b->nb.shader, nir_intrinsic_discard);
nir_builder_instr_insert(&b->nb, &discard->instr);
break;
}
default:
unreachable("Invalid branch type");
}
}
static void
vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list,
nir_variable *switch_fall_var, bool *has_switch_break,
vtn_instruction_handler handler)
{
list_for_each_entry(struct vtn_cf_node, node, cf_list, link) {
switch (node->type) {
case vtn_cf_node_type_block: {
struct vtn_block *block = (struct vtn_block *)node;
const uint32_t *block_start = block->label;
const uint32_t *block_end = block->merge ? block->merge :
block->branch;
block_start = vtn_foreach_instruction(b, block_start, block_end,
vtn_handle_phis_first_pass);
vtn_foreach_instruction(b, block_start, block_end, handler);
block->end_block = nir_cursor_current_block(b->nb.cursor);
if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
vtn_local_store(b, src,
nir_deref_var_create(b, b->impl->return_var));
}
if (block->branch_type != vtn_branch_type_none) {
vtn_emit_branch(b, block->branch_type,
switch_fall_var, has_switch_break);
}
break;
}
case vtn_cf_node_type_if: {
struct vtn_if *vtn_if = (struct vtn_if *)node;
nir_if *if_stmt = nir_if_create(b->shader);
if_stmt->condition =
nir_src_for_ssa(vtn_ssa_value(b, vtn_if->condition)->def);
nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node);
bool sw_break = false;
b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
if (vtn_if->then_type == vtn_branch_type_none) {
vtn_emit_cf_list(b, &vtn_if->then_body,
switch_fall_var, &sw_break, handler);
} else {
vtn_emit_branch(b, vtn_if->then_type, switch_fall_var, &sw_break);
}
b->nb.cursor = nir_after_cf_list(&if_stmt->else_list);
if (vtn_if->else_type == vtn_branch_type_none) {
vtn_emit_cf_list(b, &vtn_if->else_body,
switch_fall_var, &sw_break, handler);
} else {
vtn_emit_branch(b, vtn_if->else_type, switch_fall_var, &sw_break);
}
b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node);
/* If we encountered a switch break somewhere inside of the if,
* then it would have been handled correctly by calling
* emit_cf_list or emit_branch for the interior. However, we
* need to predicate everything following on whether or not we're
* still going.
*/
if (sw_break) {
*has_switch_break = true;
nir_if *switch_if = nir_if_create(b->shader);
switch_if->condition =
nir_src_for_ssa(nir_load_var(&b->nb, switch_fall_var));
nir_cf_node_insert(b->nb.cursor, &switch_if->cf_node);
b->nb.cursor = nir_after_cf_list(&if_stmt->then_list);
}
break;
}
case vtn_cf_node_type_loop: {
struct vtn_loop *vtn_loop = (struct vtn_loop *)node;
nir_loop *loop = nir_loop_create(b->shader);
nir_cf_node_insert(b->nb.cursor, &loop->cf_node);
b->nb.cursor = nir_after_cf_list(&loop->body);
vtn_emit_cf_list(b, &vtn_loop->body, NULL, NULL, handler);
if (!list_empty(&vtn_loop->cont_body)) {
/* If we have a non-trivial continue body then we need to put
* it at the beginning of the loop with a flag to ensure that
* it doesn't get executed in the first iteration.
*/
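/* Illustrative shape of the NIR we emit (pseudocode):
*
*    cont = false;
*    loop {
*       if (cont) { <continue body> }
*       cont = true;
*       <loop body>
*    }
*/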
nir_variable *do_cont =
nir_local_variable_create(b->nb.impl, glsl_bool_type(), "cont");
b->nb.cursor = nir_before_cf_node(&loop->cf_node);
nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_FALSE), 1);
b->nb.cursor = nir_before_cf_list(&loop->body);
nir_if *cont_if = nir_if_create(b->shader);
cont_if->condition = nir_src_for_ssa(nir_load_var(&b->nb, do_cont));
nir_cf_node_insert(b->nb.cursor, &cont_if->cf_node);
b->nb.cursor = nir_after_cf_list(&cont_if->then_list);
vtn_emit_cf_list(b, &vtn_loop->cont_body, NULL, NULL, handler);
b->nb.cursor = nir_after_cf_node(&cont_if->cf_node);
nir_store_var(&b->nb, do_cont, nir_imm_int(&b->nb, NIR_TRUE), 1);
b->has_loop_continue = true;
}
b->nb.cursor = nir_after_cf_node(&loop->cf_node);
break;
}
case vtn_cf_node_type_switch: {
struct vtn_switch *vtn_switch = (struct vtn_switch *)node;
/* First, we create a variable to keep track of whether or not the
* switch is still going at any given point. Any switch breaks
* will set this variable to false.
*/
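/* Illustrative shape for a two-case switch (pseudocode):
*
*    fall = false;
*    if ((sel == A) || fall) { fall = true; <case A body> }
*    if ((sel == B) || fall) { fall = true; <case B body> }
*
* A switch break inside a case stores false to fall so that the
* remaining case ifs are skipped.
*/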
nir_variable *fall_var =
nir_local_variable_create(b->nb.impl, glsl_bool_type(), "fall");
nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_FALSE), 1);
/* Next, we gather up all of the conditions. We have to do this
* up-front because we also need to build an "any" condition so
* that we can use !any for default.
*/
const int num_cases = list_length(&vtn_switch->cases);
NIR_VLA(nir_ssa_def *, conditions, num_cases);
nir_ssa_def *sel = vtn_ssa_value(b, vtn_switch->selector)->def;
/* An accumulation of all conditions. Used for the default */
nir_ssa_def *any = NULL;
int i = 0;
list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) {
if (cse->is_default) {
conditions[i++] = NULL;
continue;
}
nir_ssa_def *cond = NULL;
nir_array_foreach(&cse->values, uint32_t, val) {
nir_ssa_def *is_val =
nir_ieq(&b->nb, sel, nir_imm_int(&b->nb, *val));
cond = cond ? nir_ior(&b->nb, cond, is_val) : is_val;
}
any = any ? nir_ior(&b->nb, any, cond) : cond;
conditions[i++] = cond;
}
assert(i == num_cases);
/* Now we can walk the list of cases and actually emit code */
i = 0;
list_for_each_entry(struct vtn_case, cse, &vtn_switch->cases, link) {
/* Figure out the condition */
nir_ssa_def *cond = conditions[i++];
if (cse->is_default) {
assert(cond == NULL);
cond = nir_inot(&b->nb, any);
}
/* Take fallthrough into account */
cond = nir_ior(&b->nb, cond, nir_load_var(&b->nb, fall_var));
nir_if *case_if = nir_if_create(b->nb.shader);
case_if->condition = nir_src_for_ssa(cond);
nir_cf_node_insert(b->nb.cursor, &case_if->cf_node);
bool has_break = false;
b->nb.cursor = nir_after_cf_list(&case_if->then_list);
nir_store_var(&b->nb, fall_var, nir_imm_int(&b->nb, NIR_TRUE), 1);
vtn_emit_cf_list(b, &cse->body, fall_var, &has_break, handler);
(void)has_break; /* We don't care */
b->nb.cursor = nir_after_cf_node(&case_if->cf_node);
}
assert(i == num_cases);
break;
}
default:
unreachable("Invalid CF node type");
}
}
}
void
vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
vtn_instruction_handler instruction_handler)
{
nir_builder_init(&b->nb, func->impl);
b->nb.cursor = nir_after_cf_list(&func->impl->body);
b->has_loop_continue = false;
b->phi_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
_mesa_key_pointer_equal);
vtn_emit_cf_list(b, &func->body, NULL, NULL, instruction_handler);
vtn_foreach_instruction(b, func->start_block->label, func->end,
vtn_handle_phi_second_pass);
/* Continue blocks for loops get inserted before the body of the loop
* but instructions in the continue may use SSA defs in the loop body.
* Therefore, we need to repair SSA to insert the needed phi nodes.
*/
if (b->has_loop_continue)
nir_repair_ssa_impl(func->impl);
}


@@ -0,0 +1,666 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jason Ekstrand (jason@jlekstrand.net)
*
*/
#include "vtn_private.h"
#include "GLSL.std.450.h"
#define M_PIf ((float) M_PI)
#define M_PI_2f ((float) M_PI_2)
#define M_PI_4f ((float) M_PI_4)
static nir_ssa_def *
build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
{
unsigned swiz[4] = {1, 0, 0, 0};
nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
}
static nir_ssa_def *
build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
{
unsigned yzx[4] = {1, 2, 0, 0};
unsigned zxy[4] = {2, 0, 1, 0};
nir_ssa_def *prod0 =
nir_fmul(b, col[0],
nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
nir_swizzle(b, col[2], zxy, 3, true)));
nir_ssa_def *prod1 =
nir_fmul(b, col[0],
nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
nir_swizzle(b, col[2], yzx, 3, true)));
nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
return nir_fadd(b, nir_channel(b, diff, 0),
nir_fadd(b, nir_channel(b, diff, 1),
nir_channel(b, diff, 2)));
}
static nir_ssa_def *
build_mat4_det(nir_builder *b, nir_ssa_def **col)
{
nir_ssa_def *subdet[4];
for (unsigned i = 0; i < 4; i++) {
unsigned swiz[3];
for (unsigned j = 0; j < 3; j++)
swiz[j] = j + (j >= i);
nir_ssa_def *subcol[3];
subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
subdet[i] = build_mat3_det(b, subcol);
}
nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
nir_channel(b, prod, 1)),
nir_fsub(b, nir_channel(b, prod, 2),
nir_channel(b, prod, 3)));
}
static nir_ssa_def *
build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
{
unsigned size = glsl_get_vector_elements(src->type);
nir_ssa_def *cols[4];
for (unsigned i = 0; i < size; i++)
cols[i] = src->elems[i]->def;
switch (size) {
case 2: return build_mat2_det(&b->nb, cols);
case 3: return build_mat3_det(&b->nb, cols);
case 4: return build_mat4_det(&b->nb, cols);
default:
unreachable("Invalid matrix size");
}
}
/* Computes the determinant of the submatrix given by taking src and
* removing the specified row and column.
*/
static nir_ssa_def *
build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
unsigned size, unsigned row, unsigned col)
{
assert(row < size && col < size);
if (size == 2) {
return nir_channel(b, src->elems[1 - col]->def, 1 - row);
} else {
/* Swizzle to get all but the specified row */
unsigned swiz[3];
for (unsigned j = 0; j < 3; j++)
swiz[j] = j + (j >= row);
/* Grab all but the specified column */
nir_ssa_def *subcol[3];
for (unsigned j = 0; j < size; j++) {
if (j != col) {
subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def,
swiz, size - 1, true);
}
}
if (size == 3) {
return build_mat2_det(b, subcol);
} else {
assert(size == 4);
return build_mat3_det(b, subcol);
}
}
}
static struct vtn_ssa_value *
matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
{
nir_ssa_def *adj_col[4];
unsigned size = glsl_get_vector_elements(src->type);
/* Build up an adjugate matrix */
for (unsigned c = 0; c < size; c++) {
nir_ssa_def *elem[4];
for (unsigned r = 0; r < size; r++) {
elem[r] = build_mat_subdet(&b->nb, src, size, c, r);
if ((r + c) % 2)
elem[r] = nir_fneg(&b->nb, elem[r]);
}
adj_col[c] = nir_vec(&b->nb, elem, size);
}
nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src));
struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
for (unsigned i = 0; i < size; i++)
val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv);
return val;
}
static nir_ssa_def*
build_length(nir_builder *b, nir_ssa_def *vec)
{
switch (vec->num_components) {
case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec));
case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec));
case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec));
case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec));
default:
unreachable("Invalid number of components");
}
}
static inline nir_ssa_def *
build_fclamp(nir_builder *b,
nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
}
/**
* Return e^x.
*/
static nir_ssa_def *
build_exp(nir_builder *b, nir_ssa_def *x)
{
return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E)));
}
/**
* Return ln(x) - the natural logarithm of x.
*/
static nir_ssa_def *
build_log(nir_builder *b, nir_ssa_def *x)
{
return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E));
}
/**
* Approximate asin(x) by the formula:
* asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
*
* which is correct to first order at x=0 and x=±1 regardless of the p
* coefficients but can be made second-order correct at both ends by selecting
* the fit coefficients appropriately. Different p coefficients can be used
* in the asin and acos implementation to minimize some relative error metric
* in each case.
*/
static nir_ssa_def *
build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
{
nir_ssa_def *abs_x = nir_fabs(b, x);
return nir_fmul(b, nir_fsign(b, x),
nir_fsub(b, nir_imm_float(b, M_PI_2f),
nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
nir_fadd(b, nir_imm_float(b, M_PI_2f),
nir_fmul(b, abs_x,
nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
nir_fmul(b, abs_x,
nir_fadd(b, nir_imm_float(b, p0),
nir_fmul(b, abs_x,
nir_imm_float(b, p1))))))))));
}
/**
* Compute xs[0] + xs[1] + xs[2] + ... using fadd.
*/
static nir_ssa_def *
build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
{
nir_ssa_def *accum = xs[0];
for (int i = 1; i < terms; i++)
accum = nir_fadd(b, accum, xs[i]);
return accum;
}
static nir_ssa_def *
build_atan(nir_builder *b, nir_ssa_def *y_over_x)
{
nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
nir_ssa_def *one = nir_imm_float(b, 1.0f);
/*
* range-reduction, first step:
*
* / y_over_x if |y_over_x| <= 1.0;
* x = <
* \ 1.0 / y_over_x otherwise
*/
nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
nir_fmax(b, abs_y_over_x, one));
/*
* approximate atan by evaluating polynomial:
*
* x * 0.9999793128310355 - x^3 * 0.3326756418091246 +
* x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 +
* x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
*/
nir_ssa_def *x_2 = nir_fmul(b, x, x);
nir_ssa_def *x_3 = nir_fmul(b, x_2, x);
nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2);
nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2);
nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2);
nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
nir_ssa_def *polynomial_terms[] = {
nir_fmul(b, x, nir_imm_float(b, 0.9999793128310355f)),
nir_fmul(b, x_3, nir_imm_float(b, -0.3326756418091246f)),
nir_fmul(b, x_5, nir_imm_float(b, 0.1938924977115610f)),
nir_fmul(b, x_7, nir_imm_float(b, -0.1173503194786851f)),
nir_fmul(b, x_9, nir_imm_float(b, 0.0536813784310406f)),
nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
};
nir_ssa_def *tmp =
build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
/* range-reduction fixup */
tmp = nir_fadd(b, tmp,
nir_fmul(b,
nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
nir_fadd(b, nir_fmul(b, tmp,
nir_imm_float(b, -2.0f)),
nir_imm_float(b, M_PI_2f))));
/* sign fixup */
return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
}
static nir_ssa_def *
build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
{
nir_ssa_def *zero = nir_imm_float(b, 0.0f);
/* If |x| >= 1.0e-8 * |y|: */
nir_ssa_def *condition =
nir_fge(b, nir_fabs(b, x),
nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
/* Then...call atan(y/x) and fix it up: */
nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
nir_ssa_def *r_then =
nir_bcsel(b, nir_flt(b, x, zero),
nir_fadd(b, atan1,
nir_bcsel(b, nir_fge(b, y, zero),
nir_imm_float(b, M_PIf),
nir_imm_float(b, -M_PIf))),
atan1);
/* Else... */
nir_ssa_def *r_else =
nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
return nir_bcsel(b, condition, r_then, r_else);
}
static nir_ssa_def *
build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
{
nir_ssa_def *abs_x = nir_fabs(b, x);
nir_ssa_def *zero = nir_imm_float(b, 0.0f);
/* Single-precision floating-point values are stored as
* 1 sign bit;
* 8 exponent bits;
* 23 mantissa bits.
*
* An exponent shift of 23 will shift the mantissa out, leaving only the
* exponent and sign bit (which itself may be zero, if the absolute value
* was taken before the bitcast and shift).
*/
nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
/* Exponent of floating-point values in the range [0.5, 1.0). */
nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u);
nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
*exponent =
nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
nir_bcsel(b, is_not_zero, exponent_bias, zero));
return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
nir_bcsel(b, is_not_zero, exponent_value, zero));
}
static nir_op
vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode)
{
switch (opcode) {
case GLSLstd450Round: return nir_op_fround_even;
case GLSLstd450RoundEven: return nir_op_fround_even;
case GLSLstd450Trunc: return nir_op_ftrunc;
case GLSLstd450FAbs: return nir_op_fabs;
case GLSLstd450SAbs: return nir_op_iabs;
case GLSLstd450FSign: return nir_op_fsign;
case GLSLstd450SSign: return nir_op_isign;
case GLSLstd450Floor: return nir_op_ffloor;
case GLSLstd450Ceil: return nir_op_fceil;
case GLSLstd450Fract: return nir_op_ffract;
case GLSLstd450Sin: return nir_op_fsin;
case GLSLstd450Cos: return nir_op_fcos;
case GLSLstd450Pow: return nir_op_fpow;
case GLSLstd450Exp2: return nir_op_fexp2;
case GLSLstd450Log2: return nir_op_flog2;
case GLSLstd450Sqrt: return nir_op_fsqrt;
case GLSLstd450InverseSqrt: return nir_op_frsq;
case GLSLstd450FMin: return nir_op_fmin;
case GLSLstd450UMin: return nir_op_umin;
case GLSLstd450SMin: return nir_op_imin;
case GLSLstd450FMax: return nir_op_fmax;
case GLSLstd450UMax: return nir_op_umax;
case GLSLstd450SMax: return nir_op_imax;
case GLSLstd450FMix: return nir_op_flrp;
case GLSLstd450Fma: return nir_op_ffma;
case GLSLstd450Ldexp: return nir_op_ldexp;
case GLSLstd450FindILsb: return nir_op_find_lsb;
case GLSLstd450FindSMsb: return nir_op_ifind_msb;
case GLSLstd450FindUMsb: return nir_op_ufind_msb;
/* Packing/Unpacking functions */
case GLSLstd450PackSnorm4x8: return nir_op_pack_snorm_4x8;
case GLSLstd450PackUnorm4x8: return nir_op_pack_unorm_4x8;
case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16;
case GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16;
case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16;
case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8;
case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8;
case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16;
case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16;
case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16;
default:
unreachable("No NIR equivalent");
}
}
static void
handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
const uint32_t *w, unsigned count)
{
struct nir_builder *nb = &b->nb;
const struct glsl_type *dest_type =
vtn_value(b, w[1], vtn_value_type_type)->type->type;
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
val->ssa = vtn_create_ssa_value(b, dest_type);
/* Collect the various SSA sources */
unsigned num_inputs = count - 5;
nir_ssa_def *src[3] = { NULL, };
for (unsigned i = 0; i < num_inputs; i++)
src[i] = vtn_ssa_value(b, w[i + 5])->def;
switch (entrypoint) {
case GLSLstd450Radians:
val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251));
return;
case GLSLstd450Degrees:
val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131));
return;
case GLSLstd450Tan:
val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]),
nir_fcos(nb, src[0]));
return;
case GLSLstd450Modf: {
nir_ssa_def *sign = nir_fsign(nb, src[0]);
nir_ssa_def *abs = nir_fabs(nb, src[0]);
val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
nir_store_deref_var(nb, vtn_nir_deref(b, w[6]),
nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf);
return;
}
case GLSLstd450ModfStruct: {
nir_ssa_def *sign = nir_fsign(nb, src[0]);
nir_ssa_def *abs = nir_fabs(nb, src[0]);
assert(glsl_type_is_struct(val->ssa->type));
val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
return;
}
case GLSLstd450Step:
val->ssa->def = nir_sge(nb, src[1], src[0]);
return;
case GLSLstd450Length:
val->ssa->def = build_length(nb, src[0]);
return;
case GLSLstd450Distance:
val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1]));
return;
case GLSLstd450Normalize:
val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0]));
return;
case GLSLstd450Exp:
val->ssa->def = build_exp(nb, src[0]);
return;
case GLSLstd450Log:
val->ssa->def = build_log(nb, src[0]);
return;
case GLSLstd450FClamp:
val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
return;
case GLSLstd450UClamp:
val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]);
return;
case GLSLstd450SClamp:
val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]);
return;
case GLSLstd450Cross: {
unsigned yzx[4] = { 1, 2, 0, 0 };
unsigned zxy[4] = { 2, 0, 1, 0 };
val->ssa->def =
nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true),
nir_swizzle(nb, src[1], zxy, 3, true)),
nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true),
nir_swizzle(nb, src[1], yzx, 3, true)));
return;
}
case GLSLstd450SmoothStep: {
/* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
nir_ssa_def *t =
build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]),
nir_fsub(nb, src[1], src[0])),
nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0));
/* result = t * t * (3 - 2 * t) */
val->ssa->def =
nir_fmul(nb, t, nir_fmul(nb, t,
nir_fsub(nb, nir_imm_float(nb, 3.0),
nir_fmul(nb, nir_imm_float(nb, 2.0), t))));
return;
}
case GLSLstd450FaceForward:
val->ssa->def =
nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]),
nir_imm_float(nb, 0.0)),
src[0], nir_fneg(nb, src[0]));
return;
case GLSLstd450Reflect:
/* I - 2 * dot(N, I) * N */
val->ssa->def =
nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0),
nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
src[1])));
return;
case GLSLstd450Refract: {
nir_ssa_def *I = src[0];
nir_ssa_def *N = src[1];
nir_ssa_def *eta = src[2];
nir_ssa_def *n_dot_i = nir_fdot(nb, N, I);
nir_ssa_def *one = nir_imm_float(nb, 1.0);
nir_ssa_def *zero = nir_imm_float(nb, 0.0);
/* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
nir_ssa_def *k =
nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
nir_ssa_def *result =
nir_fsub(nb, nir_fmul(nb, eta, I),
nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
nir_fsqrt(nb, k)), N));
/* XXX: bcsel, or if statement? */
val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
return;
}
case GLSLstd450Sinh:
/* 0.5 * (e^x - e^(-x)) */
val->ssa->def =
nir_fmul(nb, nir_imm_float(nb, 0.5f),
nir_fsub(nb, build_exp(nb, src[0]),
build_exp(nb, nir_fneg(nb, src[0]))));
return;
case GLSLstd450Cosh:
/* 0.5 * (e^x + e^(-x)) */
val->ssa->def =
nir_fmul(nb, nir_imm_float(nb, 0.5f),
nir_fadd(nb, build_exp(nb, src[0]),
build_exp(nb, nir_fneg(nb, src[0]))));
return;
case GLSLstd450Tanh:
/* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
val->ssa->def =
nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
nir_fsub(nb, build_exp(nb, src[0]),
build_exp(nb, nir_fneg(nb, src[0])))),
nir_fmul(nb, nir_imm_float(nb, 0.5f),
nir_fadd(nb, build_exp(nb, src[0]),
build_exp(nb, nir_fneg(nb, src[0])))));
return;
case GLSLstd450Asinh:
val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),
nir_imm_float(nb, 1.0f))))));
return;
case GLSLstd450Acosh:
val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
nir_imm_float(nb, 1.0f)))));
return;
case GLSLstd450Atanh: {
nir_ssa_def *one = nir_imm_float(nb, 1.0);
val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
nir_fsub(nb, one, src[0]))));
return;
}
case GLSLstd450Asin:
val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955);
return;
case GLSLstd450Acos:
val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f),
build_asin(nb, src[0], 0.08132463, -0.02363318));
return;
case GLSLstd450Atan:
val->ssa->def = build_atan(nb, src[0]);
return;
case GLSLstd450Atan2:
val->ssa->def = build_atan2(nb, src[0], src[1]);
return;
case GLSLstd450Frexp: {
nir_ssa_def *exponent;
val->ssa->def = build_frexp(nb, src[0], &exponent);
nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
return;
}
case GLSLstd450FrexpStruct: {
assert(glsl_type_is_struct(val->ssa->type));
val->ssa->elems[0]->def = build_frexp(nb, src[0],
&val->ssa->elems[1]->def);
return;
}
default:
val->ssa->def =
nir_build_alu(&b->nb, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint),
src[0], src[1], src[2], NULL);
return;
}
}
bool
vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
const uint32_t *w, unsigned count)
{
switch ((enum GLSLstd450)ext_opcode) {
case GLSLstd450Determinant: {
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
val->ssa = rzalloc(b, struct vtn_ssa_value);
val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5]));
break;
}
case GLSLstd450MatrixInverse: {
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5]));
break;
}
case GLSLstd450InterpolateAtCentroid:
case GLSLstd450InterpolateAtSample:
case GLSLstd450InterpolateAtOffset:
unreachable("Unhandled opcode");
default:
handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count);
}
return true;
}


@@ -0,0 +1,484 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jason Ekstrand (jason@jlekstrand.net)
*
*/
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_array.h"
#include "nir_spirv.h"
#include "spirv.h"
struct vtn_builder;
struct vtn_decoration;
enum vtn_value_type {
vtn_value_type_invalid = 0,
vtn_value_type_undef,
vtn_value_type_string,
vtn_value_type_decoration_group,
vtn_value_type_type,
vtn_value_type_constant,
vtn_value_type_access_chain,
vtn_value_type_function,
vtn_value_type_block,
vtn_value_type_ssa,
vtn_value_type_extension,
vtn_value_type_image_pointer,
vtn_value_type_sampled_image,
};
enum vtn_branch_type {
vtn_branch_type_none,
vtn_branch_type_switch_break,
vtn_branch_type_switch_fallthrough,
vtn_branch_type_loop_break,
vtn_branch_type_loop_continue,
vtn_branch_type_discard,
vtn_branch_type_return,
};
enum vtn_cf_node_type {
vtn_cf_node_type_block,
vtn_cf_node_type_if,
vtn_cf_node_type_loop,
vtn_cf_node_type_switch,
};
struct vtn_cf_node {
struct list_head link;
enum vtn_cf_node_type type;
};
struct vtn_loop {
struct vtn_cf_node node;
/* The main body of the loop */
struct list_head body;
/* The "continue" part of the loop. This gets executed after the body
* and is where you go when you hit a continue.
*/
struct list_head cont_body;
SpvLoopControlMask control;
};
struct vtn_if {
struct vtn_cf_node node;
uint32_t condition;
enum vtn_branch_type then_type;
struct list_head then_body;
enum vtn_branch_type else_type;
struct list_head else_body;
SpvSelectionControlMask control;
};
struct vtn_case {
struct list_head link;
struct list_head body;
/* The block that starts this case */
struct vtn_block *start_block;
/* The fallthrough case, if any */
struct vtn_case *fallthrough;
/* The uint32_t values that map to this case */
nir_array values;
/* True if this is the default case */
bool is_default;
/* Initialized to false; used when sorting the list of cases */
bool visited;
};
struct vtn_switch {
struct vtn_cf_node node;
uint32_t selector;
struct list_head cases;
};
struct vtn_block {
struct vtn_cf_node node;
/** A pointer to the label instruction */
const uint32_t *label;
/** A pointer to the merge instruction (or NULL if none exists) */
const uint32_t *merge;
/** A pointer to the branch instruction that ends this block */
const uint32_t *branch;
enum vtn_branch_type branch_type;
/** Points to the loop that this block starts (if it starts a loop) */
struct vtn_loop *loop;
/** Points to the switch case started by this block (if any) */
struct vtn_case *switch_case;
/** The last NIR block generated for this SPIR-V block. */
nir_block *end_block;
};
struct vtn_function {
struct exec_node node;
nir_function_impl *impl;
struct vtn_block *start_block;
struct list_head body;
const uint32_t *end;
SpvFunctionControlMask control;
};
typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t,
const uint32_t *, unsigned);
void vtn_build_cfg(struct vtn_builder *b, const uint32_t *words,
const uint32_t *end);
void vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
vtn_instruction_handler instruction_handler);
const uint32_t *
vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start,
const uint32_t *end, vtn_instruction_handler handler);
struct vtn_ssa_value {
union {
nir_ssa_def *def;
struct vtn_ssa_value **elems;
};
/* For matrices, if this is non-NULL, then this value is actually the
* transpose of some other value. The value that `transposed` points to
* always dominates this value.
*/
struct vtn_ssa_value *transposed;
const struct glsl_type *type;
};
struct vtn_type {
const struct glsl_type *type;
/* The value that declares this type. Used for finding decorations */
struct vtn_value *val;
/* for matrices, whether the matrix is stored row-major */
bool row_major;
/* for structs, the offset of each member */
unsigned *offsets;
/* for structs, whether it was decorated as a "non-SSBO-like" block */
bool block;
/* for structs, whether it was decorated as an "SSBO-like" block */
bool buffer_block;
/* for structs with block == true, whether this is a builtin block (i.e. a
* block that contains only builtins).
*/
bool builtin_block;
/* Image format for image_load_store type images */
unsigned image_format;
/* Access qualifier for storage images */
SpvAccessQualifier access_qualifier;
/* for arrays and matrices, the array stride */
unsigned stride;
/* for arrays, the vtn_type for the elements of the array */
struct vtn_type *array_element;
/* for structures, the vtn_type for each member */
struct vtn_type **members;
/* Whether this type, or a parent type, has been decorated as a builtin */
bool is_builtin;
SpvBuiltIn builtin;
};
struct vtn_variable;
enum vtn_access_mode {
vtn_access_mode_id,
vtn_access_mode_literal,
};
struct vtn_access_link {
enum vtn_access_mode mode;
uint32_t id;
};
struct vtn_access_chain {
struct vtn_variable *var;
uint32_t length;
/* Struct elements and array offsets */
struct vtn_access_link link[0];
};
enum vtn_variable_mode {
vtn_variable_mode_local,
vtn_variable_mode_global,
vtn_variable_mode_param,
vtn_variable_mode_ubo,
vtn_variable_mode_ssbo,
vtn_variable_mode_push_constant,
vtn_variable_mode_image,
vtn_variable_mode_sampler,
vtn_variable_mode_workgroup,
vtn_variable_mode_input,
vtn_variable_mode_output,
};
struct vtn_variable {
enum vtn_variable_mode mode;
struct vtn_type *type;
unsigned descriptor_set;
unsigned binding;
nir_variable *var;
nir_variable **members;
struct vtn_access_chain chain;
};
struct vtn_image_pointer {
struct vtn_access_chain *image;
nir_ssa_def *coord;
nir_ssa_def *sample;
};
struct vtn_sampled_image {
struct vtn_access_chain *image; /* Image or array of images */
struct vtn_access_chain *sampler; /* Sampler */
};
struct vtn_value {
enum vtn_value_type value_type;
const char *name;
struct vtn_decoration *decoration;
union {
void *ptr;
char *str;
struct vtn_type *type;
struct {
nir_constant *constant;
const struct glsl_type *const_type;
};
struct vtn_access_chain *access_chain;
struct vtn_image_pointer *image;
struct vtn_sampled_image *sampled_image;
struct vtn_function *func;
struct vtn_block *block;
struct vtn_ssa_value *ssa;
vtn_instruction_handler ext_handler;
};
};
#define VTN_DEC_DECORATION -1
#define VTN_DEC_EXECUTION_MODE -2
#define VTN_DEC_STRUCT_MEMBER0 0
struct vtn_decoration {
struct vtn_decoration *next;
/* Specifies how to apply this decoration. Negative values represent a
* decoration or execution mode. (See the VTN_DEC_ #defines above.)
* Non-negative values specify that it applies to a structure member.
*/
int scope;
const uint32_t *literals;
struct vtn_value *group;
union {
SpvDecoration decoration;
SpvExecutionMode exec_mode;
};
};
struct vtn_builder {
nir_builder nb;
nir_shader *shader;
nir_function_impl *impl;
struct vtn_block *block;
/* Current file, line, and column. Useful for debugging. Set
* automatically by vtn_foreach_instruction.
*/
char *file;
int line, col;
/*
* In SPIR-V, constants are global, whereas in NIR, the load_const
* instruction we use is per-function. So while we parse each function, we
* keep a hash table of constants we've resolved to nir_ssa_value's so
* far, and we lazily resolve them when we see them used in a function.
*/
struct hash_table *const_table;
/*
* Map from phi instructions (pointer to the start of the instruction)
* to the variable corresponding to it.
*/
struct hash_table *phi_table;
unsigned num_specializations;
struct nir_spirv_specialization *specializations;
unsigned value_id_bound;
struct vtn_value *values;
gl_shader_stage entry_point_stage;
const char *entry_point_name;
struct vtn_value *entry_point;
bool origin_upper_left;
struct vtn_function *func;
struct exec_list functions;
/* Current function parameter index */
unsigned func_param_idx;
bool has_loop_continue;
};
static inline struct vtn_value *
vtn_push_value(struct vtn_builder *b, uint32_t value_id,
enum vtn_value_type value_type)
{
assert(value_id < b->value_id_bound);
assert(b->values[value_id].value_type == vtn_value_type_invalid);
b->values[value_id].value_type = value_type;
return &b->values[value_id];
}
static inline struct vtn_value *
vtn_untyped_value(struct vtn_builder *b, uint32_t value_id)
{
assert(value_id < b->value_id_bound);
return &b->values[value_id];
}
static inline struct vtn_value *
vtn_value(struct vtn_builder *b, uint32_t value_id,
enum vtn_value_type value_type)
{
struct vtn_value *val = vtn_untyped_value(b, value_id);
assert(val->value_type == value_type);
return val;
}
struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id);
struct vtn_ssa_value *vtn_create_ssa_value(struct vtn_builder *b,
const struct glsl_type *type);
struct vtn_ssa_value *vtn_ssa_transpose(struct vtn_builder *b,
struct vtn_ssa_value *src);
nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src,
unsigned index);
nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src,
nir_ssa_def *index);
nir_ssa_def *vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src,
nir_ssa_def *insert, unsigned index);
nir_ssa_def *vtn_vector_insert_dynamic(struct vtn_builder *b, nir_ssa_def *src,
nir_ssa_def *insert, nir_ssa_def *index);
nir_deref_var *vtn_nir_deref(struct vtn_builder *b, uint32_t id);
nir_deref_var *vtn_access_chain_to_deref(struct vtn_builder *b,
struct vtn_access_chain *chain);
nir_ssa_def *
vtn_access_chain_to_offset(struct vtn_builder *b,
struct vtn_access_chain *chain,
nir_ssa_def **index_out, struct vtn_type **type_out,
unsigned *end_idx_out, bool stop_at_matrix);
struct vtn_ssa_value *vtn_local_load(struct vtn_builder *b, nir_deref_var *src);
void vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
nir_deref_var *dest);
struct vtn_ssa_value *
vtn_variable_load(struct vtn_builder *b, struct vtn_access_chain *src);
void vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
struct vtn_access_chain *dest);
void vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
const uint32_t *w, unsigned count);
typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *,
struct vtn_value *,
int member,
const struct vtn_decoration *,
void *);
void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
vtn_decoration_foreach_cb cb, void *data);
typedef void (*vtn_execution_mode_foreach_cb)(struct vtn_builder *,
struct vtn_value *,
const struct vtn_decoration *,
void *);
void vtn_foreach_execution_mode(struct vtn_builder *b, struct vtn_value *value,
vtn_execution_mode_foreach_cb cb, void *data);
nir_op vtn_nir_alu_op_for_spirv_opcode(SpvOp opcode, bool *swap);
void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
const uint32_t *w, unsigned count);
bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
const uint32_t *words, unsigned count);

File diff suppressed because it is too large

26
src/intel/Makefile.am Normal file

@@ -0,0 +1,26 @@
# Copyright © 2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
SUBDIRS = genxml isl
if HAVE_INTEL_VULKAN
SUBDIRS += vulkan
endif

1
src/intel/genxml/.gitignore vendored Normal file

@@ -0,0 +1 @@
gen*_pack.h


@@ -0,0 +1,32 @@
# Copyright © 2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
BUILT_SOURCES = \
gen6_pack.h \
gen7_pack.h \
gen75_pack.h \
gen8_pack.h \
gen9_pack.h
%_pack.h : %.xml gen_pack_header.py
$(AM_V_GEN) $(srcdir)/gen_pack_header.py $< > $@
CLEANFILES = $(BUILT_SOURCES)

60
src/intel/genxml/README Normal file

@@ -0,0 +1,60 @@
This provides some background on the design of the generated headers. We
started out trying to generate bit fields, but that evolved into the pack
functions because of a few limitations (a small illustrative sketch of the
resulting approach appears at the end of this file):
1) Bit fields still generate terrible code today. Even with modern
optimizing compilers you get multiple load+mask+store operations
to the same dword in memory as you set individual bits. The
compiler also has to generate code to mask out overflowing values
(for example, if you assign 200 to a 2-bit field). Our driver
never writes overflowing values, so that's not needed. On the
other hand, most compilers recognize that the template struct we
use is a temporary variable, copy-propagate the individual
fields, and do amazing constant folding. You should take a look
at the code that gets generated when you compile in release mode
with optimizations.
2) For some types we need to have overlapping bit fields. For
example, some values are 64-byte-aligned 32-bit offsets. The
lower 5 bits of the offset are always zero, so the hw packs a
few misc bits into those lower 5 bits. Other times a field can
be either a u32 or a float. I tried to do this with overlapping
anonymous unions and it became a big mess. Also, when using
initializers, you can only initialize one union member, so this
just doesn't work with our approach.
The pack functions, on the other hand, allow us a great deal of
flexibility in how we combine things. In the case of overlapping
fields (the u32 and float case), if we only set one of them in
the pack function, the compiler will recognize that the other is
initialized to 0 and optimize out the code to or it in.
3) Bit fields (and certainly overlapping anonymous unions of bit
fields) aren't generally stable across compilers in how they're
laid out and aligned. Our pack functions let us control exactly
how things get packed, using only simple and unambiguous bitwise
shifting and or'ing that works on any compiler.
Once we have the pack functions, they let us hook in various
transformations and validation as we go from the template struct to dwords
in memory:
1) Validation: As I said above, our driver isn't supposed to write
overflowing values to the fields, but we've of course had lots of
cases where we make mistakes and write overflowing values. With
the pack function, we can actually assert on that and catch it at
     runtime. Bit fields would just silently truncate.
  2) Type conversions: sometimes it's just a matter of writing a
     float to a u32, but we also convert from bools to bits and from
     floats to fixed-point integers.
3) Relocations: whenever we have a pointer from one buffer to
another (for example a pointer from the meta data for a texture
to the raw texture data), we have to tell the kernel about it so
it can adjust the pointer to point to the final location. That
     means we have to do extra work to record and annotate the dword
     location that holds the pointer.  With bit fields, we'd have to
     call a function to do this, but with the pack functions that
     code is generated for us as part of packing.  That's a
lot less error prone and less work.
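To make the above concrete, here is a small, self-contained sketch of the
pattern the generated headers follow: a template struct plus a _pack()
function that validates, shifts and ORs the fields into dwords.
EXAMPLE_STATE, its fields and the example_uint() helper are invented for
illustration; the real structs and the __gen_uint()/__gen_float() helpers
come from the generated genN_pack.h headers.

/* Hand-written sketch of the template-struct + pack-function pattern.
 * EXAMPLE_STATE and example_uint() are made up for illustration only. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct EXAMPLE_STATE {
   uint32_t Width;     /* bits  0..13 of dword 0 */
   uint32_t Height;    /* bits 16..29 of dword 0 */
   float    MinDepth;  /* all of dword 1, as a float */
};

union f2u { float f; uint32_t u; };

static inline uint32_t
example_uint(uint32_t v, uint32_t start, uint32_t end)
{
   /* Catch overflowing values at runtime instead of silently truncating. */
   assert(v <= (1u << (end - start + 1)) - 1);
   return v << start;
}

static inline void
EXAMPLE_STATE_pack(uint32_t *dw, const struct EXAMPLE_STATE *values)
{
   /* Simple, unambiguous shifting and OR'ing; unset fields stay zero. */
   dw[0] = example_uint(values->Width, 0, 13) |
           example_uint(values->Height, 16, 29);
   dw[1] = (union f2u) { .f = values->MinDepth }.u;
}

int
main(void)
{
   uint32_t dw[2];
   EXAMPLE_STATE_pack(dw, &(struct EXAMPLE_STATE) {
      .Width = 64, .Height = 64, .MinDepth = 0.5f,
   });
   printf("dw[0] = 0x%08x, dw[1] = 0x%08x\n", dw[0], dw[1]);
   return 0;
}

Compiling something like this with optimizations and looking at the
generated assembly shows the constant folding described above: the
temporary struct disappears and the dwords are assembled directly.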

1923
src/intel/genxml/gen6.xml Normal file

File diff suppressed because it is too large Load diff

2538
src/intel/genxml/gen7.xml Normal file

File diff suppressed because it is too large Load diff

2935
src/intel/genxml/gen75.xml Normal file

File diff suppressed because it is too large Load diff

3174
src/intel/genxml/gen8.xml Normal file

File diff suppressed because it is too large Load diff

3478
src/intel/genxml/gen9.xml Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,40 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#ifndef GEN_VERSIONx10
# error "The GEN_VERSIONx10 macro must be defined"
#endif
#if (GEN_VERSIONx10 == 70)
# include "genxml/gen7_pack.h"
#elif (GEN_VERSIONx10 == 75)
# include "genxml/gen75_pack.h"
#elif (GEN_VERSIONx10 == 80)
# include "genxml/gen8_pack.h"
#elif (GEN_VERSIONx10 == 90)
# include "genxml/gen9_pack.h"
#else
# error "Need to add a pack header include for this gen"
#endif

View file

@ -0,0 +1,90 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
/* Macros for handling per-gen compilation.
*
* The prefixing macros GENX() and genX() automatically prefix whatever you
* give them by GENX_ or genX_ where X is the gen number.
*
* You can declare a function to be used on some range of gens like this:
*
* GENX_FUNC(GEN7, GEN75) void
* genX(my_function_name)(args...)
* {
* // Do stuff
* }
*
* If the file is compiled for any set of gens containing gen7 and gen75,
* the function will effectively only get compiled twice as
 * gen7_my_function_name and gen75_my_function_name. The function has to
* be compilable on all gens, but it will become a static inline that gets
* discarded by the compiler on all gens not in range.
*
* You can do pseudo-runtime checks in your function such as
*
* if (GEN_GEN > 8 || GEN_IS_HASWELL) {
* // Do something
* }
*
 * The contents of the if statement must be valid regardless of gen, but
 * the condition is a compile-time constant, so the compiler folds the
 * branch away on gens where it is false.
*
* For places where you really do have a compile-time conflict, you can
* use preprocessor logic:
*
* #if (GEN_GEN > 8 || GEN_IS_HASWELL)
* // Do something
* #endif
*
* However, it is strongly recommended that the former be used whenever
* possible.
*/
/* Base macro defined on the command line. If we don't have this, we can't
* do anything.
*/
#ifndef GEN_VERSIONx10
# error "The GEN_VERSIONx10 macro must be defined"
#endif
#define GEN_GEN ((GEN_VERSIONx10) / 10)
#define GEN_IS_HASWELL ((GEN_VERSIONx10) == 75)
/* Prefixing macros */
#if (GEN_VERSIONx10 == 70)
# define GENX(X) GEN7_##X
# define genX(x) gen7_##x
#elif (GEN_VERSIONx10 == 75)
# define GENX(X) GEN75_##X
# define genX(x) gen75_##x
#elif (GEN_VERSIONx10 == 80)
# define GENX(X) GEN8_##X
# define genX(x) gen8_##x
#elif (GEN_VERSIONx10 == 90)
# define GENX(X) GEN9_##X
# define genX(x) gen9_##x
#else
# error "Need to add prefixing macros for this gen"
#endif
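As a minimal, self-contained illustration of how the prefixing plays out:
my_emit_state is a made-up function name, GEN_VERSIONx10 is hard-coded here
rather than passed with -D by the build, and the relevant macros are inlined
so the sketch stands alone.

#include <stdio.h>

#define GEN_VERSIONx10 80            /* the build normally passes -DGEN_VERSIONx10=80 */

#define GEN_GEN ((GEN_VERSIONx10) / 10)
#define GEN_IS_HASWELL ((GEN_VERSIONx10) == 75)

#if (GEN_VERSIONx10 == 80)
# define GENX(X) GEN8_##X
# define genX(x) gen8_##x
#endif

/* Compiles as gen8_my_emit_state in this translation unit. */
static void
genX(my_emit_state)(void)
{
   if (GEN_GEN >= 8 || GEN_IS_HASWELL) {
      /* The condition is a compile-time constant, so the compiler keeps
       * only the side that is true for this gen. */
      printf("emitting state for gen%d\n", GEN_GEN);
   }
}

int
main(void)
{
   gen8_my_emit_state();   /* callers use the prefixed name */
   return 0;
}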

View file

@ -0,0 +1,640 @@
#!/usr/bin/env python3
import xml.parsers.expat
import re
import sys
import copy
license = """/*
* Copyright (C) 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
"""
pack_header = """%(license)s
/* Instructions, enums and structures for %(platform)s.
*
* This file has been generated, do not hand edit.
*/
#pragma once
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
#include <math.h>
#ifndef __gen_validate_value
#define __gen_validate_value(x)
#endif
#ifndef __gen_field_functions
#define __gen_field_functions
union __gen_value {
float f;
uint32_t dw;
};
static inline uint64_t
__gen_mbo(uint32_t start, uint32_t end)
{
return (~0ull >> (64 - (end - start + 1))) << start;
}
static inline uint64_t
__gen_uint(uint64_t v, uint32_t start, uint32_t end)
{
__gen_validate_value(v);
#if DEBUG
const int width = end - start + 1;
if (width < 64) {
const uint64_t max = (1ull << width) - 1;
assert(v <= max);
}
#endif
return v << start;
}
static inline uint64_t
__gen_sint(int64_t v, uint32_t start, uint32_t end)
{
const int width = end - start + 1;
__gen_validate_value(v);
#if DEBUG
if (width < 64) {
const int64_t max = (1ll << (width - 1)) - 1;
const int64_t min = -(1ll << (width - 1));
assert(min <= v && v <= max);
}
#endif
const uint64_t mask = ~0ull >> (64 - width);
return (v & mask) << start;
}
static inline uint64_t
__gen_offset(uint64_t v, uint32_t start, uint32_t end)
{
__gen_validate_value(v);
#if DEBUG
uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start;
assert((v & ~mask) == 0);
#endif
return v;
}
static inline uint32_t
__gen_float(float v)
{
__gen_validate_value(v);
return ((union __gen_value) { .f = (v) }).dw;
}
static inline uint64_t
__gen_sfixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
{
__gen_validate_value(v);
const float factor = (1 << fract_bits);
#if DEBUG
const float max = ((1 << (end - start)) - 1) / factor;
const float min = -(1 << (end - start)) / factor;
assert(min <= v && v <= max);
#endif
const int32_t int_val = roundf(v * factor);
const uint64_t mask = ~0ull >> (64 - (end - start + 1));
return (int_val & mask) << start;
}
static inline uint64_t
__gen_ufixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
{
__gen_validate_value(v);
const float factor = (1 << fract_bits);
#if DEBUG
const float max = ((1 << (end - start + 1)) - 1) / factor;
const float min = 0.0f;
assert(min <= v && v <= max);
#endif
const uint32_t uint_val = roundf(v * factor);
return uint_val << start;
}
#ifndef __gen_address_type
#error #define __gen_address_type before including this file
#endif
#ifndef __gen_user_data
#error #define __gen_combine_address before including this file
#endif
#endif
"""
def to_alphanum(name):
substitutions = {
' ': '',
'/': '',
'[': '',
']': '',
'(': '',
')': '',
'-': '',
':': '',
'.': '',
',': '',
'=': '',
'>': '',
'#': '',
'α': 'alpha',
'&': '',
'*': '',
'"': '',
'+': '',
'\'': '',
}
for i, j in substitutions.items():
name = name.replace(i, j)
return name
def safe_name(name):
name = to_alphanum(name)
if not str.isalpha(name[0]):
name = '_' + name
return name
def num_from_str(num_str):
if num_str.lower().startswith('0x'):
return int(num_str, base=16)
else:
        assert not num_str.startswith('0'), 'octal numbers not allowed'
return int(num_str)
class Field:
    ufixed_pattern = re.compile(r"u(\d+)\.(\d+)")
    sfixed_pattern = re.compile(r"s(\d+)\.(\d+)")
def __init__(self, parser, attrs):
self.parser = parser
if "name" in attrs:
self.name = safe_name(attrs["name"])
self.start = int(attrs["start"])
self.end = int(attrs["end"])
self.type = attrs["type"]
if "prefix" in attrs:
self.prefix = attrs["prefix"]
else:
self.prefix = None
if "default" in attrs:
self.default = int(attrs["default"])
else:
self.default = None
ufixed_match = Field.ufixed_pattern.match(self.type)
if ufixed_match:
self.type = 'ufixed'
self.fractional_size = int(ufixed_match.group(2))
sfixed_match = Field.sfixed_pattern.match(self.type)
if sfixed_match:
self.type = 'sfixed'
self.fractional_size = int(sfixed_match.group(2))
def emit_template_struct(self, dim):
if self.type == 'address':
type = '__gen_address_type'
elif self.type == 'bool':
type = 'bool'
elif self.type == 'float':
type = 'float'
elif self.type == 'ufixed':
type = 'float'
elif self.type == 'sfixed':
type = 'float'
elif self.type == 'uint' and self.end - self.start > 32:
type = 'uint64_t'
elif self.type == 'offset':
type = 'uint64_t'
elif self.type == 'int':
type = 'int32_t'
elif self.type == 'uint':
type = 'uint32_t'
elif self.type in self.parser.structs:
type = 'struct ' + self.parser.gen_prefix(safe_name(self.type))
elif self.type == 'mbo':
return
else:
print("#error unhandled type: %s" % self.type)
print(" %-36s %s%s;" % (type, self.name, dim))
if len(self.values) > 0 and self.default == None:
if self.prefix:
prefix = self.prefix + "_"
else:
prefix = ""
for value in self.values:
print("#define %-40s %d" % (prefix + value.name, value.value))
class Group:
def __init__(self, parser, parent, start, count, size):
self.parser = parser
self.parent = parent
self.start = start
self.count = count
self.size = size
self.fields = []
def emit_template_struct(self, dim):
if self.count == 0:
print(" /* variable length fields follow */")
else:
if self.count > 1:
dim = "%s[%d]" % (dim, self.count)
for field in self.fields:
field.emit_template_struct(dim)
class DWord:
def __init__(self):
self.size = 32
self.fields = []
self.address = None
def collect_dwords(self, dwords, start, dim):
for field in self.fields:
if type(field) is Group:
if field.count == 1:
field.collect_dwords(dwords, start + field.start, dim)
else:
for i in range(field.count):
field.collect_dwords(dwords,
start + field.start + i * field.size,
"%s[%d]" % (dim, i))
continue
index = (start + field.start) // 32
if not index in dwords:
dwords[index] = self.DWord()
clone = copy.copy(field)
clone.start = clone.start + start
clone.end = clone.end + start
clone.dim = dim
dwords[index].fields.append(clone)
if field.type == "address":
# assert dwords[index].address == None
dwords[index].address = field
# Coalesce all the dwords covered by this field. The two cases we
# handle are where multiple fields are in a 64 bit word (typically
            # an address and a few bits) or where a single struct field
# completely covers multiple dwords.
while index < (start + field.end) // 32:
if index + 1 in dwords and not dwords[index] == dwords[index + 1]:
dwords[index].fields.extend(dwords[index + 1].fields)
dwords[index].size = 64
dwords[index + 1] = dwords[index]
index = index + 1
def emit_pack_function(self, start):
dwords = {}
self.collect_dwords(dwords, 0, "")
# Determine number of dwords in this group. If we have a size, use
# that, since that'll account for MBZ dwords at the end of a group
# (like dword 8 on BDW+ 3DSTATE_HS). Otherwise, use the largest dword
# index we've seen plus one.
if self.size > 0:
length = self.size // 32
else:
length = max(dwords.keys()) + 1
for index in range(length):
# Handle MBZ dwords
if not index in dwords:
print("")
print(" dw[%d] = 0;" % index)
continue
# For 64 bit dwords, we aliased the two dword entries in the dword
# dict it occupies. Now that we're emitting the pack function,
# skip the duplicate entries.
dw = dwords[index]
if index > 0 and index - 1 in dwords and dw == dwords[index - 1]:
continue
# Special case: only one field and it's a struct at the beginning
# of the dword. In this case we pack directly into the
# destination. This is the only way we handle embedded structs
# larger than 32 bits.
if len(dw.fields) == 1:
field = dw.fields[0]
name = field.name + field.dim
if field.type in self.parser.structs and field.start % 32 == 0:
print("")
print(" %s_pack(data, &dw[%d], &values->%s);" %
(self.parser.gen_prefix(safe_name(field.type)), index, name))
continue
            # Pack any fields of struct type first so we have integer values
            # to OR into the dword for those fields.
field_index = 0
for field in dw.fields:
if type(field) is Field and field.type in self.parser.structs:
name = field.name + field.dim
print("")
print(" uint32_t v%d_%d;" % (index, field_index))
print(" %s_pack(data, &v%d_%d, &values->%s);" %
(self.parser.gen_prefix(safe_name(field.type)), index, field_index, name))
field_index = field_index + 1
print("")
dword_start = index * 32
if dw.address == None:
address_count = 0
else:
address_count = 1
if dw.size == 32 and dw.address == None:
v = None
print(" dw[%d] =" % index)
elif len(dw.fields) > address_count:
v = "v%d" % index
print(" const uint%d_t %s =" % (dw.size, v))
else:
v = "0"
field_index = 0
for field in dw.fields:
if field.type != "mbo":
name = field.name + field.dim
if field.type == "mbo":
s = "__gen_mbo(%d, %d)" % \
(field.start - dword_start, field.end - dword_start)
elif field.type == "address":
s = None
elif field.type == "uint":
s = "__gen_uint(values->%s, %d, %d)" % \
(name, field.start - dword_start, field.end - dword_start)
elif field.type == "int":
s = "__gen_sint(values->%s, %d, %d)" % \
(name, field.start - dword_start, field.end - dword_start)
elif field.type == "bool":
s = "__gen_uint(values->%s, %d, %d)" % \
(name, field.start - dword_start, field.end - dword_start)
elif field.type == "float":
s = "__gen_float(values->%s)" % name
elif field.type == "offset":
s = "__gen_offset(values->%s, %d, %d)" % \
(name, field.start - dword_start, field.end - dword_start)
elif field.type == 'ufixed':
s = "__gen_ufixed(values->%s, %d, %d, %d)" % \
(name, field.start - dword_start, field.end - dword_start, field.fractional_size)
elif field.type == 'sfixed':
s = "__gen_sfixed(values->%s, %d, %d, %d)" % \
(name, field.start - dword_start, field.end - dword_start, field.fractional_size)
elif field.type in self.parser.structs:
s = "__gen_uint(v%d_%d, %d, %d)" % \
(index, field_index, field.start - dword_start, field.end - dword_start)
field_index = field_index + 1
else:
print("/* unhandled field %s, type %s */\n" % (name, field.type))
s = None
if not s == None:
if field == dw.fields[-1]:
print(" %s;" % s)
else:
print(" %s |" % s)
if dw.size == 32:
if dw.address:
print(" dw[%d] = __gen_combine_address(data, &dw[%d], values->%s, %s);" % (index, index, dw.address.name, v))
continue
if dw.address:
v_address = "v%d_address" % index
print(" const uint64_t %s =\n __gen_combine_address(data, &dw[%d], values->%s, %s);" %
(v_address, index, dw.address.name, v))
v = v_address
print(" dw[%d] = %s;" % (index, v))
print(" dw[%d] = %s >> 32;" % (index + 1, v))
class Value:
def __init__(self, attrs):
self.name = safe_name(attrs["name"])
self.value = int(attrs["value"])
class Parser:
def __init__(self):
self.parser = xml.parsers.expat.ParserCreate()
self.parser.StartElementHandler = self.start_element
self.parser.EndElementHandler = self.end_element
self.instruction = None
self.structs = {}
self.registers = {}
def start_element(self, name, attrs):
if name == "genxml":
self.platform = attrs["name"]
self.gen = attrs["gen"].replace('.', '')
print(pack_header % {'license': license, 'platform': self.platform})
elif name in ("instruction", "struct", "register"):
if name == "instruction":
self.instruction = safe_name(attrs["name"])
self.length_bias = int(attrs["bias"])
elif name == "struct":
self.struct = safe_name(attrs["name"])
self.structs[attrs["name"]] = 1
elif name == "register":
self.register = safe_name(attrs["name"])
self.reg_num = num_from_str(attrs["num"])
self.registers[attrs["name"]] = 1
if "length" in attrs:
self.length = int(attrs["length"])
size = self.length * 32
else:
self.length = None
size = 0
self.group = Group(self, None, 0, 1, size)
elif name == "group":
group = Group(self, self.group,
int(attrs["start"]), int(attrs["count"]), int(attrs["size"]))
self.group.fields.append(group)
self.group = group
elif name == "field":
self.group.fields.append(Field(self, attrs))
self.values = []
elif name == "enum":
self.values = []
self.enum = safe_name(attrs["name"])
if "prefix" in attrs:
self.prefix = safe_name(attrs["prefix"])
else:
self.prefix= None
elif name == "value":
self.values.append(Value(attrs))
def end_element(self, name):
if name == "instruction":
self.emit_instruction()
self.instruction = None
self.group = None
elif name == "struct":
self.emit_struct()
self.struct = None
self.group = None
elif name == "register":
self.emit_register()
self.register = None
self.reg_num = None
self.group = None
elif name == "group":
self.group = self.group.parent
elif name == "field":
self.group.fields[-1].values = self.values
elif name == "enum":
self.emit_enum()
self.enum = None
def gen_prefix(self, name):
if name[0] == "_":
return 'GEN%s%s' % (self.gen, name)
else:
return 'GEN%s_%s' % (self.gen, name)
def emit_template_struct(self, name, group):
print("struct %s {" % self.gen_prefix(name))
group.emit_template_struct("")
print("};\n")
def emit_pack_function(self, name, group):
name = self.gen_prefix(name)
print("static inline void\n%s_pack(__gen_user_data *data, void * restrict dst,\n%sconst struct %s * restrict values)\n{" %
(name, ' ' * (len(name) + 6), name))
# Cast dst to make header C++ friendly
print(" uint32_t * restrict dw = (uint32_t * restrict) dst;")
group.emit_pack_function(0)
print("}\n")
def emit_instruction(self):
name = self.instruction
if not self.length == None:
print('#define %-33s %6d' %
(self.gen_prefix(name + "_length"), self.length))
print('#define %-33s %6d' %
(self.gen_prefix(name + "_length_bias"), self.length_bias))
default_fields = []
for field in self.group.fields:
if not type(field) is Field:
continue
if field.default == None:
continue
default_fields.append(" .%-35s = %6d" % (field.name, field.default))
if default_fields:
print('#define %-40s\\' % (self.gen_prefix(name + '_header')))
print(", \\\n".join(default_fields))
print('')
self.emit_template_struct(self.instruction, self.group)
self.emit_pack_function(self.instruction, self.group)
def emit_register(self):
name = self.register
if not self.reg_num == None:
print('#define %-33s 0x%04x' %
(self.gen_prefix(name + "_num"), self.reg_num))
if not self.length == None:
print('#define %-33s %6d' %
(self.gen_prefix(name + "_length"), self.length))
self.emit_template_struct(self.register, self.group)
self.emit_pack_function(self.register, self.group)
def emit_struct(self):
name = self.struct
if not self.length == None:
print('#define %-33s %6d' %
(self.gen_prefix(name + "_length"), self.length))
self.emit_template_struct(self.struct, self.group)
self.emit_pack_function(self.struct, self.group)
def emit_enum(self):
print('/* enum %s */' % self.gen_prefix(self.enum))
for value in self.values:
if self.prefix:
name = self.prefix + "_" + value.name
else:
name = value.name
print('#define %-36s %6d' % (name.upper(), value.value))
print('')
def parse(self, filename):
file = open(filename, "rb")
self.parser.ParseFile(file)
file.close()
if len(sys.argv) < 2:
print("No input xml file specified")
sys.exit(1)
input_file = sys.argv[1]
p = Parser()
p.parse(input_file)

1
src/intel/isl/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/isl_format_layout.c

123
src/intel/isl/Makefile.am Normal file
View file

@ -0,0 +1,123 @@
# Copyright 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
SUBDIRS = .
ISL_GEN_LIBS = \
libisl-gen7.la \
libisl-gen75.la \
libisl-gen8.la \
libisl-gen9.la \
$(NULL)
noinst_LTLIBRARIES = $(ISL_GEN_LIBS) libisl.la
EXTRA_DIST = tests
# The gallium includes are for the util/u_math.h include from main/macros.h
AM_CPPFLAGS = \
$(INTEL_CFLAGS) \
$(VALGRIND_CFLAGS) \
$(DEFINES) \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/intel \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/i965 \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/include \
-I$(top_builddir)/src \
-I$(top_builddir)/src/intel
libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init
libisl_la_LIBADD = $(ISL_GEN_LIBS)
libisl_la_SOURCES = \
isl.c \
isl.h \
isl_format.c \
isl_format_layout.c \
isl_gen4.c \
isl_gen4.h \
isl_gen6.c \
isl_gen6.h \
isl_storage_image.c \
$(NULL)
libisl_gen7_la_SOURCES = \
isl_gen7.c \
isl_gen7.h \
isl_surface_state.c \
$(NULL)
libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70
libisl_gen75_la_SOURCES = \
isl_surface_state.c \
$(NULL)
libisl_gen75_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=75
libisl_gen8_la_SOURCES = \
isl_gen8.c \
isl_gen8.h \
isl_surface_state.c \
$(NULL)
libisl_gen8_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=80
libisl_gen9_la_SOURCES = \
isl_gen9.c \
isl_gen9.h \
isl_surface_state.c \
$(NULL)
libisl_gen9_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=90
BUILT_SOURCES = \
isl_format_layout.c
isl_format_layout.c: isl_format_layout_gen.bash \
isl_format_layout.csv
$(AM_V_GEN)$(srcdir)/isl_format_layout_gen.bash \
<$(srcdir)/isl_format_layout.csv >$@
# ----------------------------------------------------------------------------
# Tests
# ----------------------------------------------------------------------------
TESTS = tests/isl_surf_get_image_offset_test
check_PROGRAMS = $(TESTS)
# Link tests to libi965_compiler.la for brw_get_device_info().
tests_ldadd = \
-lm \
libisl.la \
$(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la
tests_isl_surf_get_image_offset_test_SOURCES = \
tests/isl_surf_get_image_offset_test.c
tests_isl_surf_get_image_offset_test_LDADD = $(tests_ldadd)
# ----------------------------------------------------------------------------
include $(top_srcdir)/install-lib-links.mk

113
src/intel/isl/README Normal file
View file

@ -0,0 +1,113 @@
Intel Surface Layout
Introduction
============
isl is a small library that calculates the layout of Intel GPU surfaces, queries
those layouts, and queries the properties of surface formats.
Independence from User APIs
===========================
isl's API is independent of any user-facing graphics API, such as OpenGL and
Vulkan. This independence allows isl to be used as a shared component by multiple
Intel drivers.
Intel drivers.
Rather than mimic the user-facing APIs, the isl API attempts to reflect Intel
hardware: the actual memory layout of Intel GPU surfaces and how one programs
the GPU to use those surfaces. For example:
- The tokens of `enum isl_format` (such as `ISL_FORMAT_R8G8B8A8_UNORM`)
match those of the hardware enum `SURFACE_FORMAT` rather than the OpenGL
or Vulkan format tokens. And the values of `isl_format` and
`SURFACE_FORMAT` are identical.
- The OpenGL and Vulkan APIs contain depth and stencil formats. However the
hardware enum `SURFACE_FORMAT` does not, and therefore neither does `enum
isl_format`. Rather than define new pixel formats that have no hardware
counterpart, isl records the intent to use a surface as a depth or stencil
buffer with the usage flags `ISL_SURF_USAGE_DEPTH_BIT` and
`ISL_SURF_USAGE_STENCIL_BIT`.
- `struct isl_surf` distinguishes between the surface's logical dimension
from the user API's perspective (`enum isl_surf_dim`, which may be 1D, 2D,
or 3D) and the layout of those dimensions in memory (`enum isl_dim_layout`).
Surface Units
=============
Intro
-----
ISL takes care in its equations to correctly handle conversion among surface
units (such as pixels and compression blocks) and to carefully distinguish
between a surface's logical layout in the client API and its physical layout
in memory.
Symbol names often explicitly declare their unit with a suffix:
- px: logical pixels
- sa: physical surface samples
- el: physical surface elements
- sa_rows: rows of physical surface samples
- el_rows: rows of physical surface elements
Logical units are independent of hardware generation and are closely related
to the user-facing API (OpenGL and Vulkan). Physical units are dependent on
hardware generation and reflect the surface's layout in memory.
Definitions
-----------
- Logical Pixels (px):
The surface's layout from the perspective of the client API (OpenGL and
Vulkan) is in units of logical pixels. Logical pixels are independent of the
surface's layout in memory.
A surface's width and height, in units of logical pixels, is not affected by
the surface's sample count. For example, consider a VkImage created with
VkImageCreateInfo{width=w0, height=h0, samples=s0}. The surface's width and
height at level 0 is, in units of logical pixels, w0 and h0 regardless of
the value of s0.
For example, the logical array length of a 3D surface is always 1, even on
Gen9 where the surface's memory layout is that of an array surface
(ISL_DIM_LAYOUT_GEN4_2D).
- Physical Surface Samples (sa):
For a multisampled surface, this unit has the obvious meaning.
A singlesampled surface, from ISL's perspective, is simply a multisampled
surface whose sample count is 1.
For example, consider a 2D single-level non-array surface with samples=4,
width_px=64, and height_px=64 (note that the suffix 'px' indicates logical
pixels). If the surface's multisample layout is ISL_MSAA_LAYOUT_INTERLEAVED,
then the extent of level 0 is, in units of physical surface samples,
width_sa=128, height_sa=128, depth_sa=1, array_length_sa=1. If
ISL_MSAA_LAYOUT_ARRAY, then width_sa=64, height_sa=64, depth_sa=1,
array_length_sa=4.
- Physical Surface Elements (el):
This unit allows ISL to treat compressed and uncompressed formats
identically in many calculations.
If the surface's pixel format is compressed, such as ETC2, then a surface
element is equivalent to a compression block. If uncompressed, then
a surface element is equivalent to a surface sample. As a corollary, for
a given surface a surface element is at least as large as a surface sample.
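To tie the definitions above to the example numbers, here is a small,
self-contained sketch of the px -> sa and px -> el conversions. This is
illustrative arithmetic only; real code should use isl's own helpers rather
than open-coding these rules.

#include <stdint.h>
#include <stdio.h>

static uint32_t
div_round_up(uint32_t n, uint32_t d)
{
   return (n + d - 1) / d;
}

int
main(void)
{
   /* Logical pixels (px): what the client API sees. */
   const uint32_t width_px = 64, height_px = 64;

   /* Physical surface samples (sa): samples=4 with an interleaved layout
    * spreads each pixel's samples over a 2x2 block, so both dimensions
    * double (the width_sa=128, height_sa=128 case above). */
   const uint32_t width_sa = width_px * 2, height_sa = height_px * 2;

   /* Physical surface elements (el): for a single-sampled ETC2 surface
    * with 4x4 compression blocks, an element is one block. */
   const uint32_t width_el = div_round_up(width_px, 4);
   const uint32_t height_el = div_round_up(height_px, 4);

   printf("sa: %ux%u   el: %ux%u\n", width_sa, height_sa, width_el, height_el);
   return 0;
}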
Errata
------
ISL acquired the term 'surface element' from the Broadwell PRM [1], which
defines it as follows:
    An element is defined as a pixel in uncompressed surface formats, and as
a compression block in compressed surface formats. For MSFMT_DEPTH_STENCIL
type multisampled surfaces, an element is a sample.
References
==========
[1]: Broadwell PRM >> Volume 2d: Command Reference: Structures >>
RENDER_SURFACE_STATE Surface Vertical Alignment (p325)

1497
src/intel/isl/isl.c Normal file

File diff suppressed because it is too large Load diff

1178
src/intel/isl/isl.h Normal file

File diff suppressed because it is too large Load diff

108
src/intel/isl/isl_format.c Normal file
View file

@ -0,0 +1,108 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include "isl.h"
bool
isl_format_has_uint_channel(enum isl_format fmt)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);
return fmtl->channels.r.type == ISL_UINT ||
fmtl->channels.g.type == ISL_UINT ||
fmtl->channels.b.type == ISL_UINT ||
fmtl->channels.a.type == ISL_UINT ||
fmtl->channels.l.type == ISL_UINT ||
fmtl->channels.i.type == ISL_UINT ||
fmtl->channels.p.type == ISL_UINT;
}
bool
isl_format_has_sint_channel(enum isl_format fmt)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);
return fmtl->channels.r.type == ISL_SINT ||
fmtl->channels.g.type == ISL_SINT ||
fmtl->channels.b.type == ISL_SINT ||
fmtl->channels.a.type == ISL_SINT ||
fmtl->channels.l.type == ISL_SINT ||
fmtl->channels.i.type == ISL_SINT ||
fmtl->channels.p.type == ISL_SINT;
}
enum isl_format
isl_format_rgb_to_rgba(enum isl_format rgb)
{
assert(isl_format_is_rgb(rgb));
switch (rgb) {
case ISL_FORMAT_R32G32B32_FLOAT: return ISL_FORMAT_R32G32B32A32_FLOAT;
case ISL_FORMAT_R32G32B32_SINT: return ISL_FORMAT_R32G32B32A32_SINT;
case ISL_FORMAT_R32G32B32_UINT: return ISL_FORMAT_R32G32B32A32_UINT;
case ISL_FORMAT_R32G32B32_UNORM: return ISL_FORMAT_R32G32B32A32_UNORM;
case ISL_FORMAT_R32G32B32_SNORM: return ISL_FORMAT_R32G32B32A32_SNORM;
case ISL_FORMAT_R32G32B32_SSCALED: return ISL_FORMAT_R32G32B32A32_SSCALED;
case ISL_FORMAT_R32G32B32_USCALED: return ISL_FORMAT_R32G32B32A32_USCALED;
case ISL_FORMAT_R32G32B32_SFIXED: return ISL_FORMAT_R32G32B32A32_SFIXED;
case ISL_FORMAT_R8G8B8_UNORM: return ISL_FORMAT_R8G8B8A8_UNORM;
case ISL_FORMAT_R8G8B8_SNORM: return ISL_FORMAT_R8G8B8A8_SNORM;
case ISL_FORMAT_R8G8B8_SSCALED: return ISL_FORMAT_R8G8B8A8_SSCALED;
case ISL_FORMAT_R8G8B8_USCALED: return ISL_FORMAT_R8G8B8A8_USCALED;
case ISL_FORMAT_R16G16B16_FLOAT: return ISL_FORMAT_R16G16B16A16_FLOAT;
case ISL_FORMAT_R16G16B16_UNORM: return ISL_FORMAT_R16G16B16A16_UNORM;
case ISL_FORMAT_R16G16B16_SNORM: return ISL_FORMAT_R16G16B16A16_SNORM;
case ISL_FORMAT_R16G16B16_SSCALED: return ISL_FORMAT_R16G16B16A16_SSCALED;
case ISL_FORMAT_R16G16B16_USCALED: return ISL_FORMAT_R16G16B16A16_USCALED;
case ISL_FORMAT_R8G8B8_UNORM_SRGB: return ISL_FORMAT_R8G8B8A8_UNORM_SRGB;
case ISL_FORMAT_R16G16B16_UINT: return ISL_FORMAT_R16G16B16A16_UINT;
case ISL_FORMAT_R16G16B16_SINT: return ISL_FORMAT_R16G16B16A16_SINT;
case ISL_FORMAT_R8G8B8_UINT: return ISL_FORMAT_R8G8B8A8_UINT;
case ISL_FORMAT_R8G8B8_SINT: return ISL_FORMAT_R8G8B8A8_SINT;
default:
return ISL_FORMAT_UNSUPPORTED;
}
}
enum isl_format
isl_format_rgb_to_rgbx(enum isl_format rgb)
{
assert(isl_format_is_rgb(rgb));
switch (rgb) {
case ISL_FORMAT_R32G32B32_FLOAT:
return ISL_FORMAT_R32G32B32X32_FLOAT;
case ISL_FORMAT_R16G16B16_UNORM:
return ISL_FORMAT_R16G16B16X16_UNORM;
case ISL_FORMAT_R16G16B16_FLOAT:
return ISL_FORMAT_R16G16B16X16_FLOAT;
case ISL_FORMAT_R8G8B8_UNORM:
return ISL_FORMAT_R8G8B8X8_UNORM;
case ISL_FORMAT_R8G8B8_UNORM_SRGB:
return ISL_FORMAT_R8G8B8X8_UNORM_SRGB;
default:
return ISL_FORMAT_UNSUPPORTED;
}
}

View file

@ -0,0 +1,287 @@
# Copyright 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
# @file
# @brief Layout of all hardware surface formats
#
# For the official list, see Broadwell PRM: Volume 2b: Command Reference:
# Enumerations: SURFACE_FORMAT.
#
# Columns:
# name: format name in PRM
# bpb: bits per block
# bw: block width, in pixels
# bh: block height, in pixels
# bd: block depth, in pixels
# r: red channel, data type and bitwidth
# g: green channel
# b: blue channel
# a: alpha channel
# l: luminance channel
# i: intensity channel
# p: palette channel
# space: colorspace
# txc: texture compression
#
# Data Types:
# x: void
# r: raw
# un: unorm
# sn: snorm
# uf: ufloat
# sf: sfloat
# ux: ufixed
# sx: sfixed
# ui: uint
# si: sint
# us: uscaled
# ss: sscaled
# Table is aligned with the Vim commands below, using the Align plugin:
# :AlignCtrl lr+ p8000000000000P1
# /^# name/,$ Align,
# name , bpb, bw, bh, bd, r, g, b, a, l, i, p, space, txc
R32G32B32A32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, sf32, , , , linear,
R32G32B32A32_SINT , 128, 1, 1, 1, si32, si32, si32, si32, , , , linear,
R32G32B32A32_UINT , 128, 1, 1, 1, ui32, ui32, ui32, ui32, , , , linear,
R32G32B32A32_UNORM , 128, 1, 1, 1, un32, un32, un32, un32, , , , linear,
R32G32B32A32_SNORM , 128, 1, 1, 1, sn32, sn32, sn32, sn32, , , , linear,
R64G64_FLOAT , 128, 1, 1, 1, sf64, sf64, , , , , , linear,
R32G32B32X32_FLOAT , 128, 1, 1, 1, sf32, sf32, sf32, x32, , , , linear,
R32G32B32A32_SSCALED , 128, 1, 1, 1, ss32, ss32, ss32, ss32, , , , linear,
R32G32B32A32_USCALED , 128, 1, 1, 1, us32, us32, us32, us32, , , , linear,
R32G32B32A32_SFIXED , 128, 1, 1, 1, sx32, sx32, sx32, sx32, , , , linear,
R64G64_PASSTHRU , 128, 1, 1, 1, r64, r64, , , , , , ,
R32G32B32_FLOAT , 96, 1, 1, 1, sf32, sf32, sf32, , , , , linear,
R32G32B32_SINT , 96, 1, 1, 1, si32, si32, si32, , , , , linear,
R32G32B32_UINT , 96, 1, 1, 1, ui32, ui32, ui32, , , , , linear,
R32G32B32_UNORM , 96, 1, 1, 1, un32, un32, un32, , , , , linear,
R32G32B32_SNORM , 96, 1, 1, 1, sn32, sn32, sn32, , , , , linear,
R32G32B32_SSCALED , 96, 1, 1, 1, ss32, ss32, ss32, , , , , linear,
R32G32B32_USCALED , 96, 1, 1, 1, us32, us32, us32, , , , , linear,
R32G32B32_SFIXED , 96, 1, 1, 1, sx32, sx32, sx32, , , , , linear,
R16G16B16A16_UNORM , 64, 1, 1, 1, un16, un16, un16, un16, , , , linear,
R16G16B16A16_SNORM , 64, 1, 1, 1, sn16, sn16, sn16, sn16, , , , linear,
R16G16B16A16_SINT , 64, 1, 1, 1, si16, si16, si16, si16, , , , linear,
R16G16B16A16_UINT , 64, 1, 1, 1, ui16, ui16, ui16, ui16, , , , linear,
R16G16B16A16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, sf16, , , , linear,
R32G32_FLOAT , 64, 1, 1, 1, sf32, sf32, , , , , , linear,
R32G32_SINT , 64, 1, 1, 1, si32, si32, , , , , , linear,
R32G32_UINT , 64, 1, 1, 1, ui32, ui32, , , , , , linear,
R32_FLOAT_X8X24_TYPELESS , 64, 1, 1, 1, sf32, x8, x24, , , , , linear,
X32_TYPELESS_G8X24_UINT , 64, 1, 1, 1, x32, ui8, x24, , , , , linear,
L32A32_FLOAT , 64, 1, 1, 1, , , , sf32, sf32, , , linear,
R32G32_UNORM , 64, 1, 1, 1, un32, un32, , , , , , linear,
R32G32_SNORM , 64, 1, 1, 1, sn32, sn32, , , , , , linear,
R64_FLOAT , 64, 1, 1, 1, sf64, , , , , , , linear,
R16G16B16X16_UNORM , 64, 1, 1, 1, un16, un16, un16, x16, , , , linear,
R16G16B16X16_FLOAT , 64, 1, 1, 1, sf16, sf16, sf16, x16, , , , linear,
A32X32_FLOAT , 64, 1, 1, 1, , , , sf32, x32, , , alpha,
L32X32_FLOAT , 64, 1, 1, 1, , , , x32, sf32, , , linear,
I32X32_FLOAT , 64, 1, 1, 1, , , , x32, , sf32, , linear,
R16G16B16A16_SSCALED , 64, 1, 1, 1, ss16, ss16, ss16, ss16, , , , linear,
R16G16B16A16_USCALED , 64, 1, 1, 1, us16, us16, us16, us16, , , , linear,
R32G32_SSCALED , 64, 1, 1, 1, ss32, ss32, , , , , , linear,
R32G32_USCALED , 64, 1, 1, 1, us32, us32, , , , , , linear,
R32G32_SFIXED , 64, 1, 1, 1, sx32, sx32, , , , , , linear,
R64_PASSTHRU , 64, 1, 1, 1, r64, , , , , , , ,
B8G8R8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear,
B8G8R8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb,
R10G10B10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear,
R10G10B10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb,
R10G10B10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear,
R10G10B10_SNORM_A2_UNORM , 32, 1, 1, 1, sn10, sn10, sn10, un2, , , , linear,
R8G8B8A8_UNORM , 32, 1, 1, 1, un8, un8, un8, un8, , , , linear,
R8G8B8A8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, un8, , , , srgb,
R8G8B8A8_SNORM , 32, 1, 1, 1, sn8, sn8, sn8, sn8, , , , linear,
R8G8B8A8_SINT , 32, 1, 1, 1, si8, si8, si8, si8, , , , linear,
R8G8B8A8_UINT , 32, 1, 1, 1, ui8, ui8, ui8, ui8, , , , linear,
R16G16_UNORM , 32, 1, 1, 1, un16, un16, , , , , , linear,
R16G16_SNORM , 32, 1, 1, 1, sn16, sn16, , , , , , linear,
R16G16_SINT , 32, 1, 1, 1, si16, si16, , , , , , linear,
R16G16_UINT , 32, 1, 1, 1, ui16, ui16, , , , , , linear,
R16G16_FLOAT , 32, 1, 1, 1, sf16, sf16, , , , , , linear,
B10G10R10A2_UNORM , 32, 1, 1, 1, un10, un10, un10, un2, , , , linear,
B10G10R10A2_UNORM_SRGB , 32, 1, 1, 1, un10, un10, un10, un2, , , , srgb,
R11G11B10_FLOAT , 32, 1, 1, 1, uf11, uf11, uf10, , , , , linear,
R32_SINT , 32, 1, 1, 1, si32, , , , , , , linear,
R32_UINT , 32, 1, 1, 1, ui32, , , , , , , linear,
R32_FLOAT , 32, 1, 1, 1, sf32, , , , , , , linear,
R24_UNORM_X8_TYPELESS , 32, 1, 1, 1, un24, x8, , , , , , linear,
X24_TYPELESS_G8_UINT , 32, 1, 1, 1, x24, ui8, , , , , , linear,
L32_UNORM , 32, 1, 1, 1, , , , , un32, , , linear,
A32_UNORM , 32, 1, 1, 1, , , , un32, , , , alpha,
L16A16_UNORM , 32, 1, 1, 1, , , , un16, un16, , , linear,
I24X8_UNORM , 32, 1, 1, 1, , , , x8, , un24, , linear,
L24X8_UNORM , 32, 1, 1, 1, , , , x8, un24, , , linear,
A24X8_UNORM , 32, 1, 1, 1, , , , un24, x8, , , alpha,
I32_FLOAT , 32, 1, 1, 1, , , , , , sf32, , linear,
L32_FLOAT , 32, 1, 1, 1, , , , , sf32, , , linear,
A32_FLOAT , 32, 1, 1, 1, , , , sf32, , , , alpha,
X8B8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear,
A8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, x8, un8, , , , linear,
B8X8_UNORM_G8R8_SNORM , 32, 1, 1, 1, sn8, sn8, un8, x8, , , , linear,
B8G8R8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear,
B8G8R8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb,
R8G8B8X8_UNORM , 32, 1, 1, 1, un8, un8, un8, x8, , , , linear,
R8G8B8X8_UNORM_SRGB , 32, 1, 1, 1, un8, un8, un8, x8, , , , srgb,
R9G9B9E5_SHAREDEXP , 32, 1, 1, 1, ui9, ui9, ui9, , , , , linear,
B10G10R10X2_UNORM , 32, 1, 1, 1, un10, un10, un10, x2, , , , linear,
L16A16_FLOAT , 32, 1, 1, 1, , , , sf16, sf16, , , linear,
R32_UNORM , 32, 1, 1, 1, un32, , , , , , , linear,
R32_SNORM , 32, 1, 1, 1, sn32, , , , , , , linear,
R10G10B10X2_USCALED , 32, 1, 1, 1, us10, us10, us10, x2, , , , linear,
R8G8B8A8_SSCALED , 32, 1, 1, 1, ss8, ss8, ss8, ss8, , , , linear,
R8G8B8A8_USCALED , 32, 1, 1, 1, us8, us8, us8, us8, , , , linear,
R16G16_SSCALED , 32, 1, 1, 1, ss16, ss16, , , , , , linear,
R16G16_USCALED , 32, 1, 1, 1, us16, us16, , , , , , linear,
R32_SSCALED , 32, 1, 1, 1, ss32, , , , , , , linear,
R32_USCALED , 32, 1, 1, 1, us32, , , , , , , linear,
B5G6R5_UNORM , 16, 1, 1, 1, un5, un6, un5, , , , , linear,
B5G6R5_UNORM_SRGB , 16, 1, 1, 1, un5, un6, un5, , , , , srgb,
B5G5R5A1_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear,
B5G5R5A1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, un1, , , , srgb,
B4G4R4A4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear,
B4G4R4A4_UNORM_SRGB , 16, 1, 1, 1, un4, un4, un4, un4, , , , srgb,
R8G8_UNORM , 16, 1, 1, 1, un8, un8, , , , , , linear,
R8G8_SNORM , 16, 1, 1, 1, sn8, sn8, , , , , , linear,
R8G8_SINT , 16, 1, 1, 1, si8, si8, , , , , , linear,
R8G8_UINT , 16, 1, 1, 1, ui8, ui8, , , , , , linear,
R16_UNORM , 16, 1, 1, 1, un16, , , , , , , linear,
R16_SNORM , 16, 1, 1, 1, sn16, , , , , , , linear,
R16_SINT , 16, 1, 1, 1, si16, , , , , , , linear,
R16_UINT , 16, 1, 1, 1, ui16, , , , , , , linear,
R16_FLOAT , 16, 1, 1, 1, sf16, , , , , , , linear,
A8P8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear,
A8P8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear,
I16_UNORM , 16, 1, 1, 1, , , , , , un16, , linear,
L16_UNORM , 16, 1, 1, 1, , , , , un16, , , linear,
A16_UNORM , 16, 1, 1, 1, , , , un16, , , , alpha,
L8A8_UNORM , 16, 1, 1, 1, , , , un8, un8, , , linear,
I16_FLOAT , 16, 1, 1, 1, , , , , , sf16, , linear,
L16_FLOAT , 16, 1, 1, 1, , , , , sf16, , , linear,
A16_FLOAT , 16, 1, 1, 1, , , , sf16, , , , alpha,
L8A8_UNORM_SRGB , 16, 1, 1, 1, , , , un8, un8, , , srgb,
R5G5_SNORM_B6_UNORM , 16, 1, 1, 1, sn5, sn5, un6, , , , , linear,
B5G5R5X1_UNORM , 16, 1, 1, 1, un5, un5, un5, x1, , , , linear,
B5G5R5X1_UNORM_SRGB , 16, 1, 1, 1, un5, un5, un5, x1, , , , srgb,
R8G8_SSCALED , 16, 1, 1, 1, ss8, ss8, , , , , , linear,
R8G8_USCALED , 16, 1, 1, 1, us8, us8, , , , , , linear,
R16_SSCALED , 16, 1, 1, 1, ss16, , , , , , , linear,
R16_USCALED , 16, 1, 1, 1, us16, , , , , , , linear,
P8A8_UNORM_PALETTE0 , 16, 1, 1, 1, , , , un8, , , un8, linear,
P8A8_UNORM_PALETTE1 , 16, 1, 1, 1, , , , un8, , , un8, linear,
A1B5G5R5_UNORM , 16, 1, 1, 1, un5, un5, un5, un1, , , , linear,
A4B4G4R4_UNORM , 16, 1, 1, 1, un4, un4, un4, un4, , , , linear,
L8A8_UINT , 16, 1, 1, 1, , , , ui8, ui8, , , linear,
L8A8_SINT , 16, 1, 1, 1, , , , si8, si8, , , linear,
R8_UNORM , 8, 1, 1, 1, un8, , , , , , , linear,
R8_SNORM , 8, 1, 1, 1, sn8, , , , , , , linear,
R8_SINT , 8, 1, 1, 1, si8, , , , , , , linear,
R8_UINT , 8, 1, 1, 1, ui8, , , , , , , linear,
A8_UNORM , 8, 1, 1, 1, , , , un8, , , , alpha,
I8_UNORM , 8, 1, 1, 1, , , , , , un8, , linear,
L8_UNORM , 8, 1, 1, 1, , , , , un8, , , linear,
P4A4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear,
A4P4_UNORM_PALETTE0 , 8, 1, 1, 1, , , , un4, , , un4, linear,
R8_SSCALED , 8, 1, 1, 1, ss8, , , , , , , linear,
R8_USCALED , 8, 1, 1, 1, us8, , , , , , , linear,
P8_UNORM_PALETTE0 , 8, 1, 1, 1, , , , , , , un8, linear,
L8_UNORM_SRGB , 8, 1, 1, 1, , , , , un8, , , linear,
P8_UNORM_PALETTE1 , 8, 1, 1, 1, , , , , , , un8, linear,
P4A4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear,
A4P4_UNORM_PALETTE1 , 8, 1, 1, 1, , , , un4, , , un4, linear,
Y8_UNORM , 0, 0, 0, 0, , , , , , , , yuv,
L8_UINT , 8, 1, 1, 1, , , , , ui8, , , linear,
L8_SINT , 8, 1, 1, 1, , , , , si8, , , linear,
I8_UINT , 8, 1, 1, 1, , , , , , ui8, , linear,
I8_SINT , 8, 1, 1, 1, , , , , , si8, , linear,
DXT1_RGB_SRGB , 64, 4, 4, 1, un4, un4, un4, , , , , srgb, dxt1
R1_UNORM , 1, 1, 1, 1, un1, , , , , , , linear,
YCRCB_NORMAL , 0, 0, 0, 0, , , , , , , , yuv,
YCRCB_SWAPUVY , 0, 0, 0, 0, , , , , , , , yuv,
P2_UNORM_PALETTE0 , 2, 1, 1, 1, , , , , , , un2, linear,
P2_UNORM_PALETTE1 , 2, 1, 1, 1, , , , , , , un2, linear,
BC1_UNORM , 64, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt1
BC2_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt3
BC3_UNORM , 128, 4, 4, 1, un4, un4, un4, un4, , , , linear, dxt5
BC4_UNORM , 64, 4, 4, 1, un8, , , , , , , linear, rgtc1
BC5_UNORM , 128, 4, 4, 1, un8, un8, , , , , , linear, rgtc2
BC1_UNORM_SRGB , 64, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt1
BC2_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt3
BC3_UNORM_SRGB , 128, 4, 4, 1, un4, un4, un4, un4, , , , srgb, dxt5
MONO8 , 1, 1, 1, 1, , , , , , , , ,
YCRCB_SWAPUV , 0, 0, 0, 0, , , , , , , , yuv,
YCRCB_SWAPY , 0, 0, 0, 0, , , , , , , , yuv,
DXT1_RGB , 64, 4, 4, 1, un4, un4, un4, , , , , linear, dxt1
FXT1 , 128, 8, 4, 1, un4, un4, un4, , , , , linear, fxt1
R8G8B8_UNORM , 24, 1, 1, 1, un8, un8, un8, , , , , linear,
R8G8B8_SNORM , 24, 1, 1, 1, sn8, sn8, sn8, , , , , linear,
R8G8B8_SSCALED , 24, 1, 1, 1, ss8, ss8, ss8, , , , , linear,
R8G8B8_USCALED , 24, 1, 1, 1, us8, us8, us8, , , , , linear,
R64G64B64A64_FLOAT , 256, 1, 1, 1, sf64, sf64, sf64, sf64, , , , linear,
R64G64B64_FLOAT , 192, 1, 1, 1, sf64, sf64, sf64, , , , , linear,
BC4_SNORM , 64, 4, 4, 1, sn8, , , , , , , linear, rgtc1
BC5_SNORM , 128, 4, 4, 1, sn8, sn8, , , , , , linear, rgtc2
R16G16B16_FLOAT , 48, 1, 1, 1, sf16, sf16, sf16, , , , , linear,
R16G16B16_UNORM , 48, 1, 1, 1, un16, un16, un16, , , , , linear,
R16G16B16_SNORM , 48, 1, 1, 1, sn16, sn16, sn16, , , , , linear,
R16G16B16_SSCALED , 48, 1, 1, 1, ss16, ss16, ss16, , , , , linear,
R16G16B16_USCALED , 48, 1, 1, 1, us16, us16, us16, , , , , linear,
BC6H_SF16 , 128, 4, 4, 1, sf16, sf16, sf16, , , , , linear, bptc
BC7_UNORM , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, bptc
BC7_UNORM_SRGB , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, bptc
BC6H_UF16 , 128, 4, 4, 1, uf16, uf16, uf16, , , , , linear, bptc
PLANAR_420_8 , 0, 0, 0, 0, , , , , , , , yuv,
R8G8B8_UNORM_SRGB , 24, 1, 1, 1, un8, un8, un8, , , , , srgb,
ETC1_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc1
ETC2_RGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , linear, etc2
EAC_R11 , 64, 4, 4, 1, un11, , , , , , , linear, etc2
EAC_RG11 , 128, 4, 4, 1, un11, un11, , , , , , linear, etc2
EAC_SIGNED_R11 , 64, 4, 4, 1, sn11, , , , , , , linear, etc2
EAC_SIGNED_RG11 , 128, 4, 4, 1, sn11, sn11, , , , , , linear, etc2
ETC2_SRGB8 , 64, 4, 4, 1, un8, un8, un8, , , , , srgb, etc2
R16G16B16_UINT , 48, 1, 1, 1, ui16, ui16, ui16, , , , , linear,
R16G16B16_SINT , 48, 1, 1, 1, si16, si16, si16, , , , , linear,
R32_SFIXED , 32, 1, 1, 1, sx32, , , , , , , linear,
R10G10B10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear,
R10G10B10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear,
R10G10B10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear,
R10G10B10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear,
B10G10R10A2_SNORM , 32, 1, 1, 1, sn10, sn10, sn10, sn2, , , , linear,
B10G10R10A2_USCALED , 32, 1, 1, 1, us10, us10, us10, us2, , , , linear,
B10G10R10A2_SSCALED , 32, 1, 1, 1, ss10, ss10, ss10, ss2, , , , linear,
B10G10R10A2_UINT , 32, 1, 1, 1, ui10, ui10, ui10, ui2, , , , linear,
B10G10R10A2_SINT , 32, 1, 1, 1, si10, si10, si10, si2, , , , linear,
R64G64B64A64_PASSTHRU , 256, 1, 1, 1, r64, r64, r64, r64, , , , ,
R64G64B64_PASSTHRU , 192, 1, 1, 1, r64, r64, r64, , , , , ,
ETC2_RGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , linear, etc2
ETC2_SRGB8_PTA , 64, 4, 4, 1, un8, un8, un8, un1, , , , srgb, etc2
ETC2_EAC_RGBA8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , linear, etc2
ETC2_EAC_SRGB8_A8 , 128, 4, 4, 1, un8, un8, un8, un8, , , , srgb, etc2
R8G8B8_UINT , 24, 1, 1, 1, ui8, ui8, ui8, , , , , linear,
R8G8B8_SINT , 24, 1, 1, 1, si8, si8, si8, , , , , linear,
RAW , 0, 0, 0, 0, , , , , , , , ,

View file

@ -0,0 +1,128 @@
#!/usr/bin/env bash
#
# Copyright 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
set -eu
set -o pipefail
cat <<'EOF'
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "isl.h"
const struct isl_format_layout
isl_format_layouts[] = {
EOF
sed -r '
# Delete comment lines and empty lines
/^[[:space:]]*#/d
/^[[:space:]]*$/d
# Delete spaces
s/[[:space:]]//g
# Translate formats
s/^([A-Za-z0-9_]+),*/ISL_FORMAT_\1,/
# Translate data type of channels
s/\<x([0-9]+),/ISL_VOID@\1,/g
s/\<r([0-9]+),/ISL_RAW@\1,/g
s/\<un([0-9]+),/ISL_UNORM@\1,/g
s/\<sn([0-9]+),/ISL_SNORM@\1,/g
s/\<uf([0-9]+),/ISL_UFLOAT@\1,/g
s/\<sf([0-9]+),/ISL_SFLOAT@\1,/g
s/\<ux([0-9]+),/ISL_UFIXED@\1,/g
s/\<sx([0-9]+),/ISL_SFIXED@\1,/g
s/\<ui([0-9]+),/ISL_UINT@\1,/g
s/\<si([0-9]+),/ISL_SINT@\1,/g
s/\<us([0-9]+),/ISL_USCALED@\1,/g
s/\<ss([0-9]+),/ISL_SSCALED@\1,/g
# Translate colorspaces
# Interpret alpha-only formats as having no colorspace.
s/\<(linear|srgb|yuv)\>/ISL_COLORSPACE_\1/
s/\<alpha\>//
# Translate texture compression
s/\<(dxt|fxt|rgtc|bptc|etc)([0-9]*)\>/ISL_TXC_\1\2/
' |
tr 'a-z' 'A-Z' | # Convert to uppercase
while IFS=, read -r format bpb bw bh bd \
red green blue alpha \
luminance intensity palette \
colorspace txc
do
: ${colorspace:=ISL_COLORSPACE_NONE}
: ${txc:=ISL_TXC_NONE}
cat <<EOF
[$format] = {
$format,
.bs = $((bpb/8)),
.bw = $bw, .bh = $bh, .bd = $bd,
.channels = {
.r = { $red },
.g = { $green },
.b = { $blue },
.a = { $alpha },
.l = { $luminance },
.i = { $intensity },
.p = { $palette },
},
.colorspace = $colorspace,
.txc = $txc,
},
EOF
done |
sed -r '
# Collapse empty channels
s/\{ \}/{}/
# Split non-empty channels into two members: base type and bit size
s/@/, /
'
# Terminate the table
printf '};\n'

74
src/intel/isl/isl_gen4.c Normal file
View file

@ -0,0 +1,74 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "isl_gen4.h"
#include "isl_priv.h"
bool
gen4_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout)
{
/* Gen4 and Gen5 do not support MSAA */
assert(info->samples >= 1);
*msaa_layout = ISL_MSAA_LAYOUT_NONE;
return true;
}
void
gen4_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el)
{
assert(info->samples == 1);
assert(msaa_layout == ISL_MSAA_LAYOUT_NONE);
assert(!isl_tiling_is_std_y(tiling));
/* Note that neither the surface's horizontal nor vertical image alignment
 * is programmable on gen4 or gen5.
*
* From the G35 PRM (2008-01), Volume 1 Graphics Core, Section 6.17.3.4
* Alignment Unit Size:
*
* Note that the compressed formats are padded to a full compression
* cell.
*
* +------------------------+--------+--------+
* | format | halign | valign |
* +------------------------+--------+--------+
* | YUV 4:2:2 formats | 4 | 2 |
* | uncompressed formats | 4 | 2 |
* +------------------------+--------+--------+
*/
if (isl_format_is_compressed(info->format)) {
*image_align_el = isl_extent3d(1, 1, 1);
return;
}
*image_align_el = isl_extent3d(4, 2, 1);
}

47
src/intel/isl/isl_gen4.h Normal file
View file

@ -0,0 +1,47 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "isl_priv.h"
#ifdef __cplusplus
extern "C" {
#endif
bool
gen4_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout);
void
gen4_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el);
#ifdef __cplusplus
}
#endif

160
src/intel/isl/isl_gen6.c Normal file
View file

@@ -0,0 +1,160 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "isl_gen6.h"
#include "isl_priv.h"
bool
gen6_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
assert(ISL_DEV_GEN(dev) == 6);
assert(info->samples >= 1);
if (info->samples == 1) {
*msaa_layout = ISL_MSAA_LAYOUT_NONE;
return true;
}
/* From the Sandybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Surface
* Format:
*
* If Number of Multisamples is set to a value other than
* MULTISAMPLECOUNT_1, this field cannot be set to the following
* formats:
*
* - any format with greater than 64 bits per element
* - any compressed texture format (BC*)
* - any YCRCB* format
*/
if (fmtl->bs > 8)
return false;
if (isl_format_is_compressed(info->format))
return false;
if (isl_format_is_yuv(info->format))
return false;
/* From the Sandybridge PRM, Volume 4 Part 1 p85, SURFACE_STATE, Number of
* Multisamples:
*
* If this field is any value other than MULTISAMPLECOUNT_1 the
* following restrictions apply:
*
* - the Surface Type must be SURFTYPE_2D
* - [...]
*/
if (info->dim != ISL_SURF_DIM_2D)
return false;
/* More obvious restrictions */
if (isl_surf_usage_is_display(info->usage))
return false;
if (tiling == ISL_TILING_LINEAR)
return false;
if (info->levels > 1)
return false;
*msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
return true;
}
void
gen6_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el)
{
/* Note that the surface's horizontal image alignment is not programmable
* on Sandybridge.
*
* From the Sandybridge PRM (2011-05), Volume 1, Part 1, Section 7.18.3.4
* Alignment Unit Size:
*
* Note that the compressed formats are padded to a full compression cell.
*
* +------------------------+--------+--------+
* | format | halign | valign |
* +------------------------+--------+--------+
* | YUV 4:2:2 formats | 4 | * |
* | uncompressed formats | 4 | * |
* +------------------------+--------+--------+
*
* * For these formats, the vertical alignment factor j is determined
* as follows:
* - j = 4 for any depth buffer
* - j = 2 for separate stencil buffer
* - j = 4 for any render target surface that is multisampled (4x)
* - j = 2 for all other render target surfaces
*
* From the Sandybridge PRM (2011-05), Volume 4, Part 1, Section 2.11.2
* SURFACE_STATE, Surface Vertical Alignment:
*
* - This field must be set to VALIGN_2 if the Surface Format is 96 bits
* per element (BPE).
*
* - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
* (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
* (0x190)
*/
if (isl_format_is_compressed(info->format)) {
*image_align_el = isl_extent3d(1, 1, 1);
return;
}
if (isl_format_is_yuv(info->format)) {
*image_align_el = isl_extent3d(4, 2, 1);
return;
}
if (info->samples > 1) {
*image_align_el = isl_extent3d(4, 4, 1);
return;
}
if (isl_surf_usage_is_depth_or_stencil(info->usage) &&
!ISL_DEV_USE_SEPARATE_STENCIL(dev)) {
/* interleaved depthstencil buffer */
*image_align_el = isl_extent3d(4, 4, 1);
return;
}
if (isl_surf_usage_is_depth(info->usage)) {
/* separate depth buffer */
*image_align_el = isl_extent3d(4, 4, 1);
return;
}
if (isl_surf_usage_is_stencil(info->usage)) {
/* separate stencil buffer */
*image_align_el = isl_extent3d(4, 2, 1);
return;
}
*image_align_el = isl_extent3d(4, 2, 1);
}
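As an aside (not part of isl_gen6.c), the alignment choice above collapses to a small decision table: gen6 fixes the horizontal alignment at 4 for uncompressed formats and only the vertical alignment varies. A minimal standalone sketch follows; the boolean parameters are assumptions of the sketch, not the driver's real isl_surf_init_info interface.

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the branch order of gen6_choose_image_alignment_el above, reduced
 * to the properties the decision actually depends on.
 */
static int
sketch_gen6_valign(bool is_compressed, bool is_yuv, bool multisampled,
                   bool is_depth, bool is_separate_stencil)
{
   if (is_compressed)
      return 1;                 /* one compression block */
   if (is_yuv)
      return 2;
   if (multisampled)
      return 4;
   if (is_depth)
      return 4;                 /* interleaved or separate depth */
   if (is_separate_stencil)
      return 2;
   return 2;                    /* ordinary render target / texture */
}

int main(void)
{
   printf("depth buffer:    valign %d\n",
          sketch_gen6_valign(false, false, false, true, false));
   printf("4x color target: valign %d\n",
          sketch_gen6_valign(false, false, true, false, false));
   printf("plain texture:   valign %d\n",
          sketch_gen6_valign(false, false, false, false, false));
   return 0;
}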

47
src/intel/isl/isl_gen6.h Normal file
View file

@@ -0,0 +1,47 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "isl_priv.h"
#ifdef __cplusplus
extern "C" {
#endif
bool
gen6_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout);
void
gen6_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el);
#ifdef __cplusplus
}
#endif

395
src/intel/isl/isl_gen7.c Normal file
View file

@@ -0,0 +1,395 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "isl_gen7.h"
#include "isl_priv.h"
bool
gen7_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
bool require_array = false;
bool require_interleaved = false;
assert(ISL_DEV_GEN(dev) == 7);
assert(info->samples >= 1);
if (info->samples == 1) {
*msaa_layout = ISL_MSAA_LAYOUT_NONE;
return true;
}
/* From the Ivybridge PRM, Volume 4 Part 1 p63, SURFACE_STATE, Surface
* Format:
*
* If Number of Multisamples is set to a value other than
* MULTISAMPLECOUNT_1, this field cannot be set to the following
* formats: any format with greater than 64 bits per element, any
* compressed texture format (BC*), and any YCRCB* format.
*/
if (fmtl->bs > 8)
return false;
if (isl_format_is_compressed(info->format))
return false;
if (isl_format_is_yuv(info->format))
return false;
/* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of
* Multisamples:
*
* - If this field is any value other than MULTISAMPLECOUNT_1, the
* Surface Type must be SURFTYPE_2D.
*
* - If this field is any value other than MULTISAMPLECOUNT_1, Surface
* Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero
*/
if (info->dim != ISL_SURF_DIM_2D)
return false;
if (info->levels > 1)
return false;
/* The Ivybridge PRM insists twice that signed integer formats cannot be
* multisampled.
*
* From the Ivybridge PRM, Volume 4 Part 1 p73, SURFACE_STATE, Number of
* Multisamples:
*
* - This field must be set to MULTISAMPLECOUNT_1 for SINT MSRTs when
* all RT channels are not written.
*
* And errata from the Ivybridge PRM, Volume 4 Part 1 p77,
* RENDER_SURFACE_STATE, MCS Enable:
*
* This field must be set to 0 [MULTISAMPLECOUNT_1] for all SINT MSRTs
* when all RT channels are not written.
*
* Note that the above SINT restrictions apply only to *MSRTs* (that is,
* *multisampled* render targets). The restrictions seem to permit an MCS
* if the render target is singlesampled.
*/
if (isl_format_has_sint_channel(info->format))
return false;
/* More obvious restrictions */
if (isl_surf_usage_is_display(info->usage))
return false;
if (tiling == ISL_TILING_LINEAR)
return false;
/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
* Surface Storage Format:
*
* +---------------------+----------------------------------------------------------------+
* | MSFMT_MSS           | Multisampled surface was/is rendered as a render target        |
* | MSFMT_DEPTH_STENCIL | Multisampled surface was rendered as a depth or stencil buffer |
* +---------------------+----------------------------------------------------------------+
*
* In the table above, MSFMT_MSS refers to ISL_MSAA_LAYOUT_ARRAY, and
* MSFMT_DEPTH_STENCIL refers to ISL_MSAA_LAYOUT_INTERLEAVED.
*/
if (isl_surf_usage_is_depth_or_stencil(info->usage))
require_interleaved = true;
/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
* Surface Storage Format:
*
* If the surface's Number of Multisamples is MULTISAMPLECOUNT_8, Width
* is >= 8192 (meaning the actual surface width is >= 8193 pixels), this
* field must be set to MSFMT_MSS.
*/
if (info->samples == 8 && info->width == 8192)
require_array = true;
/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
* Surface Storage Format:
*
* If the surface's Number of Multisamples is MULTISAMPLECOUNT_8,
* ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface's Number
* of Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is
* > 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL.
*/
if ((info->samples == 8 && info->height > 4194304u) ||
(info->samples == 4 && info->height > 8388608u))
require_interleaved = true;
/* From the Ivybridge PRM, Volume 4 Part 1 p72, SURFACE_STATE, Multisampled
* Surface Storage Format:
*
* This field must be set to MSFMT_DEPTH_STENCIL if Surface Format is
* one of the following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or
* R24_UNORM_X8_TYPELESS.
*/
if (info->format == ISL_FORMAT_I24X8_UNORM ||
info->format == ISL_FORMAT_L24X8_UNORM ||
info->format == ISL_FORMAT_A24X8_UNORM ||
info->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS)
require_interleaved = true;
if (require_array && require_interleaved)
return false;
if (require_interleaved) {
*msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
return true;
}
/* Default to the array layout because it permits multisample
* compression.
*/
*msaa_layout = ISL_MSAA_LAYOUT_ARRAY;
return true;
}
static bool
gen7_format_needs_valign2(const struct isl_device *dev,
enum isl_format format)
{
/* This workaround applies only to gen7 */
if (ISL_DEV_GEN(dev) > 7)
return false;
/* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,
* RENDER_SURFACE_STATE Surface Vertical Alignment:
*
* - Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
* (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
* (0x190)
*
* - VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
*/
return isl_format_is_yuv(format) ||
format == ISL_FORMAT_R32G32B32_FLOAT;
}
/**
* @brief Filter out tiling flags that are incompatible with the surface.
*
* The resultant outgoing @a flags is a subset of the incoming @a flags. The
* outgoing flags may be empty (0x0) if the incoming flags were too
* restrictive.
*
* For example, if the surface will be used for a display
* (ISL_SURF_USAGE_DISPLAY_BIT), then this function filters out all tiling
* flags except ISL_TILING_X_BIT and ISL_TILING_LINEAR_BIT.
*/
void
gen7_filter_tiling(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
isl_tiling_flags_t *flags)
{
/* IVB+ requires separate stencil */
assert(ISL_DEV_USE_SEPARATE_STENCIL(dev));
/* Clear flags unsupported on this hardware */
if (ISL_DEV_GEN(dev) < 9) {
*flags &= ~ISL_TILING_Yf_BIT;
*flags &= ~ISL_TILING_Ys_BIT;
}
/* And... clear the Yf and Ys bits anyway because Anvil doesn't support
* them yet.
*/
*flags &= ~ISL_TILING_Yf_BIT; /* FINISHME[SKL]: Support Yf */
*flags &= ~ISL_TILING_Ys_BIT; /* FINISHME[SKL]: Support Ys */
if (isl_surf_usage_is_depth(info->usage)) {
/* Depth requires Y. */
*flags &= ISL_TILING_ANY_Y_MASK;
}
/* Separate stencil requires W tiling, and W tiling requires separate
* stencil.
*/
if (isl_surf_usage_is_stencil(info->usage)) {
*flags &= ISL_TILING_W_BIT;
} else {
*flags &= ~ISL_TILING_W_BIT;
}
if (info->usage & (ISL_SURF_USAGE_DISPLAY_ROTATE_90_BIT |
ISL_SURF_USAGE_DISPLAY_ROTATE_180_BIT |
ISL_SURF_USAGE_DISPLAY_ROTATE_270_BIT)) {
assert(info->usage & ISL_SURF_USAGE_DISPLAY_BIT);
isl_finishme("%s:%s: handle rotated display surfaces",
__FILE__, __func__);
}
if (info->usage & (ISL_SURF_USAGE_DISPLAY_FLIP_X_BIT |
ISL_SURF_USAGE_DISPLAY_FLIP_Y_BIT)) {
assert(info->usage & ISL_SURF_USAGE_DISPLAY_BIT);
isl_finishme("%s:%s: handle flipped display surfaces",
__FILE__, __func__);
}
if (info->usage & ISL_SURF_USAGE_DISPLAY_BIT) {
/* Before Skylake, the display engine does not accept Y */
/* FINISHME[SKL]: Y tiling for display surfaces */
*flags &= (ISL_TILING_LINEAR_BIT | ISL_TILING_X_BIT);
}
if (info->samples > 1) {
/* From the Sandybridge PRM, Volume 4 Part 1, SURFACE_STATE Tiled
* Surface:
*
* For multisample render targets, this field must be 1 (true). MSRTs
* can only be tiled.
*
* Multisample surfaces never require X tiling, and Y tiling generally
* performs better than X. So choose Y. (Unless it's stencil, then it
* must be W).
*/
*flags &= (ISL_TILING_ANY_Y_MASK | ISL_TILING_W_BIT);
}
/* workaround */
if (ISL_DEV_GEN(dev) == 7 &&
gen7_format_needs_valign2(dev, info->format) &&
(info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) &&
info->samples == 1) {
/* Y tiling is illegal. From the Ivybridge PRM, Vol4 Part1 2.12.2.1,
* SURFACE_STATE Surface Vertical Alignment:
*
* This field must be set to VALIGN_4 for all tiled Y Render Target
* surfaces.
*/
*flags &= ~ISL_TILING_Y0_BIT;
}
}
/**
* Choose horizontal subimage alignment, in units of surface elements.
*/
static uint32_t
gen7_choose_halign_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info)
{
if (isl_format_is_compressed(info->format))
return 1;
/* From the Ivybridge PRM (2012-05-31), Volume 4, Part 1, Section 2.12.1,
* RENDER_SURFACE_STATE Surface Horizontal Alignment:
*
* - This field is intended to be set to HALIGN_8 only if the surface
* was rendered as a depth buffer with Z16 format or a stencil buffer,
* since these surfaces support only alignment of 8. Use of HALIGN_8
* for other surfaces is supported, but uses more memory.
*/
if (isl_surf_info_is_z16(info) ||
isl_surf_usage_is_stencil(info->usage))
return 8;
return 4;
}
/**
* Choose vertical subimage alignment, in units of surface elements.
*/
static uint32_t
gen7_choose_valign_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling)
{
bool require_valign2 = false;
bool require_valign4 = false;
if (isl_format_is_compressed(info->format))
return 1;
if (gen7_format_needs_valign2(dev, info->format))
require_valign2 = true;
/* From the Ivybridge PRM, Volume 4, Part 1, Section 2.12.1:
* RENDER_SURFACE_STATE Surface Vertical Alignment:
*
* - This field is intended to be set to VALIGN_4 if the surface was
* rendered as a depth buffer, for a multisampled (4x) render target,
* or for a multisampled (8x) render target, since these surfaces
* support only alignment of 4. Use of VALIGN_4 for other surfaces is
* supported, but uses more memory. This field must be set to
* VALIGN_4 for all tiled Y Render Target surfaces.
*
*/
if (isl_surf_usage_is_depth(info->usage) ||
info->samples > 1 ||
tiling == ISL_TILING_Y0) {
require_valign4 = true;
}
if (isl_surf_usage_is_stencil(info->usage)) {
/* The Ivybridge PRM states that the stencil buffer's vertical alignment
* is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment
* Unit Size]. However, valign=8 is outside the set of valid values of
* RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2
* (0x0) and VALIGN_4 (0x1).
*
* The PRM is generally confused about the width, height, and alignment
* of the stencil buffer; and this confusion appears elsewhere. For
* example, the following PRM text effectively converts the stencil
* buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM,
* Volume 1, Part 1, Section
* 6.18.4.2 Base Address and LOD Calculation]:
*
* For separate stencil buffer, the width must be multiplied by 2 and
* height divided by 2 as follows:
*
* w_L = 2*i*ceil(W_L/i)
* h_L = 1/2*j*ceil(H_L/j)
*
* The root of the confusion is that, in W tiling, each pair of rows is
* interleaved into one.
*
* FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API
* is more polished.
*/
require_valign4 = true;
}
assert(!require_valign2 || !require_valign4);
if (require_valign4)
return 4;
/* Prefer VALIGN_2 because it conserves memory. */
return 2;
}
void
gen7_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el)
{
/* IVB+ does not support combined depthstencil. */
assert(!isl_surf_usage_is_depth_and_stencil(info->usage));
*image_align_el = (struct isl_extent3d) {
.w = gen7_choose_halign_el(dev, info),
.h = gen7_choose_valign_el(dev, info, tiling),
.d = 1,
};
}
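As an illustration (not part of isl_gen7.c), the filtering above is just successive bitmask intersection: start with every tiling the caller allows, AND away whatever the usage forbids, and fail if nothing remains. A minimal standalone sketch of that pattern follows; the TILE_* flag values are invented for the sketch in place of the real isl_tiling_flags_t bits.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TILE_LINEAR (1u << 0)
#define TILE_X      (1u << 1)
#define TILE_Y      (1u << 2)
#define TILE_W      (1u << 3)

static bool
sketch_filter_tiling(bool is_depth, bool is_stencil, bool is_display,
                     bool multisampled, uint32_t *flags)
{
   if (is_depth)
      *flags &= TILE_Y;                    /* depth requires Y tiling */
   if (is_stencil)
      *flags &= TILE_W;                    /* separate stencil requires W */
   else
      *flags &= ~TILE_W;                   /* and W is only for stencil */
   if (is_display)
      *flags &= (TILE_LINEAR | TILE_X);    /* pre-SKL display engine limit */
   if (multisampled)
      *flags &= (TILE_Y | TILE_W);         /* MSRTs must be tiled */
   return *flags != 0;                     /* empty mask means no valid tiling */
}

int main(void)
{
   uint32_t flags = TILE_LINEAR | TILE_X | TILE_Y | TILE_W;
   if (sketch_filter_tiling(false, false, true, false, &flags))
      printf("display surface: remaining flags 0x%x\n", (unsigned) flags);
   return 0;
}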

52
src/intel/isl/isl_gen7.h Normal file
View file

@@ -0,0 +1,52 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "isl_priv.h"
#ifdef __cplusplus
extern "C" {
#endif
void
gen7_filter_tiling(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
isl_tiling_flags_t *flags);
bool
gen7_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout);
void
gen7_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el);
#ifdef __cplusplus
}
#endif

229
src/intel/isl/isl_gen8.c Normal file
View file

@@ -0,0 +1,229 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "isl_gen8.h"
#include "isl_priv.h"
bool
gen8_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout)
{
bool require_array = false;
bool require_interleaved = false;
assert(info->samples >= 1);
if (info->samples == 1) {
*msaa_layout = ISL_MSAA_LAYOUT_NONE;
return true;
}
/* From the Broadwell PRM >> Volume2d: Command Structures >>
* RENDER_SURFACE_STATE Tile Mode:
*
* - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
* must be YMAJOR.
*
* As usual, though, stencil is special.
*/
if (!isl_tiling_is_any_y(tiling) && !isl_surf_usage_is_stencil(info->usage))
return false;
/* From the Broadwell PRM >> Volume2d: Command Structures >>
* RENDER_SURFACE_STATE Multisampled Surface Storage Format:
*
* All multisampled render target surfaces must have this field set to
* MSFMT_MSS
*/
if (info->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)
require_array = true;
/* From the Broadwell PRM >> Volume2d: Command Structures >>
* RENDER_SURFACE_STATE Number of Multisamples:
*
* - If this field is any value other than MULTISAMPLECOUNT_1, the
* Surface Type must be SURFTYPE_2D This field must be set to
* MULTISAMPLECOUNT_1 unless the surface is a Sampling Engine surface
* or Render Target surface.
*
* - If this field is any value other than MULTISAMPLECOUNT_1, Surface
* Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero.
*/
if (info->dim != ISL_SURF_DIM_2D)
return false;
if (info->levels > 1)
return false;
/* More obvious restrictions */
if (isl_surf_usage_is_display(info->usage))
return false;
if (isl_format_is_compressed(info->format))
return false;
if (isl_format_is_yuv(info->format))
return false;
if (isl_surf_usage_is_depth_or_stencil(info->usage))
require_interleaved = true;
if (require_array && require_interleaved)
return false;
if (require_interleaved) {
*msaa_layout = ISL_MSAA_LAYOUT_INTERLEAVED;
return true;
}
*msaa_layout = ISL_MSAA_LAYOUT_ARRAY;
return true;
}
/**
* Choose horizontal subimage alignment, in units of surface elements.
*/
static uint32_t
gen8_choose_halign_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info)
{
if (isl_format_is_compressed(info->format))
return 1;
/* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
* RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
*
* - This field is intended to be set to HALIGN_8 only if the surface
* was rendered as a depth buffer with Z16 format or a stencil buffer.
* In this case it must be set to HALIGN_8 since these surfaces
* support only alignment of 8. [...]
*/
if (isl_surf_info_is_z16(info))
return 8;
if (isl_surf_usage_is_stencil(info->usage))
return 8;
/* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
* RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
*
* [...] For Z32 formats it must be set to HALIGN_4.
*/
if (isl_surf_usage_is_depth(info->usage))
return 4;
if (!(info->usage & ISL_SURF_USAGE_DISABLE_AUX_BIT)) {
/* From the Broadwell PRM, Volume 2d "Command Reference: Structures",
* RENDER_SURFACE_STATE Surface Horizontal Alignment, p326:
*
* - When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E,
* HALIGN 16 must be used.
*
* This case handles color surfaces that may own an auxiliary MCS, CCS_D,
* or CCS_E. Depth buffers, including those that own an auxiliary HiZ
* surface, are handled above and do not require HALIGN_16.
*/
assert(!isl_surf_usage_is_depth(info->usage));
return 16;
}
/* XXX(chadv): I believe the hardware requires each image to be
* cache-aligned. If that's true, then defaulting to halign=4 is wrong for
* many formats. Depending on the format's block size, we may need to
* increase halign to 8.
*/
return 4;
}
/**
* Choose vertical subimage alignment, in units of surface elements.
*/
static uint32_t
gen8_choose_valign_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info)
{
/* From the Broadwell PRM > Volume 2d: Command Reference: Structures
* > RENDER_SURFACE_STATE Surface Vertical Alignment (p325):
*
* - For Sampling Engine and Render Target Surfaces: This field
* specifies the vertical alignment requirement in elements for the
* surface. [...] An element is defined as a pixel in uncompressed
* surface formats, and as a compression block in compressed surface
* formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
* element is a sample.
*
* - This field is intended to be set to VALIGN_4 if the surface was
* rendered as a depth buffer, for a multisampled (4x) render target,
* or for a multisampled (8x) render target, since these surfaces
* support only alignment of 4. Use of VALIGN_4 for other surfaces is
* supported, but increases memory usage.
*
* - This field is intended to be set to VALIGN_8 only if the surface
* was rendered as a stencil buffer, since stencil buffer surfaces
* support only alignment of 8. If set to VALIGN_8, Surface Format
* must be R8_UINT.
*/
if (isl_format_is_compressed(info->format))
return 1;
if (isl_surf_usage_is_stencil(info->usage))
return 8;
return 4;
}
void
gen8_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el)
{
assert(!isl_tiling_is_std_y(tiling));
/* The below text from the Broadwell PRM provides some insight into the
* hardware's requirements for LOD alignment. From the Broadwell PRM >>
* Volume 5: Memory Views >> Surface Layout >> 2D Surfaces:
*
* These [2D surfaces] must adhere to the following memory organization
* rules:
*
* - For non-compressed texture formats, each mipmap must start on an
* even row within the monolithic rectangular area. For
* 1-texel-high mipmaps, this may require a row of padding below
* the previous mipmap. This restriction does not apply to any
* compressed texture formats; each subsequent (lower-res)
* compressed mipmap is positioned directly below the previous
* mipmap.
*
* - Vertical alignment restrictions vary with memory tiling type:
* 1 DWord for linear, 16-byte (DQWord) for tiled. (Note that tiled
* mipmaps are not required to start at the left edge of a tile
* row.)
*/
*image_align_el = (struct isl_extent3d) {
.w = gen8_choose_halign_el(dev, info),
.h = gen8_choose_valign_el(dev, info),
.d = 1,
};
}
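As a side note (not part of isl_gen8.c), the horizontal-alignment choice above is a short priority chain; a minimal standalone sketch, collapsed to the booleans it actually depends on (the real code takes an isl_surf_init_info):

#include <stdbool.h>
#include <stdio.h>

static int
sketch_gen8_halign(bool compressed, bool z16_or_stencil, bool depth, bool has_aux)
{
   if (compressed)
      return 1;          /* one compression block */
   if (z16_or_stencil)
      return 8;          /* Z16 depth and stencil only support HALIGN_8 */
   if (depth)
      return 4;          /* Z32 formats must use HALIGN_4 */
   if (has_aux)
      return 16;         /* AUX_CCS_D / AUX_CCS_E require HALIGN_16 */
   return 4;
}

int main(void)
{
   printf("color + CCS: halign %d\n", sketch_gen8_halign(false, false, false, true));
   printf("Z32 depth:   halign %d\n", sketch_gen8_halign(false, false, true, false));
   return 0;
}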

47
src/intel/isl/isl_gen8.h Normal file
View file

@@ -0,0 +1,47 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "isl_priv.h"
#ifdef __cplusplus
extern "C" {
#endif
bool
gen8_choose_msaa_layout(const struct isl_device *dev,
const struct isl_surf_init_info *info,
enum isl_tiling tiling,
enum isl_msaa_layout *msaa_layout);
void
gen8_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el);
#ifdef __cplusplus
}
#endif

185
src/intel/isl/isl_gen9.c Normal file
View file

@@ -0,0 +1,185 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "isl_gen8.h"
#include "isl_gen9.h"
#include "isl_priv.h"
/**
* Calculate the surface's subimage alignment, in units of surface samples,
* for the standard tiling formats Yf and Ys.
*/
static void
gen9_calc_std_image_alignment_sa(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *align_sa)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
assert(isl_tiling_is_std_y(tiling));
const uint32_t bs = fmtl->bs;
const uint32_t is_Ys = tiling == ISL_TILING_Ys;
switch (info->dim) {
case ISL_SURF_DIM_1D:
/* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface
* Layout and Tiling > 1D Surfaces > 1D Alignment Requirements.
*/
*align_sa = (struct isl_extent3d) {
.w = 1 << (12 - (ffs(bs) - 1) + (4 * is_Ys)),
.h = 1,
.d = 1,
};
return;
case ISL_SURF_DIM_2D:
/* See the Skylake BSpec > Memory Views > Common Surface Formats >
* Surface Layout and Tiling > 2D Surfaces > 2D/CUBE Alignment
* Requirements.
*/
*align_sa = (struct isl_extent3d) {
.w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys)),
.h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys)),
.d = 1,
};
if (is_Ys) {
/* FINISHME(chadv): I don't trust this code. Untested. */
isl_finishme("%s:%s: [SKL+] multisample TileYs", __FILE__, __func__);
switch (msaa_layout) {
case ISL_MSAA_LAYOUT_NONE:
case ISL_MSAA_LAYOUT_INTERLEAVED:
break;
case ISL_MSAA_LAYOUT_ARRAY:
align_sa->w >>= (ffs(info->samples) - 0) / 2;
align_sa->h >>= (ffs(info->samples) - 1) / 2;
break;
}
}
return;
case ISL_SURF_DIM_3D:
/* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface
* Layout and Tiling > 1D Surfaces > 1D Alignment Requirements.
*/
*align_sa = (struct isl_extent3d) {
.w = 1 << (4 - ((ffs(bs) + 1) / 3) + (4 * is_Ys)),
.h = 1 << (4 - ((ffs(bs) - 1) / 3) + (2 * is_Ys)),
.d = 1 << (4 - ((ffs(bs) - 0) / 3) + (2 * is_Ys)),
};
return;
}
unreachable("bad isl_surface_type");
}
void
gen9_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el)
{
/* This BSpec text provides some insight into the hardware's alignment
* requirements [Skylake BSpec > Memory Views > Common Surface Formats >
* Surface Layout and Tiling > 2D Surfaces]:
*
* An LOD must be aligned to a cache-line except for some special cases
* related to Planar YUV surfaces. In general, the cache-alignment
* restriction implies there is a minimum height for an LOD of 4 texels.
* So, LODs which are smaller than 4 high are padded.
*
* From the Skylake BSpec, RENDER_SURFACE_STATE Surface Vertical Alignment:
*
* - For Sampling Engine and Render Target Surfaces: This field
* specifies the vertical alignment requirement in elements for the
* surface. [...] An element is defined as a pixel in uncompressed
* surface formats, and as a compression block in compressed surface
* formats. For MSFMT_DEPTH_STENCIL type multisampled surfaces, an
* element is a sample.
*
* - This field is used for 2D, CUBE, and 3D surface alignment when Tiled
* Resource Mode is TRMODE_NONE (Tiled Resource Mode is disabled).
* This field is ignored for 1D surfaces and also when Tiled Resource
* Mode is not TRMODE_NONE (e.g. Tiled Resource Mode is enabled).
*
* See the appropriate Alignment table in the "Surface Layout and
* Tiling" section under Common Surface Formats for the table of
* alignment values for Tiled Resources.
*
* - For uncompressed surfaces, the units of "j" are rows of pixels on
* the physical surface. For compressed texture formats, the units of
* "j" are in compression blocks, thus each increment in "j" is equal
* to h pixels, where h is the height of the compression block in
* pixels.
*
* - Valid Values: VALIGN_4, VALIGN_8, VALIGN_16
*
* From the Skylake BSpec, RENDER_SURFACE_STATE Surface Horizontal
* Alignment:
*
* - For uncompressed surfaces, the units of "i" are pixels on the
* physical surface. For compressed texture formats, the units of "i"
* are in compression blocks, thus each increment in "i" is equal to
* w pixels, where w is the width of the compression block in pixels.
*
* - Valid Values: HALIGN_4, HALIGN_8, HALIGN_16
*/
if (isl_tiling_is_std_y(tiling)) {
struct isl_extent3d image_align_sa;
gen9_calc_std_image_alignment_sa(dev, info, tiling, msaa_layout,
&image_align_sa);
*image_align_el = isl_extent3d_sa_to_el(info->format, image_align_sa);
return;
}
if (info->dim == ISL_SURF_DIM_1D) {
/* See the Skylake BSpec > Memory Views > Common Surface Formats > Surface
* Layout and Tiling > 1D Surfaces > 1D Alignment Requirements.
*/
*image_align_el = isl_extent3d(64, 1, 1);
return;
}
if (isl_format_is_compressed(info->format)) {
/* On Gen9, the meaning of RENDER_SURFACE_STATE's
* SurfaceHorizontalAlignment and SurfaceVerticalAlignment changed for
* compressed formats. They now indicate a multiple of the compression
* block. For example, if the compression mode is ETC2 then HALIGN_4
* indicates a horizontal alignment of 16 pixels.
*
* To avoid wasting memory, choose the smallest alignment possible:
* HALIGN_4 and VALIGN_4.
*/
*image_align_el = isl_extent3d(4, 4, 1);
return;
}
gen8_choose_image_alignment_el(dev, info, tiling, msaa_layout,
image_align_el);
}
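As an illustration (not part of isl_gen9.c), the 2D TileYf/TileYs branch of gen9_calc_std_image_alignment_sa above is pure arithmetic on the format's block size: the alignment shrinks as bytes-per-block grow, and TileYs scales both axes by 16 via the "+ 4 * is_Ys" term. A minimal standalone sketch that just evaluates the same expressions:

#include <stdio.h>
#include <strings.h>   /* ffs() */

/* Same expressions as the ISL_SURF_DIM_2D case above; result is in
 * surface samples.
 */
static void
std_y_2d_align(int bs /* bytes per block */, int is_Ys, int *w, int *h)
{
   *w = 1 << (6 - ((ffs(bs) - 1) / 2) + (4 * is_Ys));
   *h = 1 << (6 - ((ffs(bs) - 0) / 2) + (4 * is_Ys));
}

int main(void)
{
   for (int bs = 1; bs <= 16; bs *= 2) {
      int w, h;
      std_y_2d_align(bs, 0, &w, &h);
      printf("TileYf, %3d bpp: %3dx%d\n", bs * 8, w, h);
   }
   return 0;
}

For TileYf this prints 64x64 at 8 bpp down to 16x16 at 128 bpp, matching the alignment table the BSpec reference above describes.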

41
src/intel/isl/isl_gen9.h Normal file
View file

@@ -0,0 +1,41 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "isl_priv.h"
#ifdef __cplusplus
extern "C" {
#endif
void
gen9_choose_image_alignment_el(const struct isl_device *dev,
const struct isl_surf_init_info *restrict info,
enum isl_tiling tiling,
enum isl_msaa_layout msaa_layout,
struct isl_extent3d *image_align_el);
#ifdef __cplusplus
}
#endif

170
src/intel/isl/isl_priv.h Normal file
View file

@@ -0,0 +1,170 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include <assert.h>
#include "brw_device_info.h"
#include "util/macros.h"
#include "isl.h"
#define isl_finishme(format, ...) \
__isl_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__)
void PRINTFLIKE(3, 4) UNUSED
__isl_finishme(const char *file, int line, const char *fmt, ...);
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
static inline uint32_t
ffs(uint32_t n) {
return __builtin_ffs(n);
}
static inline bool
isl_is_pow2(uintmax_t n)
{
return !(n & (n - 1));
}
/**
* Alignment must be a power of 2.
*/
static inline bool
isl_is_aligned(uintmax_t n, uintmax_t a)
{
assert(isl_is_pow2(a));
return (n & (a - 1)) == 0;
}
/**
* Alignment must be a power of 2.
*/
static inline uintmax_t
isl_align(uintmax_t n, uintmax_t a)
{
assert(a != 0 && isl_is_pow2(a));
return (n + a - 1) & ~(a - 1);
}
static inline uintmax_t
isl_align_npot(uintmax_t n, uintmax_t a)
{
assert(a > 0);
return ((n + a - 1) / a) * a;
}
/**
* Alignment must be a power of 2.
*/
static inline uintmax_t
isl_align_div(uintmax_t n, uintmax_t a)
{
return isl_align(n, a) / a;
}
static inline uintmax_t
isl_align_div_npot(uintmax_t n, uintmax_t a)
{
return isl_align_npot(n, a) / a;
}
/**
* Log base 2, rounding towards zero.
*/
static inline uint32_t
isl_log2u(uint32_t n)
{
assert(n != 0);
return 31 - __builtin_clz(n);
}
static inline uint32_t
isl_minify(uint32_t n, uint32_t levels)
{
if (unlikely(n == 0))
return 0;
else
return MAX(n >> levels, 1);
}
static inline struct isl_extent3d
isl_extent3d_sa_to_el(enum isl_format fmt, struct isl_extent3d extent_sa)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);
assert(extent_sa.w % fmtl->bw == 0);
assert(extent_sa.h % fmtl->bh == 0);
assert(extent_sa.d % fmtl->bd == 0);
return (struct isl_extent3d) {
.w = extent_sa.w / fmtl->bw,
.h = extent_sa.h / fmtl->bh,
.d = extent_sa.d / fmtl->bd,
};
}
static inline struct isl_extent3d
isl_extent3d_el_to_sa(enum isl_format fmt, struct isl_extent3d extent_el)
{
const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);
return (struct isl_extent3d) {
.w = extent_el.w * fmtl->bw,
.h = extent_el.h * fmtl->bh,
.d = extent_el.d * fmtl->bd,
};
}
void
isl_gen7_surf_fill_state_s(const struct isl_device *dev, void *state,
const struct isl_surf_fill_state_info *restrict info);
void
isl_gen75_surf_fill_state_s(const struct isl_device *dev, void *state,
const struct isl_surf_fill_state_info *restrict info);
void
isl_gen8_surf_fill_state_s(const struct isl_device *dev, void *state,
const struct isl_surf_fill_state_info *restrict info);
void
isl_gen9_surf_fill_state_s(const struct isl_device *dev, void *state,
const struct isl_surf_fill_state_info *restrict info);
void
isl_gen7_buffer_fill_state_s(void *state,
const struct isl_buffer_fill_state_info *restrict info);
void
isl_gen75_buffer_fill_state_s(void *state,
const struct isl_buffer_fill_state_info *restrict info);
void
isl_gen8_buffer_fill_state_s(void *state,
const struct isl_buffer_fill_state_info *restrict info);
void
isl_gen9_buffer_fill_state_s(void *state,
const struct isl_buffer_fill_state_info *restrict info);
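For illustration (not part of isl_priv.h), the power-of-two alignment helpers above are plain bit tricks; a minimal standalone copy with shortened names shows what they compute:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uintmax_t align_pow2(uintmax_t n, uintmax_t a)
{
   assert(a != 0 && (a & (a - 1)) == 0);   /* a must be a power of two */
   return (n + a - 1) & ~(a - 1);          /* round n up to a multiple of a */
}

static uintmax_t align_div_pow2(uintmax_t n, uintmax_t a)
{
   return align_pow2(n, a) / a;            /* how many a-sized units cover n */
}

int main(void)
{
   /* e.g. padding a 100-element-wide level to a horizontal alignment of 8 */
   printf("align(100, 8)     = %ju\n", align_pow2(100, 8));      /* 104 */
   printf("align_div(100, 8) = %ju\n", align_div_pow2(100, 8));  /* 13  */
   return 0;
}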

View file

@@ -0,0 +1,293 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "isl_priv.h"
#include "brw_compiler.h"
bool
isl_is_storage_image_format(enum isl_format format)
{
/* XXX: Maybe we should put this in the CSV? */
switch (format) {
case ISL_FORMAT_R32G32B32A32_UINT:
case ISL_FORMAT_R32G32B32A32_SINT:
case ISL_FORMAT_R32G32B32A32_FLOAT:
case ISL_FORMAT_R32_UINT:
case ISL_FORMAT_R32_SINT:
case ISL_FORMAT_R32_FLOAT:
case ISL_FORMAT_R16G16B16A16_UINT:
case ISL_FORMAT_R16G16B16A16_SINT:
case ISL_FORMAT_R16G16B16A16_FLOAT:
case ISL_FORMAT_R32G32_UINT:
case ISL_FORMAT_R32G32_SINT:
case ISL_FORMAT_R32G32_FLOAT:
case ISL_FORMAT_R8G8B8A8_UINT:
case ISL_FORMAT_R8G8B8A8_SINT:
case ISL_FORMAT_R16G16_UINT:
case ISL_FORMAT_R16G16_SINT:
case ISL_FORMAT_R16G16_FLOAT:
case ISL_FORMAT_R8G8_UINT:
case ISL_FORMAT_R8G8_SINT:
case ISL_FORMAT_R16_UINT:
case ISL_FORMAT_R16_FLOAT:
case ISL_FORMAT_R16_SINT:
case ISL_FORMAT_R8_UINT:
case ISL_FORMAT_R8_SINT:
case ISL_FORMAT_R10G10B10A2_UINT:
case ISL_FORMAT_R10G10B10A2_UNORM:
case ISL_FORMAT_R11G11B10_FLOAT:
case ISL_FORMAT_R16G16B16A16_UNORM:
case ISL_FORMAT_R16G16B16A16_SNORM:
case ISL_FORMAT_R8G8B8A8_UNORM:
case ISL_FORMAT_R8G8B8A8_SNORM:
case ISL_FORMAT_R16G16_UNORM:
case ISL_FORMAT_R16G16_SNORM:
case ISL_FORMAT_R8G8_UNORM:
case ISL_FORMAT_R8G8_SNORM:
case ISL_FORMAT_R16_UNORM:
case ISL_FORMAT_R16_SNORM:
case ISL_FORMAT_R8_UNORM:
case ISL_FORMAT_R8_SNORM:
return true;
default:
return false;
}
}
enum isl_format
isl_lower_storage_image_format(const struct isl_device *dev,
enum isl_format format)
{
switch (format) {
/* These are never lowered. Up to BDW we'll have to fall back to untyped
* surface access for 128bpp formats.
*/
case ISL_FORMAT_R32G32B32A32_UINT:
case ISL_FORMAT_R32G32B32A32_SINT:
case ISL_FORMAT_R32G32B32A32_FLOAT:
case ISL_FORMAT_R32_UINT:
case ISL_FORMAT_R32_SINT:
case ISL_FORMAT_R32_FLOAT:
return format;
/* From HSW to BDW the only 64bpp format supported for typed access is
* RGBA_UINT16. IVB falls back to untyped.
*/
case ISL_FORMAT_R16G16B16A16_UINT:
case ISL_FORMAT_R16G16B16A16_SINT:
case ISL_FORMAT_R16G16B16A16_FLOAT:
case ISL_FORMAT_R32G32_UINT:
case ISL_FORMAT_R32G32_SINT:
case ISL_FORMAT_R32G32_FLOAT:
return (ISL_DEV_GEN(dev) >= 9 ? format :
ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
ISL_FORMAT_R16G16B16A16_UINT :
ISL_FORMAT_R32G32_UINT);
/* Up to BDW no SINT or FLOAT formats of less than 32 bits per component
* are supported. IVB doesn't support formats with more than one component
* for typed access. For 8 and 16 bpp formats IVB relies on the
* undocumented behavior that typed reads from R_UINT8 and R_UINT16
* surfaces actually do a 32-bit misaligned read. The alternative would be
* to use two surface state entries with different formats for each image,
* one for reading (using R_UINT32) and another one for writing (using
* R_UINT8 or R_UINT16), but that would complicate the shaders we generate
* even more.
*/
case ISL_FORMAT_R8G8B8A8_UINT:
case ISL_FORMAT_R8G8B8A8_SINT:
return (ISL_DEV_GEN(dev) >= 9 ? format :
ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT);
case ISL_FORMAT_R16G16_UINT:
case ISL_FORMAT_R16G16_SINT:
case ISL_FORMAT_R16G16_FLOAT:
return (ISL_DEV_GEN(dev) >= 9 ? format :
ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT);
case ISL_FORMAT_R8G8_UINT:
case ISL_FORMAT_R8G8_SINT:
return (ISL_DEV_GEN(dev) >= 9 ? format :
ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT);
case ISL_FORMAT_R16_UINT:
case ISL_FORMAT_R16_FLOAT:
case ISL_FORMAT_R16_SINT:
return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R16_UINT);
case ISL_FORMAT_R8_UINT:
case ISL_FORMAT_R8_SINT:
return (ISL_DEV_GEN(dev) >= 9 ? format : ISL_FORMAT_R8_UINT);
/* Neither the 2/10/10/10 nor the 11/11/10 packed formats are supported
* by the hardware.
*/
case ISL_FORMAT_R10G10B10A2_UINT:
case ISL_FORMAT_R10G10B10A2_UNORM:
case ISL_FORMAT_R11G11B10_FLOAT:
return ISL_FORMAT_R32_UINT;
/* No normalized fixed-point formats are supported by the hardware. */
case ISL_FORMAT_R16G16B16A16_UNORM:
case ISL_FORMAT_R16G16B16A16_SNORM:
return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
ISL_FORMAT_R16G16B16A16_UINT :
ISL_FORMAT_R32G32_UINT);
case ISL_FORMAT_R8G8B8A8_UNORM:
case ISL_FORMAT_R8G8B8A8_SNORM:
return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT);
case ISL_FORMAT_R16G16_UNORM:
case ISL_FORMAT_R16G16_SNORM:
return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT);
case ISL_FORMAT_R8G8_UNORM:
case ISL_FORMAT_R8G8_SNORM:
return (ISL_DEV_GEN(dev) >= 8 || dev->info->is_haswell ?
ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT);
case ISL_FORMAT_R16_UNORM:
case ISL_FORMAT_R16_SNORM:
return ISL_FORMAT_R16_UINT;
case ISL_FORMAT_R8_UNORM:
case ISL_FORMAT_R8_SNORM:
return ISL_FORMAT_R8_UINT;
default:
assert(!"Unknown image format");
return ISL_FORMAT_UNSUPPORTED;
}
}
static const struct brw_image_param image_param_defaults = {
/* Set the swizzling shifts to all-ones to effectively disable
* swizzling -- See emit_address_calculation() in
* brw_fs_surface_builder.cpp for a more detailed explanation of
* these parameters.
*/
.swizzling = { 0xff, 0xff },
};
void
isl_surf_fill_image_param(const struct isl_device *dev,
struct brw_image_param *param,
const struct isl_surf *surf,
const struct isl_view *view)
{
*param = image_param_defaults;
param->size[0] = isl_minify(surf->logical_level0_px.w, view->base_level);
param->size[1] = isl_minify(surf->logical_level0_px.h, view->base_level);
if (surf->dim == ISL_SURF_DIM_3D) {
param->size[2] = isl_minify(surf->logical_level0_px.d, view->base_level);
} else {
param->size[2] = surf->logical_level0_px.array_len -
view->base_array_layer;
}
isl_surf_get_image_offset_el(surf, view->base_level, view->base_array_layer,
0, &param->offset[0], &param->offset[1]);
const int cpp = isl_format_get_layout(surf->format)->bs;
param->stride[0] = cpp;
param->stride[1] = surf->row_pitch / cpp;
const struct isl_extent3d image_align_sa =
isl_surf_get_image_alignment_sa(surf);
if (ISL_DEV_GEN(dev) < 9 && surf->dim == ISL_SURF_DIM_3D) {
param->stride[2] = isl_align_npot(param->size[0], image_align_sa.w);
param->stride[3] = isl_align_npot(param->size[1], image_align_sa.h);
} else {
param->stride[2] = 0;
param->stride[3] = isl_surf_get_array_pitch_el_rows(surf);
}
switch (surf->tiling) {
case ISL_TILING_LINEAR:
/* image_param_defaults is good enough */
break;
case ISL_TILING_X:
/* An X tile is a rectangular block of 512x8 bytes. */
param->tiling[0] = isl_log2u(512 / cpp);
param->tiling[1] = isl_log2u(8);
if (dev->has_bit6_swizzling) {
/* Right shifts required to swizzle bits 9 and 10 of the memory
* address with bit 6.
*/
param->swizzling[0] = 3;
param->swizzling[1] = 4;
}
break;
case ISL_TILING_Y0:
/* The layout of a Y-tiled surface in memory isn't fundamentally different
* from the layout of an X-tiled surface; we simply pretend that the surface
* is broken up into a number of smaller 16Bx32 tiles, each one arranged in
* X-major order, just as is the case for X-tiling.
*/
param->tiling[0] = isl_log2u(16 / cpp);
param->tiling[1] = isl_log2u(32);
if (dev->has_bit6_swizzling) {
/* Right shift required to swizzle bit 9 of the memory address with
* bit 6.
*/
param->swizzling[0] = 3;
param->swizzling[1] = 0xff;
}
break;
default:
assert(!"Unhandled storage image tiling");
}
/* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
* address calculation algorithm (emit_address_calculation() in
* brw_fs_surface_builder.cpp) handles this as a sort of tiling with
* modulus equal to the LOD.
*/
param->tiling[2] = (ISL_DEV_GEN(dev) < 9 && surf->dim == ISL_SURF_DIM_3D ?
view->base_level : 0);
}
void
isl_buffer_fill_image_param(const struct isl_device *dev,
struct brw_image_param *param,
enum isl_format format,
uint64_t size)
{
*param = image_param_defaults;
param->stride[0] = isl_format_layouts[format].bs;
param->size[0] = size / param->stride[0];
}
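As an illustration (not part of this file), the tiling exponents written into brw_image_param above reduce to two logarithms: an X tile is 512 bytes by 8 rows, and a Y tile is treated as 16-byte by 32-row subtiles, so the shader receives log2 of the tile dimensions in elements. A minimal standalone sketch follows; the helper names are invented for the sketch.

#include <stdio.h>

static unsigned log2u(unsigned n)          /* n must be a power of two */
{
   return 31 - __builtin_clz(n);
}

static void
tiling_exponents(int is_y_tiled, unsigned cpp, unsigned out[2])
{
   if (is_y_tiled) {
      out[0] = log2u(16 / cpp);            /* elements per subtile row */
      out[1] = log2u(32);                  /* rows per subtile         */
   } else {
      out[0] = log2u(512 / cpp);           /* elements per X-tile row  */
      out[1] = log2u(8);                   /* rows per X-tile          */
   }
}

int main(void)
{
   unsigned t[2];
   tiling_exponents(0, 4, t);   /* X-tiled, 32bpp: 128x8 elements -> 7, 3 */
   printf("X tile, cpp=4: tiling = { %u, %u }\n", t[0], t[1]);
   tiling_exponents(1, 4, t);   /* Y-tiled, 32bpp: 4x32 elements  -> 2, 5 */
   printf("Y tile, cpp=4: tiling = { %u, %u }\n", t[0], t[1]);
   return 0;
}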

View file

@@ -0,0 +1,480 @@
/*
* Copyright 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdint.h>
#define __gen_address_type uint64_t
#define __gen_user_data void
static inline uint64_t
__gen_combine_address(void *data, void *loc, uint64_t addr, uint32_t delta)
{
return addr + delta;
}
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "isl_priv.h"
#define __PASTE2(x, y) x ## y
#define __PASTE(x, y) __PASTE2(x, y)
#define isl_genX(x) __PASTE(isl_, genX(x))
#if GEN_GEN >= 8
static const uint8_t isl_to_gen_halign[] = {
[4] = HALIGN4,
[8] = HALIGN8,
[16] = HALIGN16,
};
static const uint8_t isl_to_gen_valign[] = {
[4] = VALIGN4,
[8] = VALIGN8,
[16] = VALIGN16,
};
#else
static const uint8_t isl_to_gen_halign[] = {
[4] = HALIGN_4,
[8] = HALIGN_8,
};
static const uint8_t isl_to_gen_valign[] = {
[2] = VALIGN_2,
[4] = VALIGN_4,
};
#endif
#if GEN_GEN >= 8
static const uint8_t isl_to_gen_tiling[] = {
[ISL_TILING_LINEAR] = LINEAR,
[ISL_TILING_X] = XMAJOR,
[ISL_TILING_Y0] = YMAJOR,
[ISL_TILING_Yf] = YMAJOR,
[ISL_TILING_Ys] = YMAJOR,
[ISL_TILING_W] = WMAJOR,
};
#endif
#if GEN_GEN >= 8
static const uint32_t isl_to_gen_multisample_layout[] = {
[ISL_MSAA_LAYOUT_NONE] = MSS,
[ISL_MSAA_LAYOUT_INTERLEAVED] = DEPTH_STENCIL,
[ISL_MSAA_LAYOUT_ARRAY] = MSS,
};
#else
static const uint32_t isl_to_gen_multisample_layout[] = {
[ISL_MSAA_LAYOUT_NONE] = MSFMT_MSS,
[ISL_MSAA_LAYOUT_INTERLEAVED] = MSFMT_DEPTH_STENCIL,
[ISL_MSAA_LAYOUT_ARRAY] = MSFMT_MSS,
};
#endif
static const uint8_t
get_surftype(enum isl_surf_dim dim, isl_surf_usage_flags_t usage)
{
switch (dim) {
default:
unreachable("bad isl_surf_dim");
case ISL_SURF_DIM_1D:
assert(!(usage & ISL_SURF_USAGE_CUBE_BIT));
return SURFTYPE_1D;
case ISL_SURF_DIM_2D:
if (usage & ISL_SURF_USAGE_STORAGE_BIT) {
/* Storage images are always plain 2-D, not cube */
return SURFTYPE_2D;
} else if (usage & ISL_SURF_USAGE_CUBE_BIT) {
return SURFTYPE_CUBE;
} else {
return SURFTYPE_2D;
}
case ISL_SURF_DIM_3D:
assert(!(usage & ISL_SURF_USAGE_CUBE_BIT));
return SURFTYPE_3D;
}
}
/**
* Get the values to pack into RENDER_SURFACE_STATE.SurfaceHorizontalAlignment
* and SurfaceVerticalAlignment.
*/
static void
get_halign_valign(const struct isl_surf *surf,
uint32_t *halign, uint32_t *valign)
{
if (GEN_GEN >= 9) {
if (isl_tiling_is_std_y(surf->tiling) ||
surf->dim_layout == ISL_DIM_LAYOUT_GEN9_1D) {
/* The hardware ignores the alignment values. Anyway, the surface's
* true alignment is likely outside the enum range of HALIGN* and
* VALIGN*.
*/
*halign = 0;
*valign = 0;
} else {
/* In Skylake, RENDER_SURFACE_STATE.SurfaceVerticalAlignment is in units
* of surface elements (not pixels nor samples). For compressed formats,
* a "surface element" is defined as a compression block. For example,
* if SurfaceVerticalAlignment is VALIGN_4 and SurfaceFormat is an ETC2
* format (ETC2 has a block height of 4), then the vertical alignment is
* 4 compression blocks or, equivalently, 16 pixels.
*/
struct isl_extent3d image_align_el
= isl_surf_get_image_alignment_el(surf);
*halign = isl_to_gen_halign[image_align_el.width];
*valign = isl_to_gen_valign[image_align_el.height];
}
} else {
/* Pre-Skylake, RENDER_SURFACE_STATE.SurfaceVerticalAlignment is in
* units of surface samples. For example, if SurfaceVerticalAlignment
* is VALIGN_4 and the surface is singlesampled, then for any surface
* format (compressed or not) the vertical alignment is
* 4 pixels.
*/
struct isl_extent3d image_align_sa
= isl_surf_get_image_alignment_sa(surf);
*halign = isl_to_gen_halign[image_align_sa.width];
*valign = isl_to_gen_valign[image_align_sa.height];
}
}
#if GEN_GEN >= 8
static uint32_t
get_qpitch(const struct isl_surf *surf)
{
switch (surf->dim) {
default:
assert(!"Bad isl_surf_dim");
case ISL_SURF_DIM_1D:
if (GEN_GEN >= 9) {
/* QPitch is usually expressed as rows of surface elements (where
* a surface element is a compression block or a single surface
* sample). Skylake 1D is an outlier.
*
* From the Skylake BSpec >> Memory Views >> Common Surface
* Formats >> Surface Layout and Tiling >> 1D Surfaces:
*
* Surface QPitch specifies the distance in pixels between array
* slices.
*/
return isl_surf_get_array_pitch_el(surf);
} else {
return isl_surf_get_array_pitch_el_rows(surf);
}
case ISL_SURF_DIM_2D:
case ISL_SURF_DIM_3D:
if (GEN_GEN >= 9) {
return isl_surf_get_array_pitch_el_rows(surf);
} else {
/* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch
*
* "This field must be set to an integer multiple of the Surface
* Vertical Alignment. For compressed textures (BC*, FXT1,
* ETC*, and EAC* Surface Formats), this field is in units of
* rows in the uncompressed surface, and must be set to an
* integer multiple of the vertical alignment parameter "j"
* defined in the Common Surface Formats section."
*/
return isl_surf_get_array_pitch_sa_rows(surf);
}
}
}
#endif /* GEN_GEN >= 8 */
void
isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state,
const struct isl_surf_fill_state_info *restrict info)
{
uint32_t halign, valign;
get_halign_valign(info->surf, &halign, &valign);
struct GENX(RENDER_SURFACE_STATE) s = {
.SurfaceType = get_surftype(info->surf->dim, info->view->usage),
.SurfaceArray = info->surf->phys_level0_sa.array_len > 1,
.SurfaceVerticalAlignment = valign,
.SurfaceHorizontalAlignment = halign,
#if GEN_GEN >= 8
.TileMode = isl_to_gen_tiling[info->surf->tiling],
#else
.TiledSurface = info->surf->tiling != ISL_TILING_LINEAR,
.TileWalk = info->surf->tiling == ISL_TILING_X ? TILEWALK_XMAJOR :
TILEWALK_YMAJOR,
#endif
.VerticalLineStride = 0,
.VerticalLineStrideOffset = 0,
#if (GEN_GEN == 7)
.SurfaceArraySpacing = info->surf->array_pitch_span ==
ISL_ARRAY_PITCH_SPAN_COMPACT,
#endif
#if GEN_GEN >= 8
.SamplerL2BypassModeDisable = true,
#endif
#if GEN_GEN >= 8
.RenderCacheReadWriteMode = WriteOnlyCache,
#else
.RenderCacheReadWriteMode = 0,
#endif
#if GEN_GEN >= 8
.CubeFaceEnablePositiveZ = 1,
.CubeFaceEnableNegativeZ = 1,
.CubeFaceEnablePositiveY = 1,
.CubeFaceEnableNegativeY = 1,
.CubeFaceEnablePositiveX = 1,
.CubeFaceEnableNegativeX = 1,
#else
.CubeFaceEnables = 0x3f,
#endif
#if GEN_GEN >= 8
.SurfaceQPitch = get_qpitch(info->surf) >> 2,
#endif
.Width = info->surf->logical_level0_px.width - 1,
.Height = info->surf->logical_level0_px.height - 1,
.Depth = 0, /* TEMPLATE */
.RenderTargetViewExtent = 0, /* TEMPLATE */
.MinimumArrayElement = 0, /* TEMPLATE */
.MultisampledSurfaceStorageFormat =
isl_to_gen_multisample_layout[info->surf->msaa_layout],
.NumberofMultisamples = ffs(info->surf->samples) - 1,
.MultisamplePositionPaletteIndex = 0, /* UNUSED */
.XOffset = 0,
.YOffset = 0,
.ResourceMinLOD = 0.0,
.MIPCountLOD = 0, /* TEMPLATE */
.SurfaceMinLOD = 0, /* TEMPLATE */
#if (GEN_GEN >= 8 || GEN_IS_HASWELL)
.ShaderChannelSelectRed = info->view->channel_select[0],
.ShaderChannelSelectGreen = info->view->channel_select[1],
.ShaderChannelSelectBlue = info->view->channel_select[2],
.ShaderChannelSelectAlpha = info->view->channel_select[3],
#endif
.SurfaceBaseAddress = info->address,
.MOCS = info->mocs,
#if GEN_GEN >= 8
.AuxiliarySurfaceMode = AUX_NONE,
#else
.MCSEnable = false,
#endif
};
if (info->surf->tiling == ISL_TILING_W) {
/* From the Broadwell PRM documentation for this field:
*
* "If the surface is a stencil buffer (and thus has Tile Mode set
* to TILEMODE_WMAJOR), the pitch must be set to 2x the value
* computed based on width, as the stencil buffer is stored with
* two rows interleaved."
*/
s.SurfacePitch = info->surf->row_pitch * 2 - 1;
} else {
s.SurfacePitch = info->surf->row_pitch - 1;
}
if (info->view->usage & ISL_SURF_USAGE_STORAGE_BIT) {
s.SurfaceFormat = isl_lower_storage_image_format(dev, info->view->format);
} else {
s.SurfaceFormat = info->view->format;
}
switch (s.SurfaceType) {
case SURFTYPE_1D:
case SURFTYPE_2D:
s.MinimumArrayElement = info->view->base_array_layer;
/* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth:
*
* For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced
* by one for each increase from zero of Minimum Array Element. For
* example, if Minimum Array Element is set to 1024 on a 2D surface,
* the range of this field is reduced to [0,1023].
*
* In other words, 'Depth' is the number of array layers.
*/
s.Depth = info->view->array_len - 1;
/* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent:
*
* For Render Target and Typed Dataport 1D and 2D Surfaces:
* This field must be set to the same value as the Depth field.
*/
s.RenderTargetViewExtent = s.Depth;
break;
case SURFTYPE_CUBE:
s.MinimumArrayElement = info->view->base_array_layer;
/* Same as SURFTYPE_2D, but divided by 6 */
s.Depth = info->view->array_len / 6 - 1;
s.RenderTargetViewExtent = s.Depth;
break;
case SURFTYPE_3D:
s.MinimumArrayElement = info->view->base_array_layer;
/* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth:
*
* If the volume texture is MIP-mapped, this field specifies the
* depth of the base MIP level.
*/
s.Depth = info->surf->logical_level0_px.depth - 1;
/* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent:
*
* For Render Target and Typed Dataport 3D Surfaces: This field
* indicates the extent of the accessible 'R' coordinates minus 1 on
* the LOD currently being rendered to.
*/
s.RenderTargetViewExtent = isl_minify(info->surf->logical_level0_px.depth,
info->view->base_level) - 1;
break;
default:
unreachable(!"bad SurfaceType");
}
if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) {
/* For render target surfaces, the hardware interprets field
* MIPCount/LOD as LOD. The Broadwell PRM says:
*
* MIPCountLOD defines the LOD that will be rendered into.
* SurfaceMinLOD is ignored.
*/
s.MIPCountLOD = info->view->base_level;
s.SurfaceMinLOD = 0;
} else {
/* For non render target surfaces, the hardware interprets field
* MIPCount/LOD as MIPCount. The range of levels accessible by the
* sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD].
*/
s.SurfaceMinLOD = info->view->base_level;
s.MIPCountLOD = MAX(info->view->levels, 1) - 1;
}
#if GEN_GEN >= 8
/* From the CHV PRM, Volume 2d, page 321 (RENDER_SURFACE_STATE dword 0
* bit 9 "Sampler L2 Bypass Mode Disable" Programming Notes):
*
* This bit must be set for the following surface types: BC2_UNORM
* BC3_UNORM BC5_UNORM BC5_SNORM BC7_UNORM
*/
if (GEN_GEN >= 9 || dev->info->is_cherryview) {
switch (info->view->format) {
case ISL_FORMAT_BC2_UNORM:
case ISL_FORMAT_BC3_UNORM:
case ISL_FORMAT_BC5_UNORM:
case ISL_FORMAT_BC5_SNORM:
case ISL_FORMAT_BC7_UNORM:
s.SamplerL2BypassModeDisable = true;
break;
default:
break;
}
}
#endif
if (GEN_GEN <= 8) {
/* Prior to Sky Lake, we only have one bit for the clear color which
* gives us 0 or 1 in whatever the surface's format happens to be.
*/
if (isl_format_has_int_channel(info->view->format)) {
for (unsigned i = 0; i < 4; i++) {
assert(info->clear_color.u32[i] == 0 ||
info->clear_color.u32[i] == 1);
}
} else {
for (unsigned i = 0; i < 4; i++) {
assert(info->clear_color.f32[i] == 0.0f ||
info->clear_color.f32[i] == 1.0f);
}
}
s.RedClearColor = info->clear_color.u32[0] != 0;
s.GreenClearColor = info->clear_color.u32[1] != 0;
s.BlueClearColor = info->clear_color.u32[2] != 0;
s.AlphaClearColor = info->clear_color.u32[3] != 0;
} else {
s.RedClearColor = info->clear_color.u32[0];
s.GreenClearColor = info->clear_color.u32[1];
s.BlueClearColor = info->clear_color.u32[2];
s.AlphaClearColor = info->clear_color.u32[3];
}
GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s);
}
void
isl_genX(buffer_fill_state_s)(void *state,
const struct isl_buffer_fill_state_info *restrict info)
{
uint32_t num_elements = info->size / info->stride;
struct GENX(RENDER_SURFACE_STATE) surface_state = {
.SurfaceType = SURFTYPE_BUFFER,
.SurfaceArray = false,
.SurfaceFormat = info->format,
.SurfaceVerticalAlignment = isl_to_gen_valign[4],
.SurfaceHorizontalAlignment = isl_to_gen_halign[4],
.Height = ((num_elements - 1) >> 7) & 0x3fff,
.Width = (num_elements - 1) & 0x7f,
.Depth = ((num_elements - 1) >> 21) & 0x3f,
.SurfacePitch = info->stride - 1,
.NumberofMultisamples = MULTISAMPLECOUNT_1,
#if (GEN_GEN >= 8)
.TileMode = LINEAR,
#else
.TiledSurface = false,
#endif
#if (GEN_GEN >= 8)
.SamplerL2BypassModeDisable = true,
.RenderCacheReadWriteMode = WriteOnlyCache,
#else
.RenderCacheReadWriteMode = 0,
#endif
.MOCS = info->mocs,
#if (GEN_GEN >= 8 || GEN_IS_HASWELL)
.ShaderChannelSelectRed = SCS_RED,
.ShaderChannelSelectGreen = SCS_GREEN,
.ShaderChannelSelectBlue = SCS_BLUE,
.ShaderChannelSelectAlpha = SCS_ALPHA,
#endif
.SurfaceBaseAddress = info->address,
};
GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &surface_state);
}
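/* Illustrative sketch, not part of this file: shows how the buffer surface
 * state above splits (num_elements - 1) across the 7-bit Width, 14-bit
 * Height and 6-bit Depth fields.  Standalone program; the element count
 * used here is hypothetical.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t num_elements = 1000000;        /* hypothetical buffer element count */
   uint32_t n = num_elements - 1;

   uint32_t width  = n & 0x7f;             /* bits  0..6  */
   uint32_t height = (n >> 7) & 0x3fff;    /* bits  7..20 */
   uint32_t depth  = (n >> 21) & 0x3f;     /* bits 21..26 */

   /* Recombining the three fields gives back the original count minus one. */
   assert(((depth << 21) | (height << 7) | width) == n);
   printf("Width=%u Height=%u Depth=%u\n", width, height, depth);
   return 0;
}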

1
src/intel/isl/tests/.gitignore vendored Normal file
View file

@@ -0,0 +1 @@
/isl_surf_get_image_offset_test

View file

@@ -0,0 +1,278 @@
/*
* Copyright 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include "brw_device_info.h"
#include "isl.h"
#include "isl_priv.h"
#define BDW_GT2_DEVID 0x161a
// An assert that works regardless of NDEBUG.
#define t_assert(cond) \
do { \
if (!(cond)) { \
fprintf(stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
abort(); \
} \
} while (0)
static void
t_assert_extent4d(const struct isl_extent4d *e, uint32_t width,
uint32_t height, uint32_t depth, uint32_t array_len)
{
t_assert(e->width == width);
t_assert(e->height == height);
t_assert(e->depth == depth);
t_assert(e->array_len == array_len);
}
static void
t_assert_image_alignment_el(const struct isl_surf *surf,
uint32_t w, uint32_t h, uint32_t d)
{
struct isl_extent3d align_el;
align_el = isl_surf_get_image_alignment_el(surf);
t_assert(align_el.w == w);
t_assert(align_el.h == h);
t_assert(align_el.d == d);
}
static void
t_assert_image_alignment_sa(const struct isl_surf *surf,
uint32_t w, uint32_t h, uint32_t d)
{
struct isl_extent3d align_sa;
align_sa = isl_surf_get_image_alignment_sa(surf);
t_assert(align_sa.w == w);
t_assert(align_sa.h == h);
t_assert(align_sa.d == d);
}
static void
t_assert_offset_el(const struct isl_surf *surf,
uint32_t level,
uint32_t logical_array_layer,
uint32_t logical_z_offset_px,
uint32_t expected_x_offset_el,
uint32_t expected_y_offset_el)
{
uint32_t x, y;
isl_surf_get_image_offset_el(surf, level, logical_array_layer,
logical_z_offset_px, &x, &y);
t_assert(x == expected_x_offset_el);
t_assert(y == expected_y_offset_el);
}
static void
t_assert_phys_level0_sa(const struct isl_surf *surf, uint32_t width,
uint32_t height, uint32_t depth, uint32_t array_len)
{
t_assert_extent4d(&surf->phys_level0_sa, width, height, depth, array_len);
}
static void
t_assert_gen4_3d_layer(const struct isl_surf *surf,
uint32_t level,
uint32_t aligned_width,
uint32_t aligned_height,
uint32_t depth,
uint32_t horiz_layers,
uint32_t vert_layers,
uint32_t *base_y)
{
for (uint32_t z = 0; z < depth; ++z) {
t_assert_offset_el(surf, level, 0, z,
aligned_width * (z % horiz_layers),
*base_y + aligned_height * (z / horiz_layers));
}
*base_y += aligned_height * vert_layers;
}
static void
test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0(void)
{
bool ok;
struct isl_device dev;
isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID),
/*bit6_swizzle*/ false);
struct isl_surf surf;
ok = isl_surf_init(&dev, &surf,
.dim = ISL_SURF_DIM_2D,
.format = ISL_FORMAT_R8G8B8A8_UNORM,
.width = 512,
.height = 512,
.depth = 1,
.levels = 10,
.array_len = 1,
.samples = 1,
.usage = ISL_SURF_USAGE_TEXTURE_BIT |
ISL_SURF_USAGE_DISABLE_AUX_BIT,
.tiling_flags = ISL_TILING_Y0_BIT);
t_assert(ok);
t_assert_image_alignment_el(&surf, 4, 4, 1);
t_assert_image_alignment_sa(&surf, 4, 4, 1);
t_assert_phys_level0_sa(&surf, 512, 512, 1, 1);
t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 772);
t_assert(isl_surf_get_array_pitch_el_rows(&surf) ==
isl_surf_get_array_pitch_sa_rows(&surf));
/* Row pitch should be the minimum possible */
t_assert(surf.row_pitch == 2048);
t_assert_offset_el(&surf, 0, 0, 0, 0, 0); // +0, +0
t_assert_offset_el(&surf, 1, 0, 0, 0, 512); // +0, +512
t_assert_offset_el(&surf, 2, 0, 0, 256, 512); // +256, +0
t_assert_offset_el(&surf, 3, 0, 0, 256, 640); // +0, +128
t_assert_offset_el(&surf, 4, 0, 0, 256, 704); // +0, +64
t_assert_offset_el(&surf, 5, 0, 0, 256, 736); // +0, +32
t_assert_offset_el(&surf, 6, 0, 0, 256, 752); // +0, +16
t_assert_offset_el(&surf, 7, 0, 0, 256, 760); // +0, +8
t_assert_offset_el(&surf, 8, 0, 0, 256, 764); // +0, +4
t_assert_offset_el(&surf, 9, 0, 0, 256, 768); // +0, +4
}
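/* Illustrative sketch, not part of this file: reproduces the expected mip
 * offsets asserted above for this particular 512x512, HALIGN/VALIGN 4
 * surface.  Level 1 is placed below level 0, and levels >= 2 are stacked
 * vertically in a column to the right of level 1, starting at level 1's
 * y offset.  This only explains the magic numbers in the test; it is not
 * the general isl layout code.
 */
#include <assert.h>
#include <stdint.h>

static uint32_t
example_align(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

static void
example_offset(uint32_t w0, uint32_t h0, uint32_t align_wh,
               uint32_t level, uint32_t *x, uint32_t *y)
{
   if (level == 0) {
      *x = *y = 0;
      return;
   }

   *x = level == 1 ? 0 : example_align(w0 >> 1 ? w0 >> 1 : 1, align_wh);
   *y = example_align(h0, align_wh);
   for (uint32_t l = 2; l < level; l++) {
      uint32_t h = h0 >> l ? h0 >> l : 1;
      *y += example_align(h, align_wh);
   }
}

int main(void)
{
   uint32_t x, y;
   example_offset(512, 512, 4, 1, &x, &y);
   assert(x == 0 && y == 512);
   example_offset(512, 512, 4, 2, &x, &y);
   assert(x == 256 && y == 512);
   example_offset(512, 512, 4, 5, &x, &y);
   assert(x == 256 && y == 736);
   example_offset(512, 512, 4, 9, &x, &y);
   assert(x == 256 && y == 768);
   return 0;
}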
static void
test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0(void)
{
bool ok;
struct isl_device dev;
isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID),
/*bit6_swizzle*/ false);
struct isl_surf surf;
ok = isl_surf_init(&dev, &surf,
.dim = ISL_SURF_DIM_2D,
.format = ISL_FORMAT_R8G8B8A8_UNORM,
.width = 1024,
.height = 1024,
.depth = 1,
.levels = 11,
.array_len = 6,
.samples = 1,
.usage = ISL_SURF_USAGE_TEXTURE_BIT |
ISL_SURF_USAGE_DISABLE_AUX_BIT,
.tiling_flags = ISL_TILING_Y0_BIT);
t_assert(ok);
t_assert_image_alignment_el(&surf, 4, 4, 1);
t_assert_image_alignment_sa(&surf, 4, 4, 1);
t_assert(isl_surf_get_array_pitch_el_rows(&surf) >= 1540);
t_assert(isl_surf_get_array_pitch_el_rows(&surf) ==
isl_surf_get_array_pitch_sa_rows(&surf));
/* Row pitch should be the minimum possible */
t_assert(surf.row_pitch == 4096);
for (uint32_t a = 0; a < 6; ++a) {
uint32_t b = a * isl_surf_get_array_pitch_sa_rows(&surf);
t_assert_offset_el(&surf, 0, a, 0, 0, b + 0); // +0, +0
t_assert_offset_el(&surf, 1, a, 0, 0, b + 1024); // +0, +1024
t_assert_offset_el(&surf, 2, a, 0, 512, b + 1024); // +512, +0
t_assert_offset_el(&surf, 3, a, 0, 512, b + 1280); // +0, +256
t_assert_offset_el(&surf, 4, a, 0, 512, b + 1408); // +0, +128
t_assert_offset_el(&surf, 5, a, 0, 512, b + 1472); // +0, +64
t_assert_offset_el(&surf, 6, a, 0, 512, b + 1504); // +0, +32
t_assert_offset_el(&surf, 7, a, 0, 512, b + 1520); // +0, +16
t_assert_offset_el(&surf, 8, a, 0, 512, b + 1528); // +0, +8
t_assert_offset_el(&surf, 9, a, 0, 512, b + 1532); // +0, +4
t_assert_offset_el(&surf, 10, a, 0, 512, b + 1536); // +0, +4
}
/* The layout below assumes a specific array pitch. It will need updating
* if isl's array pitch calculations ever change.
*/
t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 1540);
/* skip the remaining array layers */
}
static void
test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0(void)
{
bool ok;
struct isl_device dev;
isl_device_init(&dev, brw_get_device_info(BDW_GT2_DEVID),
/*bit6_swizzle*/ false);
struct isl_surf surf;
ok = isl_surf_init(&dev, &surf,
.dim = ISL_SURF_DIM_3D,
.format = ISL_FORMAT_R8G8B8A8_UNORM,
.width = 256,
.height = 256,
.depth = 256,
.levels = 9,
.array_len = 1,
.samples = 1,
.usage = ISL_SURF_USAGE_TEXTURE_BIT |
ISL_SURF_USAGE_DISABLE_AUX_BIT,
.tiling_flags = ISL_TILING_Y0_BIT);
t_assert(ok);
t_assert_image_alignment_el(&surf, 4, 4, 1);
t_assert_image_alignment_sa(&surf, 4, 4, 1);
t_assert(isl_surf_get_array_pitch_el_rows(&surf) == 74916);
t_assert(isl_surf_get_array_pitch_sa_rows(&surf) ==
isl_surf_get_array_pitch_el_rows(&surf));
uint32_t base_y = 0;
t_assert_gen4_3d_layer(&surf, 0, 256, 256, 256, 1, 256, &base_y);
t_assert_gen4_3d_layer(&surf, 1, 128, 128, 128, 2, 64, &base_y);
t_assert_gen4_3d_layer(&surf, 2, 64, 64, 64, 4, 16, &base_y);
t_assert_gen4_3d_layer(&surf, 3, 32, 32, 32, 8, 4, &base_y);
t_assert_gen4_3d_layer(&surf, 4, 16, 16, 16, 16, 1, &base_y);
t_assert_gen4_3d_layer(&surf, 5, 8, 8, 8, 32, 1, &base_y);
t_assert_gen4_3d_layer(&surf, 6, 4, 4, 4, 64, 1, &base_y);
t_assert_gen4_3d_layer(&surf, 7, 4, 4, 2, 128, 1, &base_y);
t_assert_gen4_3d_layer(&surf, 8, 4, 4, 1, 256, 1, &base_y);
}
int main(void)
{
/* FINISHME: Add tests for npot sizes */
/* FINISHME: Add tests for 1D surfaces */
test_bdw_2d_r8g8b8a8_unorm_512x512_array01_samples01_noaux_tiley0();
test_bdw_2d_r8g8b8a8_unorm_1024x1024_array06_samples01_noaux_tiley0();
test_bdw_3d_r8g8b8a8_unorm_256x256x256_levels09_tiley0();
}

8
src/intel/vulkan/.gitignore vendored Normal file
View file

@@ -0,0 +1,8 @@
# Generated source files
/*_spirv_autogen.h
/anv_entrypoints.c
/anv_entrypoints.h
/wayland-drm-protocol.c
/wayland-drm-client-protocol.h
/dev_icd.json
/gen*_pack.h

View file

@@ -0,0 +1,212 @@
# Copyright © 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
SUBDIRS = . tests
vulkan_includedir = $(includedir)/vulkan
vulkan_include_HEADERS = \
$(top_srcdir)/include/vulkan/vk_platform.h \
$(top_srcdir)/include/vulkan/vulkan.h \
$(top_srcdir)/include/vulkan/vulkan_intel.h
# Used when generating entrypoints to filter out unwanted extensions
VULKAN_ENTRYPOINT_CPPFLAGS = \
-I$(top_srcdir)/include/vulkan \
-DVK_USE_PLATFORM_XCB_KHR \
-DVK_USE_PLATFORM_WAYLAND_KHR
lib_LTLIBRARIES = libvulkan_intel.la
check_LTLIBRARIES = libvulkan-test.la
PER_GEN_LIBS = \
libanv-gen7.la \
libanv-gen75.la \
libanv-gen8.la \
libanv-gen9.la
noinst_LTLIBRARIES = $(PER_GEN_LIBS)
# The gallium includes are for the util/u_math.h include from main/macros.h
AM_CPPFLAGS = \
$(INTEL_CFLAGS) \
$(VALGRIND_CFLAGS) \
$(DEFINES) \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/compiler \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/i965 \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/intel/ \
-I$(top_builddir)/src \
-I$(top_builddir)/src/compiler \
-I$(top_builddir)/src/compiler/nir \
-I$(top_builddir)/src/intel
libvulkan_intel_la_CFLAGS = $(CFLAGS) -Wno-override-init
VULKAN_SOURCES = \
anv_allocator.c \
anv_cmd_buffer.c \
anv_batch_chain.c \
anv_descriptor_set.c \
anv_device.c \
anv_dump.c \
anv_entrypoints.c \
anv_entrypoints.h \
anv_formats.c \
anv_image.c \
anv_intel.c \
anv_meta.c \
anv_meta_blit.c \
anv_meta_blit2d.c \
anv_meta_clear.c \
anv_meta_copy.c \
anv_meta_resolve.c \
anv_nir_apply_dynamic_offsets.c \
anv_nir_apply_pipeline_layout.c \
anv_nir_lower_push_constants.c \
anv_pass.c \
anv_pipeline.c \
anv_pipeline_cache.c \
anv_private.h \
anv_query.c \
anv_util.c \
anv_wsi.c \
anv_wsi_x11.c
BUILT_SOURCES = \
anv_entrypoints.h \
anv_entrypoints.c
libanv_gen7_la_SOURCES = \
genX_cmd_buffer.c \
genX_pipeline.c \
gen7_cmd_buffer.c \
gen7_pipeline.c \
genX_state.c
libanv_gen7_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=70
libanv_gen75_la_SOURCES = \
genX_cmd_buffer.c \
genX_pipeline.c \
gen7_cmd_buffer.c \
gen7_pipeline.c \
genX_state.c
libanv_gen75_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=75
libanv_gen8_la_SOURCES = \
genX_cmd_buffer.c \
genX_pipeline.c \
gen8_cmd_buffer.c \
gen8_pipeline.c \
genX_state.c
libanv_gen8_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=80
libanv_gen9_la_SOURCES = \
genX_cmd_buffer.c \
genX_pipeline.c \
gen8_cmd_buffer.c \
gen8_pipeline.c \
genX_state.c
libanv_gen9_la_CFLAGS = $(libvulkan_intel_la_CFLAGS) -DGEN_VERSIONx10=90
if HAVE_EGL_PLATFORM_WAYLAND
BUILT_SOURCES += \
wayland-drm-protocol.c \
wayland-drm-client-protocol.h
%-protocol.c : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml
$(AM_V_GEN)$(WAYLAND_SCANNER) code < $< > $@
%-client-protocol.h : $(top_srcdir)/src/egl/wayland/wayland-drm/%.xml
$(AM_V_GEN)$(WAYLAND_SCANNER) client-header < $< > $@
AM_CPPFLAGS += -I$(top_srcdir)/src/egl/wayland/wayland-drm
VULKAN_SOURCES += \
wayland-drm-protocol.c \
anv_wsi_wayland.c
libvulkan_intel_la_CFLAGS += -DHAVE_WAYLAND_PLATFORM
endif
libvulkan_intel_la_SOURCES = \
$(VULKAN_SOURCES) \
anv_gem.c
anv_entrypoints.h : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
$(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< header > $@
anv_entrypoints.c : anv_entrypoints_gen.py $(vulkan_include_HEADERS)
$(AM_V_GEN) cat $(vulkan_include_HEADERS) | $(CPP) $(VULKAN_ENTRYPOINT_CPPFLAGS) - | $(PYTHON2) $< code > $@
CLEANFILES = $(BUILT_SOURCES)
libvulkan_intel_la_LIBADD = $(WAYLAND_LIBS) \
-lxcb -lxcb-dri3 -lxcb-present -lxcb-sync -lxshmfence \
$(top_builddir)/src/intel/isl/libisl.la \
$(top_builddir)/src/mesa/drivers/dri/i965/libi965_compiler.la \
$(top_builddir)/src/mesa/libmesa.la \
$(top_builddir)/src/mesa/drivers/dri/common/libdri_test_stubs.la \
-lpthread -ldl -lstdc++ \
$(PER_GEN_LIBS)
libvulkan_intel_la_LDFLAGS = \
-module -avoid-version -shared -shrext .so
# Generate icd files. It would be nice to just be able to add these to
# AC_CONFIG_FILES, but @libdir@ typically expands to '${exec_prefix}/lib64',
# which we can't put in the icd file. When running sed from the Makefile we
# can use ${libdir}, which expands completely and we avoid putting Makefile
# variables in the icd file.
icdconfdir=$(sysconfdir)/vulkan/icd.d
icdconf_DATA = intel_icd.json
noinst_DATA = dev_icd.json
%.json : %.json.in
$(AM_V_GEN) $(SED) \
-e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
-e "s#@install_libdir@#${libdir}#" < $< > $@
# Libvulkan with dummy gem. Used for unit tests.
libvulkan_test_la_SOURCES = \
$(VULKAN_SOURCES) \
anv_gem_stubs.c
libvulkan_test_la_CFLAGS = \
-I$(top_srcdir)/src/intel/vulkan \
$(libvulkan_intel_la_CFLAGS)
libvulkan_test_la_LIBADD = $(libvulkan_intel_la_LIBADD)
include $(top_srcdir)/install-lib-links.mk
install-data-local:
$(INSTALL_DATA) -D $(srcdir)/intel_icd.json $(VULKAN_ICD_INSTALL_DIR)/intel_icd.json

View file

@@ -0,0 +1,880 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#define _DEFAULT_SOURCE
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <values.h>
#include <assert.h>
#include <linux/futex.h>
#include <linux/memfd.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include "anv_private.h"
#ifdef HAVE_VALGRIND
#define VG_NOACCESS_READ(__ptr) ({ \
VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \
__typeof(*(__ptr)) __val = *(__ptr); \
VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\
__val; \
})
#define VG_NOACCESS_WRITE(__ptr, __val) ({ \
VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \
*(__ptr) = (__val); \
VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \
})
#else
#define VG_NOACCESS_READ(__ptr) (*(__ptr))
#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val))
#endif
/* Design goals:
*
* - Lock free (except when resizing underlying bos)
*
* - Constant time allocation with typically only one atomic
*
* - Multiple allocation sizes without fragmentation
*
* - Can grow while keeping addresses and offset of contents stable
*
* - All allocations within one bo so we can point one of the
* STATE_BASE_ADDRESS pointers at it.
*
* The overall design is a two-level allocator: top level is a fixed size, big
* block (8k) allocator, which operates out of a bo. Allocation is done by
* either pulling a block from the free list or growing the used range of the
* bo. Growing the range may run out of space in the bo which we then need to
* grow. Growing the bo is tricky in a multi-threaded, lockless environment:
* we need to keep all pointers and contents in the old map valid. GEM bos in
* general can't grow, but we use a trick: we create a memfd and use ftruncate
* to grow it as necessary. We mmap the new size and then create a gem bo for
* it using the new gem userptr ioctl. Without heavy-handed locking around
* our allocation fast-path, there isn't really a way to munmap the old mmap,
* so we just keep it around until garbage collection time. While the block
* allocator is lockless for normal operations, we block other threads trying
* to allocate while we're growing the map. It shouldn't happen often, and
* growing is fast anyway.
*
* At the next level we can use various sub-allocators. The state pool is a
* pool of smaller, fixed size objects, which operates much like the block
* pool. It uses a free list for freeing objects, but when it runs out of
* space it just allocates a new block from the block pool. This allocator is
* intended for longer lived state objects such as SURFACE_STATE and most
* other persistent state objects in the API. We may need to track more info
* with these objects and a pointer back to the CPU object (e.g. VkImage). In
* those cases we just allocate a slightly bigger object and put the extra
* state after the GPU state object.
*
* The state stream allocator works similarly to how the i965 DRI driver streams
* all its state. Even with Vulkan, we need to emit transient state (whether
* surface state base or dynamic state base), and for that we can just get a
* block and fill it up. These cases are local to a command buffer and the
* sub-allocator need not be thread safe. The streaming allocator gets a new
* block when it runs out of space and chains them together so they can be
* easily freed.
*/
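/* Minimal standalone sketch, not part of this file, of the memfd growth
 * trick described above: reserve a file with ftruncate up front, then map
 * progressively larger windows of it.  Old mappings stay valid, so pointers
 * handed out before a grow never need to be fixed up.  Linux-specific;
 * error handling is reduced to asserts for brevity.
 */
#define _DEFAULT_SOURCE
#include <assert.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
   /* memfd_create() may lack a libc wrapper, so go through syscall()
    * just as the driver code below does.
    */
   int fd = syscall(SYS_memfd_create, "example pool", 0);
   assert(fd >= 0);
   assert(ftruncate(fd, 1 << 20) == 0);   /* reserve 1 MB up front */

   size_t size = 4096;
   char *old_map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
   assert(old_map != MAP_FAILED);
   strcpy(old_map, "hello");

   /* "Grow" by mapping a larger window of the same file.  The old mapping
    * is simply left in place, like the cleanup list in this file does, so
    * pointers handed out earlier stay valid.
    */
   char *new_map = mmap(NULL, size * 2, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
   assert(new_map != MAP_FAILED);
   assert(strcmp(old_map, new_map) == 0);   /* both views see the same pages */

   return 0;
}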
/* Allocations are always at least 64 byte aligned, so 1 is an invalid value.
* We use it to indicate the free list is empty. */
#define EMPTY 1
struct anv_mmap_cleanup {
void *map;
size_t size;
uint32_t gem_handle;
};
#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0})
static inline long
sys_futex(void *addr1, int op, int val1,
struct timespec *timeout, void *addr2, int val3)
{
return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
}
static inline int
futex_wake(uint32_t *addr, int count)
{
return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0);
}
static inline int
futex_wait(uint32_t *addr, int32_t value)
{
return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0);
}
static inline int
memfd_create(const char *name, unsigned int flags)
{
return syscall(SYS_memfd_create, name, flags);
}
static inline uint32_t
ilog2_round_up(uint32_t value)
{
assert(value != 0);
return 32 - __builtin_clz(value - 1);
}
static inline uint32_t
round_to_power_of_two(uint32_t value)
{
return 1 << ilog2_round_up(value);
}
static bool
anv_free_list_pop(union anv_free_list *list, void **map, int32_t *offset)
{
union anv_free_list current, new, old;
current.u64 = list->u64;
while (current.offset != EMPTY) {
/* We have to add a memory barrier here so that the list head (and
* offset) gets read before we read the map pointer. This way we
* know that the map pointer is valid for the given offset at the
* point where we read it.
*/
__sync_synchronize();
int32_t *next_ptr = *map + current.offset;
new.offset = VG_NOACCESS_READ(next_ptr);
new.count = current.count + 1;
old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
if (old.u64 == current.u64) {
*offset = current.offset;
return true;
}
current = old;
}
return false;
}
static void
anv_free_list_push(union anv_free_list *list, void *map, int32_t offset)
{
union anv_free_list current, old, new;
int32_t *next_ptr = map + offset;
old = *list;
do {
current = old;
VG_NOACCESS_WRITE(next_ptr, current.offset);
new.offset = offset;
new.count = current.count + 1;
old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
} while (old.u64 != current.u64);
}
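/* Minimal standalone sketch, not part of this file, of the lock-free free
 * list idea used above: a 32-bit head offset is paired with a 32-bit
 * generation counter in one 64-bit word, and both are swapped together by
 * a single compare-and-swap so that a stale head (the ABA problem) can
 * never be installed.  The union layout below is a stand-in; the real one
 * lives in anv_private.h.
 */
#include <assert.h>
#include <stdint.h>

union example_free_list {
   struct {
      int32_t  offset;   /* head of the list, or 1 (EMPTY)    */
      uint32_t count;    /* bumped on every successful update */
   };
   uint64_t u64;
};

int main(void)
{
   union example_free_list list = { .offset = 1, .count = 0 };
   union example_free_list current = list, new;

   /* Push a block at offset 4096: build the new head locally, then
    * publish offset and counter atomically in one go.
    */
   new.offset = 4096;
   new.count = current.count + 1;

   uint64_t old = __sync_val_compare_and_swap(&list.u64, current.u64, new.u64);
   assert(old == current.u64);                     /* nobody raced us */
   assert(list.offset == 4096 && list.count == 1);
   return 0;
}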
/* All pointers in the ptr_free_list are assumed to be page-aligned. This
* means that the bottom 12 bits should all be zero.
*/
#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff)
#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~(uintptr_t)0xfff))
#define PFL_PACK(ptr, count) ({ \
(void *)(((uintptr_t)(ptr) & ~(uintptr_t)0xfff) | ((count) & 0xfff)); \
})
static bool
anv_ptr_free_list_pop(void **list, void **elem)
{
void *current = *list;
while (PFL_PTR(current) != NULL) {
void **next_ptr = PFL_PTR(current);
void *new_ptr = VG_NOACCESS_READ(next_ptr);
unsigned new_count = PFL_COUNT(current) + 1;
void *new = PFL_PACK(new_ptr, new_count);
void *old = __sync_val_compare_and_swap(list, current, new);
if (old == current) {
*elem = PFL_PTR(current);
return true;
}
current = old;
}
return false;
}
static void
anv_ptr_free_list_push(void **list, void *elem)
{
void *old, *current;
void **next_ptr = elem;
/* The pointer-based free list requires that the pointer be
* page-aligned. This is because we use the bottom 12 bits of the
* pointer to store a counter to solve the ABA concurrency problem.
*/
assert(((uintptr_t)elem & 0xfff) == 0);
old = *list;
do {
current = old;
VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current));
unsigned new_count = PFL_COUNT(current) + 1;
void *new = PFL_PACK(elem, new_count);
old = __sync_val_compare_and_swap(list, current, new);
} while (old != current);
}
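/* Illustrative sketch, not part of this file: the pointer free list above
 * packs a 12-bit ABA counter into the low bits of a page-aligned pointer.
 * This standalone check shows that the PFL_PTR/PFL_COUNT style packing
 * round-trips as long as the pointer really is 4096-byte aligned.
 */
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
   void *page;
   assert(posix_memalign(&page, 4096, 4096) == 0);

   for (unsigned count = 0; count < 4096; count++) {
      uintptr_t packed = (uintptr_t)page | (count & 0xfff);
      assert((void *)(packed & ~(uintptr_t)0xfff) == page);   /* pointer part */
      assert((packed & 0xfff) == count);                      /* counter part */
   }

   free(page);
   return 0;
}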
static uint32_t
anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state);
void
anv_block_pool_init(struct anv_block_pool *pool,
struct anv_device *device, uint32_t block_size)
{
assert(util_is_power_of_two(block_size));
pool->device = device;
pool->bo.gem_handle = 0;
pool->bo.offset = 0;
pool->bo.size = 0;
pool->bo.is_winsys_bo = false;
pool->block_size = block_size;
pool->free_list = ANV_FREE_LIST_EMPTY;
pool->back_free_list = ANV_FREE_LIST_EMPTY;
pool->fd = memfd_create("block pool", MFD_CLOEXEC);
if (pool->fd == -1)
return;
/* Just make it 2GB up-front. The Linux kernel won't actually back it
* with pages until we either map and fault on one of them or we use
* userptr and send a chunk of it off to the GPU.
*/
if (ftruncate(pool->fd, BLOCK_POOL_MEMFD_SIZE) == -1)
return;
anv_vector_init(&pool->mmap_cleanups,
round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128);
pool->state.next = 0;
pool->state.end = 0;
pool->back_state.next = 0;
pool->back_state.end = 0;
/* Immediately grow the pool so we'll have a backing bo. */
pool->state.end = anv_block_pool_grow(pool, &pool->state);
}
void
anv_block_pool_finish(struct anv_block_pool *pool)
{
struct anv_mmap_cleanup *cleanup;
anv_vector_foreach(cleanup, &pool->mmap_cleanups) {
if (cleanup->map)
munmap(cleanup->map, cleanup->size);
if (cleanup->gem_handle)
anv_gem_close(pool->device, cleanup->gem_handle);
}
anv_vector_finish(&pool->mmap_cleanups);
close(pool->fd);
}
#define PAGE_SIZE 4096
/** Grows and re-centers the block pool.
*
* We grow the block pool in one or both directions in such a way that the
* following conditions are met:
*
* 1) The size of the entire pool is always a power of two.
*
* 2) The pool only grows on both ends. Neither end can get
* shortened.
*
* 3) At the end of the allocation, we have about twice as much space
* allocated for each end as we have used. This way the pool doesn't
* grow too far in one direction or the other.
*
* 4) If the _alloc_back() has never been called, then the back portion of
* the pool retains a size of zero. (This makes it easier for users of
* the block pool that only want a one-sided pool.)
*
* 5) We have enough space allocated for at least one more block in
* whichever side `state` points to.
*
* 6) The center of the pool is always aligned to both the block_size of
* the pool and a 4K CPU page.
*/
static uint32_t
anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
{
size_t size;
void *map;
uint32_t gem_handle;
struct anv_mmap_cleanup *cleanup;
pthread_mutex_lock(&pool->device->mutex);
assert(state == &pool->state || state == &pool->back_state);
/* Gather a little usage information on the pool. Since we may have
* threads waiting in queue to get some storage while we resize, it's
* actually possible that total_used will be larger than old_size. In
* particular, block_pool_alloc() increments state->next prior to
* calling block_pool_grow, so this ensures that we get enough space for
* whichever side tries to grow the pool.
*
* We align to a page size because it makes it easier to do our
* calculations later in such a way that we stay page-aligned.
*/
uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE);
uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE);
uint32_t total_used = front_used + back_used;
assert(state == &pool->state || back_used > 0);
size_t old_size = pool->bo.size;
if (old_size != 0 &&
back_used * 2 <= pool->center_bo_offset &&
front_used * 2 <= (old_size - pool->center_bo_offset)) {
/* If we're in this case then this isn't the first allocation and we
* already have enough space on both sides to hold double what we
* have allocated. There's nothing for us to do.
*/
goto done;
}
if (old_size == 0) {
/* This is the first allocation */
size = MAX2(32 * pool->block_size, PAGE_SIZE);
} else {
size = old_size * 2;
}
/* We can't have a block pool bigger than 1GB because we use signed
* 32-bit offsets in the free list and we don't want overflow. We
* should never need a block pool bigger than 1GB anyway.
*/
assert(size <= (1u << 31));
/* We compute a new center_bo_offset such that, when we double the size
* of the pool, we maintain the ratio of how much is used by each side.
* This way things should remain more-or-less balanced.
*/
uint32_t center_bo_offset;
if (back_used == 0) {
/* If we're in this case then we have never called alloc_back(). In
* this case, we want to keep the offset at 0 to make things as simple
* as possible for users that don't care about back allocations.
*/
center_bo_offset = 0;
} else {
/* Try to "center" the allocation based on how much is currently in
* use on each side of the center line.
*/
center_bo_offset = ((uint64_t)size * back_used) / total_used;
/* Align down to a multiple of both the block size and page size */
uint32_t granularity = MAX2(pool->block_size, PAGE_SIZE);
assert(util_is_power_of_two(granularity));
center_bo_offset &= ~(granularity - 1);
assert(center_bo_offset >= back_used);
/* Make sure we don't shrink the back end of the pool */
if (center_bo_offset < pool->back_state.end)
center_bo_offset = pool->back_state.end;
/* Make sure that we don't shrink the front end of the pool */
if (size - center_bo_offset < pool->state.end)
center_bo_offset = size - pool->state.end;
}
assert(center_bo_offset % pool->block_size == 0);
assert(center_bo_offset % PAGE_SIZE == 0);
/* Assert that we only ever grow the pool */
assert(center_bo_offset >= pool->back_state.end);
assert(size - center_bo_offset >= pool->state.end);
cleanup = anv_vector_add(&pool->mmap_cleanups);
if (!cleanup)
goto fail;
*cleanup = ANV_MMAP_CLEANUP_INIT;
/* Just leak the old map until we destroy the pool. We can't munmap it
* without races or imposing locking on the block allocate fast path. On
* the whole, the leaked maps add up to less than the size of the
* current map. MAP_POPULATE seems like the right thing to do, but we
* should try to get some numbers.
*/
map = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, pool->fd,
BLOCK_POOL_MEMFD_CENTER - center_bo_offset);
cleanup->map = map;
cleanup->size = size;
if (map == MAP_FAILED)
goto fail;
gem_handle = anv_gem_userptr(pool->device, map, size);
if (gem_handle == 0)
goto fail;
cleanup->gem_handle = gem_handle;
#if 0
/* Regular objects are created I915_CACHING_CACHED on LLC platforms and
* I915_CACHING_NONE on non-LLC platforms. However, userptr objects are
* always created as I915_CACHING_CACHED, which on non-LLC means
* snooped. That can be useful but comes with a bit of overhead. Since
* we're explicitly clflushing and don't want the overhead, we need to turn
* it off. */
if (!pool->device->info.has_llc) {
anv_gem_set_caching(pool->device, gem_handle, I915_CACHING_NONE);
anv_gem_set_domain(pool->device, gem_handle,
I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
}
#endif
/* Now that we successfully allocated everything, we can write the new
* values back into pool. */
pool->map = map + center_bo_offset;
pool->center_bo_offset = center_bo_offset;
pool->bo.gem_handle = gem_handle;
pool->bo.size = size;
pool->bo.map = map;
pool->bo.index = 0;
done:
pthread_mutex_unlock(&pool->device->mutex);
/* Return the appropriate new size. This function never actually
* updates state->next. Instead, we let the caller do that because it
* needs to do so in order to maintain its concurrency model.
*/
if (state == &pool->state) {
return pool->bo.size - pool->center_bo_offset;
} else {
assert(pool->center_bo_offset > 0);
return pool->center_bo_offset;
}
fail:
pthread_mutex_unlock(&pool->device->mutex);
return 0;
}
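/* Small standalone sketch, not part of this file, of the re-centering rule
 * used in anv_block_pool_grow() above: the new center offset preserves the
 * ratio of back to front usage and is then aligned down to the larger of
 * the block size and the page size.  The pool sizes below are hypothetical.
 */
#include <assert.h>
#include <stdint.h>

#define EXAMPLE_PAGE_SIZE 4096

static uint32_t
example_center_offset(uint32_t size, uint32_t block_size,
                      uint32_t back_used, uint32_t front_used)
{
   uint32_t total_used = back_used + front_used;
   uint32_t center = ((uint64_t)size * back_used) / total_used;
   uint32_t granularity = block_size > EXAMPLE_PAGE_SIZE ?
                          block_size : EXAMPLE_PAGE_SIZE;

   /* Align down so the center stays a multiple of both granularities. */
   return center & ~(granularity - 1);
}

int main(void)
{
   /* 64 KB pool, 8 KB blocks, 12 KB used at the back, 20 KB at the front:
    * 65536 * 12288 / 32768 = 24576, already a multiple of 8192.
    */
   assert(example_center_offset(65536, 8192, 12288, 20480) == 24576);
   return 0;
}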
static uint32_t
anv_block_pool_alloc_new(struct anv_block_pool *pool,
struct anv_block_state *pool_state)
{
struct anv_block_state state, old, new;
while (1) {
state.u64 = __sync_fetch_and_add(&pool_state->u64, pool->block_size);
if (state.next < state.end) {
assert(pool->map);
return state.next;
} else if (state.next == state.end) {
/* We allocated the first block outside the pool, so we have to grow it.
* pool_state->next acts as a mutex: threads that try to allocate now will
* get block indexes above the current limit and hit futex_wait
* below. */
new.next = state.next + pool->block_size;
new.end = anv_block_pool_grow(pool, pool_state);
assert(new.end >= new.next && new.end % pool->block_size == 0);
old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
if (old.next != state.next)
futex_wake(&pool_state->end, INT_MAX);
return state.next;
} else {
futex_wait(&pool_state->end, state.end);
continue;
}
}
}
int32_t
anv_block_pool_alloc(struct anv_block_pool *pool)
{
int32_t offset;
/* Try free list first. */
if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) {
assert(offset >= 0);
assert(pool->map);
return offset;
}
return anv_block_pool_alloc_new(pool, &pool->state);
}
/* Allocates a block out of the back of the block pool.
*
* This will allocate a block earlier than the "start" of the block pool.
* The offsets returned from this function will be negative but will still
* be correct relative to the block pool's map pointer.
*
* If you ever use anv_block_pool_alloc_back, then you will have to do
* gymnastics with the block pool's BO when doing relocations.
*/
int32_t
anv_block_pool_alloc_back(struct anv_block_pool *pool)
{
int32_t offset;
/* Try free list first. */
if (anv_free_list_pop(&pool->back_free_list, &pool->map, &offset)) {
assert(offset < 0);
assert(pool->map);
return offset;
}
offset = anv_block_pool_alloc_new(pool, &pool->back_state);
/* The offset we get out of anv_block_pool_alloc_new() is actually the
* number of bytes downwards from the middle to the end of the block.
* We need to turn it into a (negative) offset from the middle to the
* start of the block.
*/
assert(offset >= 0);
return -(offset + pool->block_size);
}
void
anv_block_pool_free(struct anv_block_pool *pool, int32_t offset)
{
if (offset < 0) {
anv_free_list_push(&pool->back_free_list, pool->map, offset);
} else {
anv_free_list_push(&pool->free_list, pool->map, offset);
}
}
static void
anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool,
size_t state_size)
{
/* At least a cache line and must divide the block size. */
assert(state_size >= 64 && util_is_power_of_two(state_size));
pool->state_size = state_size;
pool->free_list = ANV_FREE_LIST_EMPTY;
pool->block.next = 0;
pool->block.end = 0;
}
static uint32_t
anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool,
struct anv_block_pool *block_pool)
{
int32_t offset;
struct anv_block_state block, old, new;
/* Try free list first. */
if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset)) {
assert(offset >= 0);
return offset;
}
/* If the free list was empty (or somebody raced us and took the items), we
* allocate a new item from the end of the block. */
restart:
block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size);
if (block.next < block.end) {
return block.next;
} else if (block.next == block.end) {
offset = anv_block_pool_alloc(block_pool);
new.next = offset + pool->state_size;
new.end = offset + block_pool->block_size;
old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
if (old.next != block.next)
futex_wake(&pool->block.end, INT_MAX);
return offset;
} else {
futex_wait(&pool->block.end, block.end);
goto restart;
}
}
static void
anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool,
struct anv_block_pool *block_pool,
uint32_t offset)
{
anv_free_list_push(&pool->free_list, block_pool->map, offset);
}
void
anv_state_pool_init(struct anv_state_pool *pool,
struct anv_block_pool *block_pool)
{
pool->block_pool = block_pool;
for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) {
size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i);
anv_fixed_size_state_pool_init(&pool->buckets[i], size);
}
VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));
}
void
anv_state_pool_finish(struct anv_state_pool *pool)
{
VG(VALGRIND_DESTROY_MEMPOOL(pool));
}
struct anv_state
anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align)
{
unsigned size_log2 = ilog2_round_up(size < align ? align : size);
assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2);
if (size_log2 < ANV_MIN_STATE_SIZE_LOG2)
size_log2 = ANV_MIN_STATE_SIZE_LOG2;
unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2;
struct anv_state state;
state.alloc_size = 1 << size_log2;
state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket],
pool->block_pool);
state.map = pool->block_pool->map + state.offset;
VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size));
return state;
}
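/* Worked sketch, not part of this file: how a state pool request of a given
 * size and alignment maps onto a power-of-two bucket, mirroring the
 * computation in anv_state_pool_alloc() above.  The minimum bucket size of
 * 64 bytes (log2 == 6) is an assumption here that matches the "at least a
 * cache line" assertion earlier in this file; the real constants live in
 * anv_private.h.
 */
#include <assert.h>
#include <stdint.h>

#define EXAMPLE_MIN_STATE_SIZE_LOG2 6   /* assumed: 64-byte minimum state */

static unsigned
example_ilog2_round_up(uint32_t value)
{
   assert(value != 0);
   return 32 - __builtin_clz(value - 1);
}

static unsigned
example_bucket(uint32_t size, uint32_t align)
{
   unsigned size_log2 = example_ilog2_round_up(size < align ? align : size);
   if (size_log2 < EXAMPLE_MIN_STATE_SIZE_LOG2)
      size_log2 = EXAMPLE_MIN_STATE_SIZE_LOG2;
   return size_log2 - EXAMPLE_MIN_STATE_SIZE_LOG2;
}

int main(void)
{
   assert(example_bucket(16, 16) == 0);    /* rounds up to 64 bytes  */
   assert(example_bucket(64, 64) == 0);    /* exactly one cache line */
   assert(example_bucket(65, 64) == 1);    /* rounds up to 128 bytes */
   assert(example_bucket(200, 64) == 2);   /* rounds up to 256 bytes */
   return 0;
}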
void
anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state)
{
assert(util_is_power_of_two(state.alloc_size));
unsigned size_log2 = ilog2_round_up(state.alloc_size);
assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 &&
size_log2 <= ANV_MAX_STATE_SIZE_LOG2);
unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2;
VG(VALGRIND_MEMPOOL_FREE(pool, state.map));
anv_fixed_size_state_pool_free(&pool->buckets[bucket],
pool->block_pool, state.offset);
}
#define NULL_BLOCK 1
struct anv_state_stream_block {
/* The next block */
struct anv_state_stream_block *next;
/* The offset into the block pool at which this block starts */
uint32_t offset;
#ifdef HAVE_VALGRIND
/* A pointer to the first user-allocated thing in this block. This is
* what valgrind sees as the start of the block.
*/
void *_vg_ptr;
#endif
};
/* The state stream allocator is a one-shot, single threaded allocator for
* variable sized blocks. We use it for allocating dynamic state.
*/
void
anv_state_stream_init(struct anv_state_stream *stream,
struct anv_block_pool *block_pool)
{
stream->block_pool = block_pool;
stream->block = NULL;
/* Ensure that next + whatever > end. This way the first call to
* state_stream_alloc fetches a new block.
*/
stream->next = 1;
stream->end = 0;
VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false));
}
void
anv_state_stream_finish(struct anv_state_stream *stream)
{
VG(const uint32_t block_size = stream->block_pool->block_size);
struct anv_state_stream_block *next = stream->block;
while (next != NULL) {
VG(VALGRIND_MAKE_MEM_DEFINED(next, sizeof(*next)));
struct anv_state_stream_block sb = VG_NOACCESS_READ(next);
VG(VALGRIND_MEMPOOL_FREE(stream, sb._vg_ptr));
VG(VALGRIND_MAKE_MEM_UNDEFINED(next, block_size));
anv_block_pool_free(stream->block_pool, sb.offset);
next = sb.next;
}
VG(VALGRIND_DESTROY_MEMPOOL(stream));
}
struct anv_state
anv_state_stream_alloc(struct anv_state_stream *stream,
uint32_t size, uint32_t alignment)
{
struct anv_state_stream_block *sb = stream->block;
struct anv_state state;
state.offset = align_u32(stream->next, alignment);
if (state.offset + size > stream->end) {
uint32_t block = anv_block_pool_alloc(stream->block_pool);
sb = stream->block_pool->map + block;
VG(VALGRIND_MAKE_MEM_UNDEFINED(sb, sizeof(*sb)));
sb->next = stream->block;
sb->offset = block;
VG(sb->_vg_ptr = NULL);
VG(VALGRIND_MAKE_MEM_NOACCESS(sb, stream->block_pool->block_size));
stream->block = sb;
stream->start = block;
stream->next = block + sizeof(*sb);
stream->end = block + stream->block_pool->block_size;
state.offset = align_u32(stream->next, alignment);
assert(state.offset + size <= stream->end);
}
assert(state.offset > stream->start);
state.map = (void *)sb + (state.offset - stream->start);
state.alloc_size = size;
#ifdef HAVE_VALGRIND
void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr);
if (vg_ptr == NULL) {
vg_ptr = state.map;
VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr);
VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size);
} else {
void *state_end = state.map + state.alloc_size;
/* This only updates the mempool. The newly allocated chunk is still
* marked as NOACCESS. */
VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr, state_end - vg_ptr);
/* Mark the newly allocated chunk as undefined */
VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size);
}
#endif
stream->next = state.offset + size;
return state;
}
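/* Minimal standalone sketch, not part of this file, of the bump-pointer
 * pattern the state stream allocator implements: align the cursor, hand
 * out the range, advance.  The real allocator above additionally grabs a
 * fresh block from the block pool (and chains it for later freeing) when
 * the current block runs out; here that case just reports failure.
 */
#include <assert.h>
#include <stdint.h>

struct example_stream {
   uint32_t next;   /* current cursor within the block */
   uint32_t end;    /* one past the last usable offset */
};

static uint32_t
example_align_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

static int64_t
example_stream_alloc(struct example_stream *s, uint32_t size, uint32_t align)
{
   uint32_t offset = example_align_u32(s->next, align);
   if (offset + size > s->end)
      return -1;                 /* the real code would fetch a new block */
   s->next = offset + size;
   return offset;
}

int main(void)
{
   struct example_stream s = { .next = 0, .end = 4096 };
   assert(example_stream_alloc(&s, 100, 64) == 0);
   assert(example_stream_alloc(&s, 100, 64) == 128);   /* cursor aligned up */
   assert(example_stream_alloc(&s, 8192, 64) == -1);   /* would need a new block */
   return 0;
}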
struct bo_pool_bo_link {
struct bo_pool_bo_link *next;
struct anv_bo bo;
};
void
anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device)
{
pool->device = device;
memset(pool->free_list, 0, sizeof(pool->free_list));
VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));
}
void
anv_bo_pool_finish(struct anv_bo_pool *pool)
{
for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
struct bo_pool_bo_link *link = PFL_PTR(pool->free_list[i]);
while (link != NULL) {
struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link);
anv_gem_munmap(link_copy.bo.map, link_copy.bo.size);
anv_gem_close(pool->device, link_copy.bo.gem_handle);
link = link_copy.next;
}
}
VG(VALGRIND_DESTROY_MEMPOOL(pool));
}
VkResult
anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size)
{
VkResult result;
const unsigned size_log2 = size < 4096 ? 12 : ilog2_round_up(size);
const unsigned pow2_size = 1 << size_log2;
const unsigned bucket = size_log2 - 12;
assert(bucket < ARRAY_SIZE(pool->free_list));
void *next_free_void;
if (anv_ptr_free_list_pop(&pool->free_list[bucket], &next_free_void)) {
struct bo_pool_bo_link *next_free = next_free_void;
*bo = VG_NOACCESS_READ(&next_free->bo);
assert(bo->map == next_free);
assert(size <= bo->size);
VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size));
return VK_SUCCESS;
}
struct anv_bo new_bo;
result = anv_bo_init_new(&new_bo, pool->device, pow2_size);
if (result != VK_SUCCESS)
return result;
assert(new_bo.size == pow2_size);
new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0);
if (new_bo.map == NULL) {
anv_gem_close(pool->device, new_bo.gem_handle);
return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
}
*bo = new_bo;
VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size));
return VK_SUCCESS;
}
void
anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo_in)
{
/* Make a copy in case the anv_bo happens to be stored in the BO */
struct anv_bo bo = *bo_in;
struct bo_pool_bo_link *link = bo.map;
link->bo = bo;
assert(util_is_power_of_two(bo.size));
const unsigned size_log2 = ilog2_round_up(bo.size);
const unsigned bucket = size_log2 - 12;
assert(bucket < ARRAY_SIZE(pool->free_list));
VG(VALGRIND_MEMPOOL_FREE(pool, bo.map));
anv_ptr_free_list_push(&pool->free_list[bucket], link);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@@ -0,0 +1,655 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
/*
* Descriptor set layouts.
*/
VkResult anv_CreateDescriptorSetLayout(
VkDevice _device,
const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDescriptorSetLayout* pSetLayout)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_descriptor_set_layout *set_layout;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
uint32_t max_binding = 0;
uint32_t immutable_sampler_count = 0;
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
if (pCreateInfo->pBindings[j].pImmutableSamplers)
immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
}
size_t size = sizeof(struct anv_descriptor_set_layout) +
(max_binding + 1) * sizeof(set_layout->binding[0]) +
immutable_sampler_count * sizeof(struct anv_sampler *);
set_layout = anv_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!set_layout)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
/* We just allocate all the samplers at the end of the struct */
struct anv_sampler **samplers =
(struct anv_sampler **)&set_layout->binding[max_binding + 1];
set_layout->binding_count = max_binding + 1;
set_layout->shader_stages = 0;
set_layout->size = 0;
for (uint32_t b = 0; b <= max_binding; b++) {
/* Initialize all binding_layout entries to -1 */
memset(&set_layout->binding[b], -1, sizeof(set_layout->binding[b]));
set_layout->binding[b].immutable_samplers = NULL;
}
/* Initialize all samplers to 0 */
memset(samplers, 0, immutable_sampler_count * sizeof(*samplers));
uint32_t sampler_count[MESA_SHADER_STAGES] = { 0, };
uint32_t surface_count[MESA_SHADER_STAGES] = { 0, };
uint32_t image_count[MESA_SHADER_STAGES] = { 0, };
uint32_t buffer_count = 0;
uint32_t dynamic_offset_count = 0;
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
uint32_t b = binding->binding;
assert(binding->descriptorCount > 0);
set_layout->binding[b].array_size = binding->descriptorCount;
set_layout->binding[b].descriptor_index = set_layout->size;
set_layout->size += binding->descriptorCount;
switch (binding->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
anv_foreach_stage(s, binding->stageFlags) {
set_layout->binding[b].stage[s].sampler_index = sampler_count[s];
sampler_count[s] += binding->descriptorCount;
}
break;
default:
break;
}
switch (binding->descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
set_layout->binding[b].buffer_index = buffer_count;
buffer_count += binding->descriptorCount;
/* fall through */
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
anv_foreach_stage(s, binding->stageFlags) {
set_layout->binding[b].stage[s].surface_index = surface_count[s];
surface_count[s] += binding->descriptorCount;
}
break;
default:
break;
}
switch (binding->descriptorType) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
set_layout->binding[b].dynamic_offset_index = dynamic_offset_count;
dynamic_offset_count += binding->descriptorCount;
break;
default:
break;
}
switch (binding->descriptorType) {
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
anv_foreach_stage(s, binding->stageFlags) {
set_layout->binding[b].stage[s].image_index = image_count[s];
image_count[s] += binding->descriptorCount;
}
break;
default:
break;
}
if (binding->pImmutableSamplers) {
set_layout->binding[b].immutable_samplers = samplers;
samplers += binding->descriptorCount;
for (uint32_t i = 0; i < binding->descriptorCount; i++)
set_layout->binding[b].immutable_samplers[i] =
anv_sampler_from_handle(binding->pImmutableSamplers[i]);
} else {
set_layout->binding[b].immutable_samplers = NULL;
}
set_layout->shader_stages |= binding->stageFlags;
}
set_layout->buffer_count = buffer_count;
set_layout->dynamic_offset_count = dynamic_offset_count;
*pSetLayout = anv_descriptor_set_layout_to_handle(set_layout);
return VK_SUCCESS;
}
void anv_DestroyDescriptorSetLayout(
VkDevice _device,
VkDescriptorSetLayout _set_layout,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout);
anv_free2(&device->alloc, pAllocator, set_layout);
}
/*
* Pipeline layouts. These have nothing to do with the pipeline. They are
* just multiple descriptor set layouts pasted together
*/
VkResult anv_CreatePipelineLayout(
VkDevice _device,
const VkPipelineLayoutCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipelineLayout* pPipelineLayout)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_pipeline_layout *layout;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
layout = anv_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (layout == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
layout->num_sets = pCreateInfo->setLayoutCount;
unsigned dynamic_offset_count = 0;
memset(layout->stage, 0, sizeof(layout->stage));
for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout,
pCreateInfo->pSetLayouts[set]);
layout->set[set].layout = set_layout;
layout->set[set].dynamic_offset_start = dynamic_offset_count;
for (uint32_t b = 0; b < set_layout->binding_count; b++) {
if (set_layout->binding[b].dynamic_offset_index < 0)
continue;
dynamic_offset_count += set_layout->binding[b].array_size;
for (gl_shader_stage s = 0; s < MESA_SHADER_STAGES; s++) {
if (set_layout->binding[b].stage[s].surface_index >= 0)
layout->stage[s].has_dynamic_offsets = true;
}
}
}
*pPipelineLayout = anv_pipeline_layout_to_handle(layout);
return VK_SUCCESS;
}
void anv_DestroyPipelineLayout(
VkDevice _device,
VkPipelineLayout _pipelineLayout,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, _pipelineLayout);
anv_free2(&device->alloc, pAllocator, pipeline_layout);
}
/*
* Descriptor pools.
*
* These are implemented using a big pool of memory and a free-list for the
* host memory allocations and a state_stream and a free list for the buffer
* view surface state. The spec allows us to fail to allocate due to
* fragmentation in all cases but two: 1) after pool reset, allocating up
* until the pool size with no freeing must succeed and 2) allocating and
* freeing only descriptor sets with the same layout. Case 1) is easy enogh,
* and the free lists lets us recycle blocks for case 2).
*/
#define EMPTY 1
VkResult anv_CreateDescriptorPool(
VkDevice _device,
const VkDescriptorPoolCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDescriptorPool* pDescriptorPool)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_descriptor_pool *pool;
uint32_t descriptor_count = 0;
uint32_t buffer_count = 0;
for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
switch (pCreateInfo->pPoolSizes[i].type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
buffer_count += pCreateInfo->pPoolSizes[i].descriptorCount;
default:
descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
break;
}
}
const size_t size =
sizeof(*pool) +
pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
descriptor_count * sizeof(struct anv_descriptor) +
buffer_count * sizeof(struct anv_buffer_view);
pool = anv_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!pool)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
pool->size = size;
pool->next = 0;
pool->free_list = EMPTY;
anv_state_stream_init(&pool->surface_state_stream,
&device->surface_state_block_pool);
pool->surface_state_free_list = NULL;
*pDescriptorPool = anv_descriptor_pool_to_handle(pool);
return VK_SUCCESS;
}
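/* A minimal client-side sketch (illustrative only) of the kind of request
 * the sizing above services: with these pool sizes, descriptor_count = 12
 * and buffer_count = 4 in the arithmetic above. Function name is
 * hypothetical; error handling omitted.
 */
#if 0
static VkDescriptorPool
example_create_pool(VkDevice device)
{
   const VkDescriptorPoolSize sizes[2] = {
      { .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .descriptorCount = 8 },
      { .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,         .descriptorCount = 4 },
   };
   const VkDescriptorPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
      .maxSets = 4,
      .poolSizeCount = 2,
      .pPoolSizes = sizes,
   };
   VkDescriptorPool pool;
   vkCreateDescriptorPool(device, &info, NULL, &pool);
   return pool;
}
#endif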
void anv_DestroyDescriptorPool(
VkDevice _device,
VkDescriptorPool _pool,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_descriptor_pool, pool, _pool);
anv_state_stream_finish(&pool->surface_state_stream);
anv_free2(&device->alloc, pAllocator, pool);
}
VkResult anv_ResetDescriptorPool(
VkDevice _device,
VkDescriptorPool descriptorPool,
VkDescriptorPoolResetFlags flags)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool);
pool->next = 0;
pool->free_list = EMPTY;
anv_state_stream_finish(&pool->surface_state_stream);
anv_state_stream_init(&pool->surface_state_stream,
&device->surface_state_block_pool);
pool->surface_state_free_list = NULL;
return VK_SUCCESS;
}
struct pool_free_list_entry {
uint32_t next;
uint32_t size;
};
static size_t
layout_size(const struct anv_descriptor_set_layout *layout)
{
return
sizeof(struct anv_descriptor_set) +
layout->size * sizeof(struct anv_descriptor) +
layout->buffer_count * sizeof(struct anv_buffer_view);
}
struct surface_state_free_list_entry {
void *next;
uint32_t offset;
};
VkResult
anv_descriptor_set_create(struct anv_device *device,
struct anv_descriptor_pool *pool,
const struct anv_descriptor_set_layout *layout,
struct anv_descriptor_set **out_set)
{
struct anv_descriptor_set *set;
const size_t size = layout_size(layout);
set = NULL;
if (size <= pool->size - pool->next) {
set = (struct anv_descriptor_set *) (pool->data + pool->next);
pool->next += size;
} else {
struct pool_free_list_entry *entry;
uint32_t *link = &pool->free_list;
for (uint32_t f = pool->free_list; f != EMPTY; f = entry->next) {
entry = (struct pool_free_list_entry *) (pool->data + f);
if (size <= entry->size) {
*link = entry->next;
set = (struct anv_descriptor_set *) entry;
break;
}
link = &entry->next;
}
}
if (set == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
set->size = size;
set->layout = layout;
set->buffer_views =
(struct anv_buffer_view *) &set->descriptors[layout->size];
set->buffer_count = layout->buffer_count;
/* Go through and fill out immutable samplers if we have any */
struct anv_descriptor *desc = set->descriptors;
for (uint32_t b = 0; b < layout->binding_count; b++) {
if (layout->binding[b].immutable_samplers) {
for (uint32_t i = 0; i < layout->binding[b].array_size; i++) {
/* The type will get changed to COMBINED_IMAGE_SAMPLER in
* UpdateDescriptorSets if needed. However, if the descriptor
* set has an immutable sampler, UpdateDescriptorSets may never
* touch it, so we need to make sure it's 100% valid now.
*/
desc[i] = (struct anv_descriptor) {
.type = VK_DESCRIPTOR_TYPE_SAMPLER,
.sampler = layout->binding[b].immutable_samplers[i],
};
}
}
desc += layout->binding[b].array_size;
}
/* Allocate surface state for the buffer views. */
for (uint32_t b = 0; b < layout->buffer_count; b++) {
struct surface_state_free_list_entry *entry =
pool->surface_state_free_list;
struct anv_state state;
if (entry) {
state.map = entry;
state.offset = entry->offset;
state.alloc_size = 64;
pool->surface_state_free_list = entry->next;
} else {
state = anv_state_stream_alloc(&pool->surface_state_stream, 64, 64);
}
set->buffer_views[b].surface_state = state;
}
*out_set = set;
return VK_SUCCESS;
}
void
anv_descriptor_set_destroy(struct anv_device *device,
struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set)
{
/* Put the buffer view surface state back on the free list. */
for (uint32_t b = 0; b < set->buffer_count; b++) {
struct surface_state_free_list_entry *entry =
set->buffer_views[b].surface_state.map;
entry->next = pool->surface_state_free_list;
pool->surface_state_free_list = entry;
}
/* Put the descriptor set allocation back on the free list. */
const uint32_t index = (char *) set - pool->data;
if (index + set->size == pool->next) {
pool->next = index;
} else {
struct pool_free_list_entry *entry = (struct pool_free_list_entry *) set;
entry->next = pool->free_list;
entry->size = set->size;
pool->free_list = (char *) entry - pool->data;
}
}
VkResult anv_AllocateDescriptorSets(
VkDevice _device,
const VkDescriptorSetAllocateInfo* pAllocateInfo,
VkDescriptorSet* pDescriptorSets)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_descriptor_pool, pool, pAllocateInfo->descriptorPool);
VkResult result = VK_SUCCESS;
struct anv_descriptor_set *set;
uint32_t i;
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
ANV_FROM_HANDLE(anv_descriptor_set_layout, layout,
pAllocateInfo->pSetLayouts[i]);
result = anv_descriptor_set_create(device, pool, layout, &set);
if (result != VK_SUCCESS)
break;
pDescriptorSets[i] = anv_descriptor_set_to_handle(set);
}
if (result != VK_SUCCESS)
anv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool,
i, pDescriptorSets);
return result;
}
VkResult anv_FreeDescriptorSets(
VkDevice _device,
VkDescriptorPool descriptorPool,
uint32_t count,
const VkDescriptorSet* pDescriptorSets)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_descriptor_pool, pool, descriptorPool);
for (uint32_t i = 0; i < count; i++) {
ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
anv_descriptor_set_destroy(device, pool, set);
}
return VK_SUCCESS;
}
void anv_UpdateDescriptorSets(
VkDevice _device,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet* pDescriptorWrites,
uint32_t descriptorCopyCount,
const VkCopyDescriptorSet* pDescriptorCopies)
{
ANV_FROM_HANDLE(anv_device, device, _device);
for (uint32_t i = 0; i < descriptorWriteCount; i++) {
const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
ANV_FROM_HANDLE(anv_descriptor_set, set, write->dstSet);
const struct anv_descriptor_set_binding_layout *bind_layout =
&set->layout->binding[write->dstBinding];
struct anv_descriptor *desc =
&set->descriptors[bind_layout->descriptor_index];
desc += write->dstArrayElement;
switch (write->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
ANV_FROM_HANDLE(anv_sampler, sampler,
write->pImageInfo[j].sampler);
desc[j] = (struct anv_descriptor) {
.type = VK_DESCRIPTOR_TYPE_SAMPLER,
.sampler = sampler,
};
}
break;
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
ANV_FROM_HANDLE(anv_image_view, iview,
write->pImageInfo[j].imageView);
ANV_FROM_HANDLE(anv_sampler, sampler,
write->pImageInfo[j].sampler);
desc[j].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
desc[j].image_view = iview;
/* If this descriptor has an immutable sampler, we don't want
* to stomp on it.
*/
if (sampler)
desc[j].sampler = sampler;
}
break;
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
ANV_FROM_HANDLE(anv_image_view, iview,
write->pImageInfo[j].imageView);
desc[j] = (struct anv_descriptor) {
.type = write->descriptorType,
.image_view = iview,
};
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
ANV_FROM_HANDLE(anv_buffer_view, bview,
write->pTexelBufferView[j]);
desc[j] = (struct anv_descriptor) {
.type = write->descriptorType,
.buffer_view = bview,
};
}
break;
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
anv_finishme("input attachments not implemented");
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
assert(write->pBufferInfo[j].buffer);
ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer);
assert(buffer);
struct anv_buffer_view *view =
&set->buffer_views[bind_layout->buffer_index];
view += write->dstArrayElement + j;
const struct anv_format *format =
anv_format_for_descriptor_type(write->descriptorType);
view->format = format->isl_format;
view->bo = buffer->bo;
view->offset = buffer->offset + write->pBufferInfo[j].offset;
/* For buffers with dynamic offsets, we use the full possible
* range in the surface state and do the actual range-checking
* in the shader.
*/
if (bind_layout->dynamic_offset_index >= 0 ||
write->pBufferInfo[j].range == VK_WHOLE_SIZE)
view->range = buffer->size - write->pBufferInfo[j].offset;
else
view->range = write->pBufferInfo[j].range;
anv_fill_buffer_surface_state(device, view->surface_state,
view->format,
view->offset, view->range, 1);
desc[j] = (struct anv_descriptor) {
.type = write->descriptorType,
.buffer_view = view,
};
}
break;
default:
break;
}
}
for (uint32_t i = 0; i < descriptorCopyCount; i++) {
const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet);
ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet);
const struct anv_descriptor_set_binding_layout *src_layout =
&src->layout->binding[copy->srcBinding];
struct anv_descriptor *src_desc =
&src->descriptors[src_layout->descriptor_index];
src_desc += copy->srcArrayElement;
const struct anv_descriptor_set_binding_layout *dst_layout =
&dst->layout->binding[copy->dstBinding];
struct anv_descriptor *dst_desc =
&dst->descriptors[dst_layout->descriptor_index];
dst_desc += copy->dstArrayElement;
for (uint32_t j = 0; j < copy->descriptorCount; j++)
dst_desc[j] = src_desc[j];
}
}
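/* For reference, a minimal client-side sketch (illustrative only) of a write
 * that exercises the uniform-buffer path above. The structs and entry point
 * are standard Vulkan API; the binding and buffer are assumed to exist and
 * the helper name is hypothetical.
 */
#if 0
static void
example_write_uniform_buffer(VkDevice device, VkDescriptorSet set,
                             VkBuffer buffer, VkDeviceSize size)
{
   const VkDescriptorBufferInfo buffer_info = {
      .buffer = buffer,
      .offset = 0,
      .range = size, /* or VK_WHOLE_SIZE */
   };
   const VkWriteDescriptorSet write = {
      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
      .dstSet = set,
      .dstBinding = 0,
      .dstArrayElement = 0,
      .descriptorCount = 1,
      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
      .pBufferInfo = &buffer_info,
   };
   vkUpdateDescriptorSets(device, 1, &write, 0, NULL);
}
#endif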

File diff suppressed because it is too large

209
src/intel/vulkan/anv_dump.c Normal file
View file

@@ -0,0 +1,209 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
/* This file contains utility functions to help with debugging. They can be
* called from GDB or similar to help inspect images and buffers.
*/
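/* A usage sketch (handles and output path are assumed): from GDB one might
 * dump the current color target with something like
 *
 *    (gdb) call anv_dump_image_to_ppm(device, image, 0, 0, "/tmp/frame.ppm")
 *
 * or temporarily wire in a hook like the hypothetical helper below.
 */
#if 0
static void
example_dump_base_level(struct anv_device *device, struct anv_image *image)
{
   /* Dump mip level 0 of array layer 0; the output path is arbitrary. */
   anv_dump_image_to_ppm(device, image, 0, 0, "/tmp/anv-dump.ppm");
}
#endif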
void
anv_dump_image_to_ppm(struct anv_device *device,
struct anv_image *image, unsigned miplevel,
unsigned array_layer, const char *filename)
{
VkDevice vk_device = anv_device_to_handle(device);
VkResult result;
VkExtent2D extent = { image->extent.width, image->extent.height };
for (unsigned i = 0; i < miplevel; i++) {
extent.width = MAX2(1, extent.width / 2);
extent.height = MAX2(1, extent.height / 2);
}
VkImage copy_image;
result = anv_CreateImage(vk_device,
&(VkImageCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = VK_FORMAT_R8G8B8A8_UNORM,
.extent = (VkExtent3D) { extent.width, extent.height, 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = 1,
.tiling = VK_IMAGE_TILING_LINEAR,
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.flags = 0,
}, NULL, &copy_image);
assert(result == VK_SUCCESS);
VkMemoryRequirements reqs;
anv_GetImageMemoryRequirements(vk_device, copy_image, &reqs);
VkDeviceMemory memory;
result = anv_AllocateMemory(vk_device,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = reqs.size,
.memoryTypeIndex = 0,
}, NULL, &memory);
assert(result == VK_SUCCESS);
result = anv_BindImageMemory(vk_device, copy_image, memory, 0);
assert(result == VK_SUCCESS);
VkCommandPool commandPool;
result = anv_CreateCommandPool(vk_device,
&(VkCommandPoolCreateInfo) {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.queueFamilyIndex = 0,
.flags = 0,
}, NULL, &commandPool);
assert(result == VK_SUCCESS);
VkCommandBuffer cmd;
result = anv_AllocateCommandBuffers(vk_device,
&(VkCommandBufferAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = commandPool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
}, &cmd);
assert(result == VK_SUCCESS);
result = anv_BeginCommandBuffer(cmd,
&(VkCommandBufferBeginInfo) {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
});
assert(result == VK_SUCCESS);
anv_CmdBlitImage(cmd,
anv_image_to_handle(image), VK_IMAGE_LAYOUT_GENERAL,
copy_image, VK_IMAGE_LAYOUT_GENERAL, 1,
&(VkImageBlit) {
.srcSubresource = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = miplevel,
.baseArrayLayer = array_layer,
.layerCount = 1,
},
.srcOffsets = {
{ 0, 0, 0 },
{ extent.width, extent.height, 1 },
},
.dstSubresource = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.dstOffsets = {
{ 0, 0, 0 },
{ extent.width, extent.height, 1 },
},
}, VK_FILTER_NEAREST);
ANV_CALL(CmdPipelineBarrier)(cmd,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
true, 0, NULL, 0, NULL, 1,
&(VkImageMemoryBarrier) {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_HOST_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = 0,
.dstQueueFamilyIndex = 0,
.image = copy_image,
.subresourceRange = (VkImageSubresourceRange) {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
});
result = anv_EndCommandBuffer(cmd);
assert(result == VK_SUCCESS);
VkFence fence;
result = anv_CreateFence(vk_device,
&(VkFenceCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.flags = 0,
}, NULL, &fence);
assert(result == VK_SUCCESS);
result = anv_QueueSubmit(anv_queue_to_handle(&device->queue), 1,
&(VkSubmitInfo) {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &cmd,
}, fence);
assert(result == VK_SUCCESS);
result = anv_WaitForFences(vk_device, 1, &fence, true, UINT64_MAX);
assert(result == VK_SUCCESS);
anv_DestroyFence(vk_device, fence, NULL);
anv_DestroyCommandPool(vk_device, commandPool, NULL);
uint8_t *map;
result = anv_MapMemory(vk_device, memory, 0, reqs.size, 0, (void **)&map);
assert(result == VK_SUCCESS);
VkSubresourceLayout layout;
anv_GetImageSubresourceLayout(vk_device, copy_image,
&(VkImageSubresource) {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.arrayLayer = 0,
}, &layout);
map += layout.offset;
/* Now we can finally write the PPM file */
FILE *file = fopen(filename, "wb");
assert(file);
fprintf(file, "P6\n%d %d\n255\n", extent.width, extent.height);
for (unsigned y = 0; y < extent.height; y++) {
uint8_t row[extent.width * 3];
for (unsigned x = 0; x < extent.width; x++) {
row[x * 3 + 0] = map[x * 4 + 0];
row[x * 3 + 1] = map[x * 4 + 1];
row[x * 3 + 2] = map[x * 4 + 2];
}
fwrite(row, 3, extent.width, file);
map += layout.rowPitch;
}
fclose(file);
anv_UnmapMemory(vk_device, memory);
anv_DestroyImage(vk_device, copy_image, NULL);
anv_FreeMemory(vk_device, memory, NULL);
}

View file

@@ -0,0 +1,323 @@
# coding=utf-8
#
# Copyright © 2015 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import fileinput, re, sys
# Each function typedef in the vulkan.h header is all on one line and matches
# this regexp. We hope that won't change.
p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);')
entrypoints = []
# We generate a static hash table for entry point lookup
# (vkGetProcAddress). We use a linear congruential generator for our hash
# function and a power-of-two size table. The prime numbers are determined
# experimentally.
none = 0xffff
hash_size = 256
u32_mask = 2**32 - 1
hash_mask = hash_size - 1
prime_factor = 5024183
prime_step = 19
def hash(name):
h = 0;
for c in name:
h = (h * prime_factor + ord(c)) & u32_mask
return h
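# A small sketch (not used by the generator) of the lookup the generated C
# code performs with this hash: probe the table with the same prime_step
# until an empty slot or a matching entry turns up. `table` is assumed to be
# the uint16 map built further down; entrypoint names are stored without the
# "vk" prefix.
def example_lookup(table, full_name):
    h = hash(full_name)
    while True:
        idx = table[h & hash_mask]
        if idx == none:
            return None
        if entrypoints[idx][1] == full_name[2:]:
            return entrypoints[idx]
        h = h + prime_step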
opt_header = False
opt_code = False
if (sys.argv[1] == "header"):
opt_header = True
sys.argv.pop()
elif (sys.argv[1] == "code"):
opt_code = True
sys.argv.pop()
# Parse the entry points in the header
i = 0
for line in fileinput.input():
m = p.match(line)
if (m):
if m.group(2) == 'VoidFunction':
continue
fullname = "vk" + m.group(2)
h = hash(fullname)
entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
i = i + 1
# For outputting entrypoints.h we generate an anv_EntryPoint() prototype
# per entry point.
if opt_header:
print "/* This file generated from vk_gen.py, don't edit directly. */\n"
print "struct anv_dispatch_table {"
print " union {"
print " void *entrypoints[%d];" % len(entrypoints)
print " struct {"
for type, name, args, num, h in entrypoints:
print " %s (*%s)%s;" % (type, name, args)
print " };\n"
print " };\n"
print "};\n"
print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n"
for type, name, args, num, h in entrypoints:
print "%s anv_%s%s;" % (type, name, args)
print "%s gen7_%s%s;" % (type, name, args)
print "%s gen75_%s%s;" % (type, name, args)
print "%s gen8_%s%s;" % (type, name, args)
print "%s gen9_%s%s;" % (type, name, args)
print "%s anv_validate_%s%s;" % (type, name, args)
exit()
print """/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/* DO NOT EDIT! This is a generated file. */
#include "anv_private.h"
struct anv_entrypoint {
uint32_t name;
uint32_t hash;
};
/* We use a big string constant to avoid lots of relocations from the entry
* point table to lots of little strings. The entries in the entry point table
* store the index into this big string.
*/
static const char strings[] ="""
offsets = []
i = 0;
for type, name, args, num, h in entrypoints:
print " \"vk%s\\0\"" % name
offsets.append(i)
i += 2 + len(name) + 1
print """ ;
/* Weak aliases for all potential validate functions. These will resolve to
* NULL if they're not defined, which lets the resolve_entrypoint() function
* either pick a validate wrapper if available or just plug in the actual
* entry point.
*/
"""
# Now generate the table of all entry points and their validation functions
print "\nstatic const struct anv_entrypoint entrypoints[] = {"
for type, name, args, num, h in entrypoints:
print " { %5d, 0x%08x }," % (offsets[num], h)
print "};\n"
for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]:
for type, name, args, num, h in entrypoints:
print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
print "\nconst struct anv_dispatch_table %s_layer = {" % layer
for type, name, args, num, h in entrypoints:
print " .%s = %s_%s," % (name, layer, name)
print "};\n"
print """
#ifdef DEBUG
static bool enable_validate = true;
#else
static bool enable_validate = false;
#endif
/* We can't use symbols that need resolving (like, oh, getenv) in the resolve
* function. This means that we have to determine whether or not to use the
* validation layer sometime before that. The constructor function attribute asks
* the dynamic linker to invoke determine_validate() at dlopen() time which
* works.
*/
static void __attribute__ ((constructor))
determine_validate(void)
{
const char *s = getenv("ANV_VALIDATE");
if (s)
enable_validate = atoi(s);
}
static const struct brw_device_info *dispatch_devinfo;
void
anv_set_dispatch_devinfo(const struct brw_device_info *devinfo)
{
dispatch_devinfo = devinfo;
}
void * __attribute__ ((noinline))
anv_resolve_entrypoint(uint32_t index)
{
if (enable_validate && validate_layer.entrypoints[index])
return validate_layer.entrypoints[index];
if (dispatch_devinfo == NULL) {
return anv_layer.entrypoints[index];
}
switch (dispatch_devinfo->gen) {
case 9:
if (gen9_layer.entrypoints[index])
return gen9_layer.entrypoints[index];
/* fall through */
case 8:
if (gen8_layer.entrypoints[index])
return gen8_layer.entrypoints[index];
/* fall through */
case 7:
if (dispatch_devinfo->is_haswell && gen75_layer.entrypoints[index])
return gen75_layer.entrypoints[index];
if (gen7_layer.entrypoints[index])
return gen7_layer.entrypoints[index];
/* fall through */
case 0:
return anv_layer.entrypoints[index];
default:
unreachable("unsupported gen\\n");
}
}
"""
# Now output ifuncs and their resolve helpers for all entry points. The
# resolve helper calls resolve_entrypoint() with the entry point index, which
# lets the resolver look it up in the table.
for type, name, args, num, h in entrypoints:
print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num)
print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name)
# Now generate the hash table used for entry point look up. This is a
# uint16_t table of entry point indices. We use 0xffff to indicate an entry
# in the hash table is empty.
map = [none for f in xrange(hash_size)]
collisions = [0 for f in xrange(10)]
for type, name, args, num, h in entrypoints:
level = 0
while map[h & hash_mask] != none:
h = h + prime_step
level = level + 1
if level > 9:
collisions[9] += 1
else:
collisions[level] += 1
map[h & hash_mask] = num
print "/* Hash table stats:"
print " * size %d entries" % hash_size
print " * collisions entries"
for i in xrange(10):
if (i == 9):
plus = "+"
else:
plus = " "
print " * %2d%s %4d" % (i, plus, collisions[i])
print " */\n"
print "#define none 0x%04x\n" % none
print "static const uint16_t map[] = {"
for i in xrange(0, hash_size, 8):
print " ",
for j in xrange(i, i + 8):
if map[j] & 0xffff == 0xffff:
print " none,",
else:
print "0x%04x," % (map[j] & 0xffff),
print
print "};"
# Finally we generate the hash table lookup function. The hash function and
# linear probing algorithm matches the hash table generated above.
print """
void *
anv_lookup_entrypoint(const char *name)
{
static const uint32_t prime_factor = %d;
static const uint32_t prime_step = %d;
const struct anv_entrypoint *e;
uint32_t hash, h, i;
const char *p;
hash = 0;
for (p = name; *p; p++)
hash = hash * prime_factor + *p;
h = hash;
do {
i = map[h & %d];
if (i == none)
return NULL;
e = &entrypoints[i];
h += prime_step;
} while (e->hash != hash);
if (strcmp(name, strings + e->name) != 0)
return NULL;
return anv_resolve_entrypoint(i);
}
""" % (prime_factor, prime_step, hash_mask)

View file

@@ -0,0 +1,601 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "brw_surface_formats.h"
#define RGBA { 0, 1, 2, 3 }
#define BGRA { 2, 1, 0, 3 }
#define swiz_fmt(__vk_fmt, __hw_fmt, __swizzle, ...) \
[__vk_fmt] = { \
.vk_format = __vk_fmt, \
.name = #__vk_fmt, \
.isl_format = __hw_fmt, \
.isl_layout = &isl_format_layouts[__hw_fmt], \
.swizzle = __swizzle, \
__VA_ARGS__ \
}
#define fmt(__vk_fmt, __hw_fmt, ...) \
swiz_fmt(__vk_fmt, __hw_fmt, RGBA, __VA_ARGS__)
/* HINT: For array formats, the ISL name should match the VK name. For
* packed formats, they should have the channels in reverse order from each
* other. The reason for this is that, for packed formats, the ISL (and
* bspec) names are in LSB -> MSB order while VK formats are MSB -> LSB.
*/
static const struct anv_format anv_formats[] = {
fmt(VK_FORMAT_UNDEFINED, ISL_FORMAT_RAW),
fmt(VK_FORMAT_R4G4_UNORM_PACK8, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_R4G4B4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM),
swiz_fmt(VK_FORMAT_B4G4R4A4_UNORM_PACK16, ISL_FORMAT_A4B4G4R4_UNORM, BGRA),
fmt(VK_FORMAT_R5G6B5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM),
swiz_fmt(VK_FORMAT_B5G6R5_UNORM_PACK16, ISL_FORMAT_B5G6R5_UNORM, BGRA),
fmt(VK_FORMAT_R5G5B5A1_UNORM_PACK16, ISL_FORMAT_A1B5G5R5_UNORM),
fmt(VK_FORMAT_B5G5R5A1_UNORM_PACK16, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_A1R5G5B5_UNORM_PACK16, ISL_FORMAT_B5G5R5A1_UNORM),
fmt(VK_FORMAT_R8_UNORM, ISL_FORMAT_R8_UNORM),
fmt(VK_FORMAT_R8_SNORM, ISL_FORMAT_R8_SNORM),
fmt(VK_FORMAT_R8_USCALED, ISL_FORMAT_R8_USCALED),
fmt(VK_FORMAT_R8_SSCALED, ISL_FORMAT_R8_SSCALED),
fmt(VK_FORMAT_R8_UINT, ISL_FORMAT_R8_UINT),
fmt(VK_FORMAT_R8_SINT, ISL_FORMAT_R8_SINT),
fmt(VK_FORMAT_R8_SRGB, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_R8G8_UNORM, ISL_FORMAT_R8G8_UNORM),
fmt(VK_FORMAT_R8G8_SNORM, ISL_FORMAT_R8G8_SNORM),
fmt(VK_FORMAT_R8G8_USCALED, ISL_FORMAT_R8G8_USCALED),
fmt(VK_FORMAT_R8G8_SSCALED, ISL_FORMAT_R8G8_SSCALED),
fmt(VK_FORMAT_R8G8_UINT, ISL_FORMAT_R8G8_UINT),
fmt(VK_FORMAT_R8G8_SINT, ISL_FORMAT_R8G8_SINT),
fmt(VK_FORMAT_R8G8_SRGB, ISL_FORMAT_UNSUPPORTED), /* L8A8_UNORM_SRGB */
fmt(VK_FORMAT_R8G8B8_UNORM, ISL_FORMAT_R8G8B8_UNORM),
fmt(VK_FORMAT_R8G8B8_SNORM, ISL_FORMAT_R8G8B8_SNORM),
fmt(VK_FORMAT_R8G8B8_USCALED, ISL_FORMAT_R8G8B8_USCALED),
fmt(VK_FORMAT_R8G8B8_SSCALED, ISL_FORMAT_R8G8B8_SSCALED),
fmt(VK_FORMAT_R8G8B8_UINT, ISL_FORMAT_R8G8B8_UINT),
fmt(VK_FORMAT_R8G8B8_SINT, ISL_FORMAT_R8G8B8_SINT),
fmt(VK_FORMAT_R8G8B8_SRGB, ISL_FORMAT_UNSUPPORTED), /* B8G8R8A8_UNORM_SRGB */
fmt(VK_FORMAT_R8G8B8A8_UNORM, ISL_FORMAT_R8G8B8A8_UNORM),
fmt(VK_FORMAT_R8G8B8A8_SNORM, ISL_FORMAT_R8G8B8A8_SNORM),
fmt(VK_FORMAT_R8G8B8A8_USCALED, ISL_FORMAT_R8G8B8A8_USCALED),
fmt(VK_FORMAT_R8G8B8A8_SSCALED, ISL_FORMAT_R8G8B8A8_SSCALED),
fmt(VK_FORMAT_R8G8B8A8_UINT, ISL_FORMAT_R8G8B8A8_UINT),
fmt(VK_FORMAT_R8G8B8A8_SINT, ISL_FORMAT_R8G8B8A8_SINT),
fmt(VK_FORMAT_R8G8B8A8_SRGB, ISL_FORMAT_R8G8B8A8_UNORM_SRGB),
fmt(VK_FORMAT_A8B8G8R8_UNORM_PACK32, ISL_FORMAT_R8G8B8A8_UNORM),
fmt(VK_FORMAT_A8B8G8R8_SNORM_PACK32, ISL_FORMAT_R8G8B8A8_SNORM),
fmt(VK_FORMAT_A8B8G8R8_USCALED_PACK32, ISL_FORMAT_R8G8B8A8_USCALED),
fmt(VK_FORMAT_A8B8G8R8_SSCALED_PACK32, ISL_FORMAT_R8G8B8A8_SSCALED),
fmt(VK_FORMAT_A8B8G8R8_UINT_PACK32, ISL_FORMAT_R8G8B8A8_UINT),
fmt(VK_FORMAT_A8B8G8R8_SINT_PACK32, ISL_FORMAT_R8G8B8A8_SINT),
fmt(VK_FORMAT_A8B8G8R8_SRGB_PACK32, ISL_FORMAT_R8G8B8A8_UNORM_SRGB),
fmt(VK_FORMAT_A2R10G10B10_UNORM_PACK32, ISL_FORMAT_B10G10R10A2_UNORM),
fmt(VK_FORMAT_A2R10G10B10_SNORM_PACK32, ISL_FORMAT_B10G10R10A2_SNORM),
fmt(VK_FORMAT_A2R10G10B10_USCALED_PACK32, ISL_FORMAT_B10G10R10A2_USCALED),
fmt(VK_FORMAT_A2R10G10B10_SSCALED_PACK32, ISL_FORMAT_B10G10R10A2_SSCALED),
fmt(VK_FORMAT_A2R10G10B10_UINT_PACK32, ISL_FORMAT_B10G10R10A2_UINT),
fmt(VK_FORMAT_A2R10G10B10_SINT_PACK32, ISL_FORMAT_B10G10R10A2_SINT),
fmt(VK_FORMAT_A2B10G10R10_UNORM_PACK32, ISL_FORMAT_R10G10B10A2_UNORM),
fmt(VK_FORMAT_A2B10G10R10_SNORM_PACK32, ISL_FORMAT_R10G10B10A2_SNORM),
fmt(VK_FORMAT_A2B10G10R10_USCALED_PACK32, ISL_FORMAT_R10G10B10A2_USCALED),
fmt(VK_FORMAT_A2B10G10R10_SSCALED_PACK32, ISL_FORMAT_R10G10B10A2_SSCALED),
fmt(VK_FORMAT_A2B10G10R10_UINT_PACK32, ISL_FORMAT_R10G10B10A2_UINT),
fmt(VK_FORMAT_A2B10G10R10_SINT_PACK32, ISL_FORMAT_R10G10B10A2_SINT),
fmt(VK_FORMAT_R16_UNORM, ISL_FORMAT_R16_UNORM),
fmt(VK_FORMAT_R16_SNORM, ISL_FORMAT_R16_SNORM),
fmt(VK_FORMAT_R16_USCALED, ISL_FORMAT_R16_USCALED),
fmt(VK_FORMAT_R16_SSCALED, ISL_FORMAT_R16_SSCALED),
fmt(VK_FORMAT_R16_UINT, ISL_FORMAT_R16_UINT),
fmt(VK_FORMAT_R16_SINT, ISL_FORMAT_R16_SINT),
fmt(VK_FORMAT_R16_SFLOAT, ISL_FORMAT_R16_FLOAT),
fmt(VK_FORMAT_R16G16_UNORM, ISL_FORMAT_R16G16_UNORM),
fmt(VK_FORMAT_R16G16_SNORM, ISL_FORMAT_R16G16_SNORM),
fmt(VK_FORMAT_R16G16_USCALED, ISL_FORMAT_R16G16_USCALED),
fmt(VK_FORMAT_R16G16_SSCALED, ISL_FORMAT_R16G16_SSCALED),
fmt(VK_FORMAT_R16G16_UINT, ISL_FORMAT_R16G16_UINT),
fmt(VK_FORMAT_R16G16_SINT, ISL_FORMAT_R16G16_SINT),
fmt(VK_FORMAT_R16G16_SFLOAT, ISL_FORMAT_R16G16_FLOAT),
fmt(VK_FORMAT_R16G16B16_UNORM, ISL_FORMAT_R16G16B16_UNORM),
fmt(VK_FORMAT_R16G16B16_SNORM, ISL_FORMAT_R16G16B16_SNORM),
fmt(VK_FORMAT_R16G16B16_USCALED, ISL_FORMAT_R16G16B16_USCALED),
fmt(VK_FORMAT_R16G16B16_SSCALED, ISL_FORMAT_R16G16B16_SSCALED),
fmt(VK_FORMAT_R16G16B16_UINT, ISL_FORMAT_R16G16B16_UINT),
fmt(VK_FORMAT_R16G16B16_SINT, ISL_FORMAT_R16G16B16_SINT),
fmt(VK_FORMAT_R16G16B16_SFLOAT, ISL_FORMAT_R16G16B16_FLOAT),
fmt(VK_FORMAT_R16G16B16A16_UNORM, ISL_FORMAT_R16G16B16A16_UNORM),
fmt(VK_FORMAT_R16G16B16A16_SNORM, ISL_FORMAT_R16G16B16A16_SNORM),
fmt(VK_FORMAT_R16G16B16A16_USCALED, ISL_FORMAT_R16G16B16A16_USCALED),
fmt(VK_FORMAT_R16G16B16A16_SSCALED, ISL_FORMAT_R16G16B16A16_SSCALED),
fmt(VK_FORMAT_R16G16B16A16_UINT, ISL_FORMAT_R16G16B16A16_UINT),
fmt(VK_FORMAT_R16G16B16A16_SINT, ISL_FORMAT_R16G16B16A16_SINT),
fmt(VK_FORMAT_R16G16B16A16_SFLOAT, ISL_FORMAT_R16G16B16A16_FLOAT),
fmt(VK_FORMAT_R32_UINT, ISL_FORMAT_R32_UINT,),
fmt(VK_FORMAT_R32_SINT, ISL_FORMAT_R32_SINT,),
fmt(VK_FORMAT_R32_SFLOAT, ISL_FORMAT_R32_FLOAT,),
fmt(VK_FORMAT_R32G32_UINT, ISL_FORMAT_R32G32_UINT,),
fmt(VK_FORMAT_R32G32_SINT, ISL_FORMAT_R32G32_SINT,),
fmt(VK_FORMAT_R32G32_SFLOAT, ISL_FORMAT_R32G32_FLOAT,),
fmt(VK_FORMAT_R32G32B32_UINT, ISL_FORMAT_R32G32B32_UINT,),
fmt(VK_FORMAT_R32G32B32_SINT, ISL_FORMAT_R32G32B32_SINT,),
fmt(VK_FORMAT_R32G32B32_SFLOAT, ISL_FORMAT_R32G32B32_FLOAT,),
fmt(VK_FORMAT_R32G32B32A32_UINT, ISL_FORMAT_R32G32B32A32_UINT,),
fmt(VK_FORMAT_R32G32B32A32_SINT, ISL_FORMAT_R32G32B32A32_SINT,),
fmt(VK_FORMAT_R32G32B32A32_SFLOAT, ISL_FORMAT_R32G32B32A32_FLOAT,),
fmt(VK_FORMAT_R64_UINT, ISL_FORMAT_R64_PASSTHRU),
fmt(VK_FORMAT_R64_SINT, ISL_FORMAT_R64_PASSTHRU),
fmt(VK_FORMAT_R64_SFLOAT, ISL_FORMAT_R64_FLOAT),
fmt(VK_FORMAT_R64G64_UINT, ISL_FORMAT_R64G64_PASSTHRU),
fmt(VK_FORMAT_R64G64_SINT, ISL_FORMAT_R64G64_PASSTHRU),
fmt(VK_FORMAT_R64G64_SFLOAT, ISL_FORMAT_R64G64_FLOAT),
fmt(VK_FORMAT_R64G64B64_UINT, ISL_FORMAT_R64G64B64_PASSTHRU),
fmt(VK_FORMAT_R64G64B64_SINT, ISL_FORMAT_R64G64B64_PASSTHRU),
fmt(VK_FORMAT_R64G64B64_SFLOAT, ISL_FORMAT_R64G64B64_FLOAT),
fmt(VK_FORMAT_R64G64B64A64_UINT, ISL_FORMAT_R64G64B64A64_PASSTHRU),
fmt(VK_FORMAT_R64G64B64A64_SINT, ISL_FORMAT_R64G64B64A64_PASSTHRU),
fmt(VK_FORMAT_R64G64B64A64_SFLOAT, ISL_FORMAT_R64G64B64A64_FLOAT),
fmt(VK_FORMAT_B10G11R11_UFLOAT_PACK32, ISL_FORMAT_R11G11B10_FLOAT),
fmt(VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, ISL_FORMAT_R9G9B9E5_SHAREDEXP),
fmt(VK_FORMAT_D16_UNORM, ISL_FORMAT_R16_UNORM, .has_depth = true),
fmt(VK_FORMAT_X8_D24_UNORM_PACK32, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true),
fmt(VK_FORMAT_D32_SFLOAT, ISL_FORMAT_R32_FLOAT, .has_depth = true),
fmt(VK_FORMAT_S8_UINT, ISL_FORMAT_R8_UINT, .has_stencil = true),
fmt(VK_FORMAT_D16_UNORM_S8_UINT, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS, .has_depth = true, .has_stencil = true),
fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT, .has_depth = true, .has_stencil = true),
fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB),
fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB),
fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM),
fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB),
fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM),
fmt(VK_FORMAT_BC2_SRGB_BLOCK, ISL_FORMAT_BC2_UNORM_SRGB),
fmt(VK_FORMAT_BC3_UNORM_BLOCK, ISL_FORMAT_BC3_UNORM),
fmt(VK_FORMAT_BC3_SRGB_BLOCK, ISL_FORMAT_BC3_UNORM_SRGB),
fmt(VK_FORMAT_BC4_UNORM_BLOCK, ISL_FORMAT_BC4_UNORM),
fmt(VK_FORMAT_BC4_SNORM_BLOCK, ISL_FORMAT_BC4_SNORM),
fmt(VK_FORMAT_BC5_UNORM_BLOCK, ISL_FORMAT_BC5_UNORM),
fmt(VK_FORMAT_BC5_SNORM_BLOCK, ISL_FORMAT_BC5_SNORM),
fmt(VK_FORMAT_BC6H_UFLOAT_BLOCK, ISL_FORMAT_BC6H_UF16),
fmt(VK_FORMAT_BC6H_SFLOAT_BLOCK, ISL_FORMAT_BC6H_SF16),
fmt(VK_FORMAT_BC7_UNORM_BLOCK, ISL_FORMAT_BC7_UNORM),
fmt(VK_FORMAT_BC7_SRGB_BLOCK, ISL_FORMAT_BC7_UNORM_SRGB),
fmt(VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8),
fmt(VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8),
fmt(VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, ISL_FORMAT_ETC2_RGB8_PTA),
fmt(VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, ISL_FORMAT_ETC2_SRGB8_PTA),
fmt(VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, ISL_FORMAT_ETC2_EAC_RGBA8),
fmt(VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, ISL_FORMAT_ETC2_EAC_SRGB8_A8),
fmt(VK_FORMAT_EAC_R11_UNORM_BLOCK, ISL_FORMAT_EAC_R11),
fmt(VK_FORMAT_EAC_R11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_R11),
fmt(VK_FORMAT_EAC_R11G11_UNORM_BLOCK, ISL_FORMAT_EAC_RG11),
fmt(VK_FORMAT_EAC_R11G11_SNORM_BLOCK, ISL_FORMAT_EAC_SIGNED_RG11),
fmt(VK_FORMAT_ASTC_4x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_4x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_5x4_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_5x4_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_5x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_5x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_6x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_6x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_6x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_6x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_8x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_8x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_8x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_8x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_8x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_8x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_10x5_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_10x5_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_10x6_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_10x6_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_10x8_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_10x8_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_10x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_10x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_12x10_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_12x10_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_12x12_UNORM_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_ASTC_12x12_SRGB_BLOCK, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8_UNORM, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8_SNORM, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8_USCALED, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8_SSCALED, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8_UINT, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8_SINT, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8_SRGB, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8A8_UNORM, ISL_FORMAT_B8G8R8A8_UNORM),
fmt(VK_FORMAT_B8G8R8A8_SNORM, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8A8_USCALED, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8A8_SSCALED, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8A8_UINT, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8A8_SINT, ISL_FORMAT_UNSUPPORTED),
fmt(VK_FORMAT_B8G8R8A8_SRGB, ISL_FORMAT_B8G8R8A8_UNORM_SRGB),
};
#undef fmt
const struct anv_format *
anv_format_for_vk_format(VkFormat format)
{
return &anv_formats[format];
}
/**
* Exactly one bit must be set in \a aspect.
*/
enum isl_format
anv_get_isl_format(VkFormat format, VkImageAspectFlags aspect,
VkImageTiling tiling, struct anv_format_swizzle *swizzle)
{
const struct anv_format *anv_fmt = &anv_formats[format];
if (swizzle)
*swizzle = anv_fmt->swizzle;
switch (aspect) {
case VK_IMAGE_ASPECT_COLOR_BIT:
if (anv_fmt->isl_format == ISL_FORMAT_UNSUPPORTED) {
return ISL_FORMAT_UNSUPPORTED;
} else if (tiling == VK_IMAGE_TILING_OPTIMAL &&
!util_is_power_of_two(anv_fmt->isl_layout->bs)) {
/* Tiled formats *must* have a power-of-two block size because we need to upload
* them with the render pipeline. For 3-channel formats, we fix
* this by switching them over to RGBX or RGBA formats under the
* hood.
*/
enum isl_format rgbx = isl_format_rgb_to_rgbx(anv_fmt->isl_format);
if (rgbx != ISL_FORMAT_UNSUPPORTED)
return rgbx;
else
return isl_format_rgb_to_rgba(anv_fmt->isl_format);
} else {
return anv_fmt->isl_format;
}
case VK_IMAGE_ASPECT_DEPTH_BIT:
case (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT):
assert(anv_fmt->has_depth);
return anv_fmt->isl_format;
case VK_IMAGE_ASPECT_STENCIL_BIT:
assert(anv_fmt->has_stencil);
return ISL_FORMAT_R8_UINT;
default:
unreachable("bad VkImageAspect");
return ISL_FORMAT_UNSUPPORTED;
}
}
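/* Illustrative example of the 3-channel fixup above (the exact resulting
 * format comes from the ISL tables, so treat it as an assumption): a 24bpp
 * RGB format is returned as-is for linear tiling but widened to an RGBX/RGBA
 * variant for optimal tiling.
 */
#if 0
enum isl_format linear_fmt =
   anv_get_isl_format(VK_FORMAT_R8G8B8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT,
                      VK_IMAGE_TILING_LINEAR, NULL);   /* R8G8B8_UNORM */
enum isl_format tiled_fmt =
   anv_get_isl_format(VK_FORMAT_R8G8B8_UNORM, VK_IMAGE_ASPECT_COLOR_BIT,
                      VK_IMAGE_TILING_OPTIMAL, NULL);  /* e.g. R8G8B8X8_UNORM */
#endif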
// Format capabilities
void anv_validate_GetPhysicalDeviceFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat _format,
VkFormatProperties* pFormatProperties)
{
const struct anv_format *format = anv_format_for_vk_format(_format);
fprintf(stderr, "vkGetFormatProperties(%s)\n", format->name);
anv_GetPhysicalDeviceFormatProperties(physicalDevice, _format, pFormatProperties);
}
static VkFormatFeatureFlags
get_image_format_properties(int gen, enum isl_format base,
enum isl_format actual,
struct anv_format_swizzle swizzle)
{
const struct brw_surface_format_info *info = &surface_formats[actual];
if (actual == ISL_FORMAT_UNSUPPORTED || !info->exists)
return 0;
VkFormatFeatureFlags flags = 0;
if (info->sampling <= gen) {
flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
VK_FORMAT_FEATURE_BLIT_SRC_BIT;
if (info->filtering <= gen)
flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
}
/* We can render to swizzled formats. However, if the alpha channel is
* moved, then blending won't work correctly. The PRM tells us
* straight-up not to render to such a surface.
*/
if (info->render_target <= gen && swizzle.a == 3) {
flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT;
}
if (info->alpha_blend <= gen && swizzle.a == 3)
flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
/* Load/store is determined based on base format. This prevents RGB
* formats from showing up as load/store capable.
*/
if (isl_is_storage_image_format(base))
flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
if (base == ISL_FORMAT_R32_SINT || base == ISL_FORMAT_R32_UINT)
flags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
return flags;
}
static VkFormatFeatureFlags
get_buffer_format_properties(int gen, enum isl_format format)
{
const struct brw_surface_format_info *info = &surface_formats[format];
if (format == ISL_FORMAT_UNSUPPORTED || !info->exists)
return 0;
VkFormatFeatureFlags flags = 0;
if (info->sampling <= gen && !isl_format_is_compressed(format))
flags |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
if (info->input_vb <= gen)
flags |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
if (isl_is_storage_image_format(format))
flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
if (format == ISL_FORMAT_R32_SINT || format == ISL_FORMAT_R32_UINT)
flags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
return flags;
}
static void
anv_physical_device_get_format_properties(struct anv_physical_device *physical_device,
VkFormat format,
VkFormatProperties *out_properties)
{
int gen = physical_device->info->gen * 10;
if (physical_device->info->is_haswell)
gen += 5;
VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
if (anv_format_is_depth_or_stencil(&anv_formats[format])) {
tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
if (physical_device->info->gen >= 8)
tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
VK_FORMAT_FEATURE_BLIT_DST_BIT;
} else {
enum isl_format linear_fmt, tiled_fmt;
struct anv_format_swizzle linear_swizzle, tiled_swizzle;
linear_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_TILING_LINEAR, &linear_swizzle);
tiled_fmt = anv_get_isl_format(format, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_TILING_OPTIMAL, &tiled_swizzle);
linear = get_image_format_properties(gen, linear_fmt, linear_fmt,
linear_swizzle);
tiled = get_image_format_properties(gen, linear_fmt, tiled_fmt,
tiled_swizzle);
buffer = get_buffer_format_properties(gen, linear_fmt);
/* XXX: We handle 3-channel formats by switching them out for RGBX or
* RGBA formats behind-the-scenes. This works fine for textures
* because the upload process will fill in the extra channel.
* We could also support it for render targets, but it will take
* substantially more work and we have enough RGBX formats to handle
* what most clients will want.
*/
if (linear_fmt != ISL_FORMAT_UNSUPPORTED &&
!util_is_power_of_two(isl_format_layouts[linear_fmt].bs) &&
isl_format_rgb_to_rgbx(linear_fmt) == ISL_FORMAT_UNSUPPORTED) {
tiled &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT &
~VK_FORMAT_FEATURE_BLIT_DST_BIT;
}
}
out_properties->linearTilingFeatures = linear;
out_properties->optimalTilingFeatures = tiled;
out_properties->bufferFeatures = buffer;
return;
}
void anv_GetPhysicalDeviceFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
VkFormatProperties* pFormatProperties)
{
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
anv_physical_device_get_format_properties(
physical_device,
format,
pFormatProperties);
}
VkResult anv_GetPhysicalDeviceImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
VkImageType type,
VkImageTiling tiling,
VkImageUsageFlags usage,
VkImageCreateFlags createFlags,
VkImageFormatProperties* pImageFormatProperties)
{
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
VkFormatProperties format_props;
VkFormatFeatureFlags format_feature_flags;
VkExtent3D maxExtent;
uint32_t maxMipLevels;
uint32_t maxArraySize;
VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
anv_physical_device_get_format_properties(physical_device, format,
&format_props);
/* Extract the VkFormatFeatureFlags that are relevant for the queried
* tiling.
*/
if (tiling == VK_IMAGE_TILING_LINEAR) {
format_feature_flags = format_props.linearTilingFeatures;
} else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
format_feature_flags = format_props.optimalTilingFeatures;
} else {
unreachable("bad VkImageTiling");
}
switch (type) {
default:
unreachable("bad VkImageType");
case VK_IMAGE_TYPE_1D:
maxExtent.width = 16384;
maxExtent.height = 1;
maxExtent.depth = 1;
maxMipLevels = 15; /* log2(maxWidth) + 1 */
maxArraySize = 2048;
sampleCounts = VK_SAMPLE_COUNT_1_BIT;
break;
case VK_IMAGE_TYPE_2D:
/* FINISHME: Does this really differ for cube maps? The documentation
* for RENDER_SURFACE_STATE suggests so.
*/
maxExtent.width = 16384;
maxExtent.height = 16384;
maxExtent.depth = 1;
maxMipLevels = 15; /* log2(maxWidth) + 1 */
maxArraySize = 2048;
break;
case VK_IMAGE_TYPE_3D:
maxExtent.width = 2048;
maxExtent.height = 2048;
maxExtent.depth = 2048;
maxMipLevels = 12; /* log2(maxWidth) + 1 */
maxArraySize = 1;
break;
}
if (tiling == VK_IMAGE_TILING_OPTIMAL &&
type == VK_IMAGE_TYPE_2D &&
(format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
!(usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
sampleCounts = isl_device_get_sample_counts(&physical_device->isl_dev);
}
if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
/* Meta implements transfers by sampling from the source image. */
if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
goto unsupported;
}
}
#if 0
if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
if (anv_format_for_vk_format(format)->has_stencil) {
/* Not yet implemented because copying to a W-tiled surface is crazy
* hard.
*/
anv_finishme("support VK_IMAGE_USAGE_TRANSFER_DST_BIT for "
"stencil format");
goto unsupported;
}
}
#endif
if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
goto unsupported;
}
}
if (usage & VK_IMAGE_USAGE_STORAGE_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
goto unsupported;
}
}
if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
goto unsupported;
}
}
if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
goto unsupported;
}
}
if (usage & VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT) {
/* Nothing to check. */
}
if (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
/* Ignore this flag because it was removed from the
* provisional_I_20150910 header.
*/
}
*pImageFormatProperties = (VkImageFormatProperties) {
.maxExtent = maxExtent,
.maxMipLevels = maxMipLevels,
.maxArrayLayers = maxArraySize,
.sampleCounts = sampleCounts,
/* FINISHME: Accurately calculate
* VkImageFormatProperties::maxResourceSize.
*/
.maxResourceSize = UINT32_MAX,
};
return VK_SUCCESS;
unsupported:
*pImageFormatProperties = (VkImageFormatProperties) {
.maxExtent = { 0, 0, 0 },
.maxMipLevels = 0,
.maxArrayLayers = 0,
.sampleCounts = 0,
.maxResourceSize = 0,
};
return VK_SUCCESS;
}
void anv_GetPhysicalDeviceSparseImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
VkImageType type,
uint32_t samples,
VkImageUsageFlags usage,
VkImageTiling tiling,
uint32_t* pNumProperties,
VkSparseImageFormatProperties* pProperties)
{
/* Sparse images are not yet supported. */
*pNumProperties = 0;
}

335
src/intel/vulkan/anv_gem.c Normal file
View file

@@ -0,0 +1,335 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#define _DEFAULT_SOURCE
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
static int
anv_ioctl(int fd, unsigned long request, void *arg)
{
int ret;
do {
ret = ioctl(fd, request, arg);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
return ret;
}
/**
* Wrapper around DRM_IOCTL_I915_GEM_CREATE.
*
* Return gem handle, or 0 on failure. Gem handles are never 0.
*/
uint32_t
anv_gem_create(struct anv_device *device, size_t size)
{
struct drm_i915_gem_create gem_create = {
.size = size,
};
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
if (ret != 0) {
/* FIXME: What do we do if this fails? */
return 0;
}
return gem_create.handle;
}
void
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
{
struct drm_gem_close close = {
.handle = gem_handle,
};
anv_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close);
}
/**
* Wrapper around DRM_IOCTL_I915_GEM_MMAP.
*/
void*
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
struct drm_i915_gem_mmap gem_mmap = {
.handle = gem_handle,
.offset = offset,
.size = size,
.flags = flags,
};
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
if (ret != 0) {
/* FIXME: Is NULL the right error return? Cf MAP_INVALID */
return NULL;
}
VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1));
return (void *)(uintptr_t) gem_mmap.addr_ptr;
}
/* This is just a wrapper around munmap, but it also notifies valgrind that
* this map is no longer valid. Pair this with anv_gem_mmap().
*/
void
anv_gem_munmap(void *p, uint64_t size)
{
VG(VALGRIND_FREELIKE_BLOCK(p, 0));
munmap(p, size);
}
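/* A minimal lifecycle sketch (illustrative only, error handling omitted):
 * create a BO, map it, touch it, then tear everything down. This is the
 * pairing the wrappers above are meant for; the helper name is hypothetical.
 */
#if 0
static void
example_bo_roundtrip(struct anv_device *device)
{
   uint32_t handle = anv_gem_create(device, 4096);
   void *map = anv_gem_mmap(device, handle, 0, 4096, 0);
   memset(map, 0, 4096);
   anv_gem_munmap(map, 4096);
   anv_gem_close(device, handle);
}
#endif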
uint32_t
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
{
struct drm_i915_gem_userptr userptr = {
.user_ptr = (__u64)((unsigned long) mem),
.user_size = size,
.flags = 0,
};
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
if (ret == -1)
return 0;
return userptr.handle;
}
int
anv_gem_set_caching(struct anv_device *device,
uint32_t gem_handle, uint32_t caching)
{
struct drm_i915_gem_caching gem_caching = {
.handle = gem_handle,
.caching = caching,
};
return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &gem_caching);
}
int
anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
uint32_t read_domains, uint32_t write_domain)
{
struct drm_i915_gem_set_domain gem_set_domain = {
.handle = gem_handle,
.read_domains = read_domains,
.write_domain = write_domain,
};
return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &gem_set_domain);
}
/**
* On error, \a timeout_ns holds the remaining time.
*/
int
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
{
struct drm_i915_gem_wait wait = {
.bo_handle = gem_handle,
.timeout_ns = *timeout_ns,
.flags = 0,
};
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
*timeout_ns = wait.timeout_ns;
return ret;
}
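/* Illustrative usage sketch (assumption: with a zero timeout the i915 wait
 * ioctl acts as a non-blocking busy check, returning 0 only if the BO is
 * already idle). Helper name is hypothetical.
 */
#if 0
static bool
example_bo_is_idle(struct anv_device *device, uint32_t handle)
{
   int64_t timeout_ns = 0;
   return anv_gem_wait(device, handle, &timeout_ns) == 0;
}
#endif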
int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
}
int
anv_gem_set_tiling(struct anv_device *device,
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
{
int ret;
/* set_tiling overwrites the input on the error path, so we have to open
* code anv_ioctl.
*/
do {
struct drm_i915_gem_set_tiling set_tiling = {
.handle = gem_handle,
.tiling_mode = tiling,
.stride = stride,
};
ret = ioctl(device->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
return ret;
}
int
anv_gem_get_param(int fd, uint32_t param)
{
int tmp;
drm_i915_getparam_t gp = {
.param = param,
.value = &tmp,
};
int ret = anv_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
if (ret == 0)
return tmp;
return 0;
}
bool
anv_gem_get_bit6_swizzle(int fd, uint32_t tiling)
{
struct drm_gem_close close;
int ret;
struct drm_i915_gem_create gem_create = {
.size = 4096,
};
if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create)) {
assert(!"Failed to create GEM BO");
return false;
}
bool swizzled = false;
/* set_tiling overwrites the input on the error path, so we have to open
* code anv_ioctl.
*/
do {
struct drm_i915_gem_set_tiling set_tiling = {
.handle = gem_create.handle,
.tiling_mode = tiling,
.stride = tiling == I915_TILING_X ? 512 : 128,
};
ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
if (ret != 0) {
assert(!"Failed to set BO tiling");
goto close_and_return;
}
struct drm_i915_gem_get_tiling get_tiling = {
.handle = gem_create.handle,
};
if (anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling)) {
assert(!"Failed to get BO tiling");
goto close_and_return;
}
swizzled = get_tiling.swizzle_mode != I915_BIT_6_SWIZZLE_NONE;
close_and_return:
memset(&close, 0, sizeof(close));
close.handle = gem_create.handle;
anv_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
return swizzled;
}
int
anv_gem_create_context(struct anv_device *device)
{
struct drm_i915_gem_context_create create = { 0 };
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
if (ret == -1)
return -1;
return create.ctx_id;
}
int
anv_gem_destroy_context(struct anv_device *device, int context)
{
struct drm_i915_gem_context_destroy destroy = {
.ctx_id = context,
};
return anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
}
int
anv_gem_get_aperture(int fd, uint64_t *size)
{
struct drm_i915_gem_get_aperture aperture = { 0 };
int ret = anv_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
if (ret == -1)
return -1;
*size = aperture.aper_available_size;
return 0;
}
int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
struct drm_prime_handle args = {
.handle = gem_handle,
.flags = DRM_CLOEXEC,
};
int ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
if (ret == -1)
return -1;
return args.fd;
}
uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
struct drm_prime_handle args = {
.fd = fd,
};
int ret = anv_ioctl(device->fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
if (ret == -1)
return 0;
return args.handle;
}

View file

@@ -0,0 +1,159 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#define _DEFAULT_SOURCE
#include <linux/memfd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include "anv_private.h"
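/* These stubs fake GEM with anonymous memfds, so a "gem_handle" in this file
 * is really just a file descriptor. memfd_create() is invoked through
 * syscall() directly, presumably because the libc in use does not provide a
 * wrapper for it.
 */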
static inline int
memfd_create(const char *name, unsigned int flags)
{
return syscall(SYS_memfd_create, name, flags);
}
uint32_t
anv_gem_create(struct anv_device *device, size_t size)
{
int fd = memfd_create("fake bo", MFD_CLOEXEC);
if (fd == -1)
return 0;
assert(fd != 0);
if (ftruncate(fd, size) == -1)
return 0;
return fd;
}
void
anv_gem_close(struct anv_device *device, uint32_t gem_handle)
{
close(gem_handle);
}
void*
anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
uint64_t offset, uint64_t size, uint32_t flags)
{
/* Ignore flags, as they're specific to I915_GEM_MMAP. */
(void) flags;
return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
gem_handle, offset);
}
/* This is just a wrapper around munmap, but it also notifies valgrind that
* this map is no longer valid. Pair this with anv_gem_mmap().
*/
void
anv_gem_munmap(void *p, uint64_t size)
{
munmap(p, size);
}
uint32_t
anv_gem_userptr(struct anv_device *device, void *mem, size_t size)
{
return -1;
}
int
anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns)
{
return 0;
}
int
anv_gem_execbuffer(struct anv_device *device,
struct drm_i915_gem_execbuffer2 *execbuf)
{
return 0;
}
int
anv_gem_set_tiling(struct anv_device *device,
uint32_t gem_handle, uint32_t stride, uint32_t tiling)
{
return 0;
}
int
anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle,
uint32_t caching)
{
return 0;
}
int
anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
uint32_t read_domains, uint32_t write_domain)
{
return 0;
}
int
anv_gem_get_param(int fd, uint32_t param)
{
unreachable("Unused");
}
bool
anv_gem_get_bit6_swizzle(int fd, uint32_t tiling)
{
unreachable("Unused");
}
int
anv_gem_create_context(struct anv_device *device)
{
unreachable("Unused");
}
int
anv_gem_destroy_context(struct anv_device *device, int context)
{
unreachable("Unused");
}
int
anv_gem_get_aperture(int fd, uint64_t *size)
{
unreachable("Unused");
}
int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
unreachable("Unused");
}
uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
unreachable("Unused");
}

View file

@@ -0,0 +1,66 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/*
* Gen-specific function declarations. This header must *not* be included
* directly. Instead, it is included multiple times by anv_private.h.
*
* In this header file, the usual genx() macro is available.
*/
VkResult genX(init_device_state)(struct anv_device *device);
void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
genX(cmd_buffer_alloc_null_surface_state)(struct anv_cmd_buffer *cmd_buffer,
struct anv_framebuffer *fb);
void genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer,
struct anv_subpass *subpass);
void genX(flush_pipeline_select_3d)(struct anv_cmd_buffer *cmd_buffer);
void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
bool enable_slm);
void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer);
VkResult
genX(graphics_pipeline_create)(VkDevice _device,
struct anv_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct anv_graphics_pipeline_create_info *extra,
const VkAllocationCallbacks *alloc,
VkPipeline *pPipeline);
VkResult
genX(compute_pipeline_create)(VkDevice _device,
struct anv_pipeline_cache *cache,
const VkComputePipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *alloc,
VkPipeline *pPipeline);

View file

@@ -0,0 +1,787 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
/**
* Exactly one bit must be set in \a aspect.
*/
static isl_surf_usage_flags_t
choose_isl_surf_usage(VkImageUsageFlags vk_usage,
VkImageAspectFlags aspect)
{
isl_surf_usage_flags_t isl_usage = 0;
/* FINISHME: Support aux surfaces */
isl_usage |= ISL_SURF_USAGE_DISABLE_AUX_BIT;
if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT;
if (vk_usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)
isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT;
if (vk_usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)
isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
if (vk_usage & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
isl_usage |= ISL_SURF_USAGE_CUBE_BIT;
if (vk_usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
switch (aspect) {
default:
unreachable("bad VkImageAspect");
case VK_IMAGE_ASPECT_DEPTH_BIT:
isl_usage |= ISL_SURF_USAGE_DEPTH_BIT;
break;
case VK_IMAGE_ASPECT_STENCIL_BIT:
isl_usage |= ISL_SURF_USAGE_STENCIL_BIT;
break;
}
}
if (vk_usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
/* Meta implements transfers by sampling from the source image. */
isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT;
}
if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
/* Meta implements transfers by rendering into the destination image. */
isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
}
return isl_usage;
}
/**
* Exactly one bit must be set in \a aspect.
*/
static struct anv_surface *
get_surface(struct anv_image *image, VkImageAspectFlags aspect)
{
switch (aspect) {
default:
unreachable("bad VkImageAspect");
case VK_IMAGE_ASPECT_COLOR_BIT:
return &image->color_surface;
case VK_IMAGE_ASPECT_DEPTH_BIT:
return &image->depth_surface;
case VK_IMAGE_ASPECT_STENCIL_BIT:
return &image->stencil_surface;
}
}
/**
* Initialize the anv_image::*_surface selected by \a aspect. Then update the
* image's memory requirements (that is, the image's size and alignment).
*
* Exactly one bit must be set in \a aspect.
*/
static VkResult
make_surface(const struct anv_device *dev,
struct anv_image *image,
const struct anv_image_create_info *anv_info,
VkImageAspectFlags aspect)
{
const VkImageCreateInfo *vk_info = anv_info->vk_info;
bool ok UNUSED;
static const enum isl_surf_dim vk_to_isl_surf_dim[] = {
[VK_IMAGE_TYPE_1D] = ISL_SURF_DIM_1D,
[VK_IMAGE_TYPE_2D] = ISL_SURF_DIM_2D,
[VK_IMAGE_TYPE_3D] = ISL_SURF_DIM_3D,
};
isl_tiling_flags_t tiling_flags = anv_info->isl_tiling_flags;
if (vk_info->tiling == VK_IMAGE_TILING_LINEAR)
tiling_flags = ISL_TILING_LINEAR_BIT;
struct anv_surface *anv_surf = get_surface(image, aspect);
image->extent = anv_sanitize_image_extent(vk_info->imageType,
vk_info->extent);
ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl,
.dim = vk_to_isl_surf_dim[vk_info->imageType],
.format = anv_get_isl_format(vk_info->format, aspect,
vk_info->tiling, NULL),
.width = image->extent.width,
.height = image->extent.height,
.depth = image->extent.depth,
.levels = vk_info->mipLevels,
.array_len = vk_info->arrayLayers,
.samples = vk_info->samples,
.min_alignment = 0,
.min_pitch = anv_info->stride,
.usage = choose_isl_surf_usage(image->usage, aspect),
.tiling_flags = tiling_flags);
/* isl_surf_init() will fail only if provided invalid input. Invalid input
* is illegal in Vulkan.
*/
assert(ok);
anv_surf->offset = align_u32(image->size, anv_surf->isl.alignment);
image->size = anv_surf->offset + anv_surf->isl.size;
image->alignment = MAX(image->alignment, anv_surf->isl.alignment);
return VK_SUCCESS;
}
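/* Worked example (illustrative): surfaces for a multi-aspect image are packed
 * back to back. If the depth surface ends at image->size == 0x1800 and the
 * stencil surface requires 0x1000 alignment, then align_u32(0x1800, 0x1000)
 * == 0x2000 becomes the stencil offset and image->size grows to 0x2000 plus
 * the stencil surface's isl size.
 */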
/**
* Parameter @a format is required and overrides VkImageCreateInfo::format.
*/
static VkImageUsageFlags
anv_image_get_full_usage(const VkImageCreateInfo *info,
const struct anv_format *format)
{
VkImageUsageFlags usage = info->usage;
if (info->samples > 1 &&
(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
/* Meta will resolve the image by binding it as a texture. */
usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
}
if (usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
/* Meta will transfer from the image by binding it as a texture. */
usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
}
if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
/* For non-clear transfer operations, meta will transfer to the image by
* binding it as a color attachment, even if the image format is not
* a color format.
*/
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
if (anv_format_is_depth_or_stencil(format)) {
/* vkCmdClearDepthStencilImage() only requires that
* VK_IMAGE_USAGE_TRANSFER_DST_BIT be set. In particular, it does
* not require VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT. Meta
* clears the image, though, by binding it as a depthstencil
* attachment.
*/
usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}
}
return usage;
}
VkResult
anv_image_create(VkDevice _device,
const struct anv_image_create_info *create_info,
const VkAllocationCallbacks* alloc,
VkImage *pImage)
{
ANV_FROM_HANDLE(anv_device, device, _device);
const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
struct anv_image *image = NULL;
const struct anv_format *format = anv_format_for_vk_format(pCreateInfo->format);
VkResult r;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
anv_assert(pCreateInfo->mipLevels > 0);
anv_assert(pCreateInfo->arrayLayers > 0);
anv_assert(pCreateInfo->samples > 0);
anv_assert(pCreateInfo->extent.width > 0);
anv_assert(pCreateInfo->extent.height > 0);
anv_assert(pCreateInfo->extent.depth > 0);
image = anv_alloc2(&device->alloc, alloc, sizeof(*image), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!image)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
memset(image, 0, sizeof(*image));
image->type = pCreateInfo->imageType;
image->extent = pCreateInfo->extent;
image->vk_format = pCreateInfo->format;
image->format = format;
image->levels = pCreateInfo->mipLevels;
image->array_size = pCreateInfo->arrayLayers;
image->samples = pCreateInfo->samples;
image->usage = anv_image_get_full_usage(pCreateInfo, format);
image->tiling = pCreateInfo->tiling;
if (likely(anv_format_is_color(format))) {
r = make_surface(device, image, create_info,
VK_IMAGE_ASPECT_COLOR_BIT);
if (r != VK_SUCCESS)
goto fail;
} else {
if (image->format->has_depth) {
r = make_surface(device, image, create_info,
VK_IMAGE_ASPECT_DEPTH_BIT);
if (r != VK_SUCCESS)
goto fail;
}
if (image->format->has_stencil) {
r = make_surface(device, image, create_info,
VK_IMAGE_ASPECT_STENCIL_BIT);
if (r != VK_SUCCESS)
goto fail;
}
}
*pImage = anv_image_to_handle(image);
return VK_SUCCESS;
fail:
if (image)
anv_free2(&device->alloc, alloc, image);
return r;
}
VkResult
anv_CreateImage(VkDevice device,
const VkImageCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkImage *pImage)
{
return anv_image_create(device,
&(struct anv_image_create_info) {
.vk_info = pCreateInfo,
.isl_tiling_flags = ISL_TILING_ANY_MASK,
},
pAllocator,
pImage);
}
void
anv_DestroyImage(VkDevice _device, VkImage _image,
const VkAllocationCallbacks *pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
anv_free2(&device->alloc, pAllocator, anv_image_from_handle(_image));
}
static void
anv_surface_get_subresource_layout(struct anv_image *image,
struct anv_surface *surface,
const VkImageSubresource *subresource,
VkSubresourceLayout *layout)
{
/* If we are on a non-zero mip level or array slice, we need to
* calculate a real offset.
*/
anv_assert(subresource->mipLevel == 0);
anv_assert(subresource->arrayLayer == 0);
layout->offset = surface->offset;
layout->rowPitch = surface->isl.row_pitch;
layout->depthPitch = isl_surf_get_array_pitch(&surface->isl);
layout->arrayPitch = isl_surf_get_array_pitch(&surface->isl);
layout->size = surface->isl.size;
}
void anv_GetImageSubresourceLayout(
VkDevice device,
VkImage _image,
const VkImageSubresource* pSubresource,
VkSubresourceLayout* pLayout)
{
ANV_FROM_HANDLE(anv_image, image, _image);
assert(__builtin_popcount(pSubresource->aspectMask) == 1);
switch (pSubresource->aspectMask) {
case VK_IMAGE_ASPECT_COLOR_BIT:
anv_surface_get_subresource_layout(image, &image->color_surface,
pSubresource, pLayout);
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
anv_surface_get_subresource_layout(image, &image->depth_surface,
pSubresource, pLayout);
break;
case VK_IMAGE_ASPECT_STENCIL_BIT:
anv_surface_get_subresource_layout(image, &image->stencil_surface,
pSubresource, pLayout);
break;
default:
assert(!"Invalid image aspect");
}
}
VkResult
anv_validate_CreateImageView(VkDevice _device,
const VkImageViewCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkImageView *pView)
{
ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);
const VkImageSubresourceRange *subresource;
const struct anv_format *view_format_info;
/* Validate structure type before dereferencing it. */
assert(pCreateInfo);
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO);
subresource = &pCreateInfo->subresourceRange;
/* Validate viewType is in range before using it. */
assert(pCreateInfo->viewType >= VK_IMAGE_VIEW_TYPE_BEGIN_RANGE);
assert(pCreateInfo->viewType <= VK_IMAGE_VIEW_TYPE_END_RANGE);
/* Validate format is in range before using it. */
assert(pCreateInfo->format >= VK_FORMAT_BEGIN_RANGE);
assert(pCreateInfo->format <= VK_FORMAT_END_RANGE);
view_format_info = anv_format_for_vk_format(pCreateInfo->format);
/* Validate channel swizzles. */
assert(pCreateInfo->components.r >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
assert(pCreateInfo->components.r <= VK_COMPONENT_SWIZZLE_END_RANGE);
assert(pCreateInfo->components.g >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
assert(pCreateInfo->components.g <= VK_COMPONENT_SWIZZLE_END_RANGE);
assert(pCreateInfo->components.b >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
assert(pCreateInfo->components.b <= VK_COMPONENT_SWIZZLE_END_RANGE);
assert(pCreateInfo->components.a >= VK_COMPONENT_SWIZZLE_BEGIN_RANGE);
assert(pCreateInfo->components.a <= VK_COMPONENT_SWIZZLE_END_RANGE);
/* Validate subresource. */
assert(subresource->aspectMask != 0);
assert(subresource->levelCount > 0);
assert(subresource->layerCount > 0);
assert(subresource->baseMipLevel < image->levels);
assert(subresource->baseMipLevel + anv_get_levelCount(image, subresource) <= image->levels);
assert(subresource->baseArrayLayer < image->array_size);
assert(subresource->baseArrayLayer + anv_get_layerCount(image, subresource) <= image->array_size);
assert(pView);
const VkImageAspectFlags ds_flags = VK_IMAGE_ASPECT_DEPTH_BIT
| VK_IMAGE_ASPECT_STENCIL_BIT;
/* Validate format. */
if (subresource->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
assert(subresource->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(!image->format->has_depth);
assert(!image->format->has_stencil);
assert(!view_format_info->has_depth);
assert(!view_format_info->has_stencil);
assert(view_format_info->isl_layout->bs ==
image->format->isl_layout->bs);
} else if (subresource->aspectMask & ds_flags) {
assert((subresource->aspectMask & ~ds_flags) == 0);
if (subresource->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
assert(image->format->has_depth);
assert(view_format_info->has_depth);
assert(view_format_info->isl_layout->bs ==
image->format->isl_layout->bs);
}
if (subresource->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
/* FINISHME: Is it legal to have an R8 view of S8? */
assert(image->format->has_stencil);
assert(view_format_info->has_stencil);
}
} else {
assert(!"bad VkImageSubresourceRange::aspectFlags");
}
return anv_CreateImageView(_device, pCreateInfo, pAllocator, pView);
}
static struct anv_state
alloc_surface_state(struct anv_device *device,
struct anv_cmd_buffer *cmd_buffer)
{
if (cmd_buffer) {
return anv_cmd_buffer_alloc_surface_state(cmd_buffer);
} else {
return anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
}
}
static bool
has_matching_storage_typed_format(const struct anv_device *device,
enum isl_format format)
{
return (isl_format_get_layout(format)->bs <= 4 ||
(isl_format_get_layout(format)->bs <= 8 &&
(device->info.gen >= 8 || device->info.is_haswell)) ||
device->info.gen >= 9);
}
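/* For example, a 64-bit-per-texel format (bs == 8) only has a matching typed
 * storage format on Haswell and gen8+, and anything wider only on gen9+;
 * views of other formats fall back to an untyped ISL_FORMAT_RAW surface
 * below.
 */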
static enum isl_channel_select
remap_swizzle(VkComponentSwizzle swizzle, VkComponentSwizzle component,
struct anv_format_swizzle format_swizzle)
{
if (swizzle == VK_COMPONENT_SWIZZLE_IDENTITY)
swizzle = component;
switch (swizzle) {
case VK_COMPONENT_SWIZZLE_ZERO:
return ISL_CHANNEL_SELECT_ZERO;
case VK_COMPONENT_SWIZZLE_ONE:
return ISL_CHANNEL_SELECT_ONE;
case VK_COMPONENT_SWIZZLE_R:
return ISL_CHANNEL_SELECT_RED + format_swizzle.r;
case VK_COMPONENT_SWIZZLE_G:
return ISL_CHANNEL_SELECT_RED + format_swizzle.g;
case VK_COMPONENT_SWIZZLE_B:
return ISL_CHANNEL_SELECT_RED + format_swizzle.b;
case VK_COMPONENT_SWIZZLE_A:
return ISL_CHANNEL_SELECT_RED + format_swizzle.a;
default:
unreachable("Invalid swizzle");
}
}
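/* Example (illustrative): with a hypothetical format_swizzle of
 * { .r = 2, .g = 1, .b = 0, .a = 3 } (a BGRA-style layout) and an identity
 * view swizzle, the R component resolves to ISL_CHANNEL_SELECT_RED + 2,
 * i.e. blue in the consecutive red/green/blue/alpha encoding. The view
 * swizzle thus composes with the format swizzle instead of replacing it.
 */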
void
anv_image_view_init(struct anv_image_view *iview,
struct anv_device *device,
const VkImageViewCreateInfo* pCreateInfo,
struct anv_cmd_buffer *cmd_buffer,
VkImageUsageFlags usage_mask)
{
ANV_FROM_HANDLE(anv_image, image, pCreateInfo->image);
const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
assert(range->layerCount > 0);
assert(range->baseMipLevel < image->levels);
assert(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT));
switch (image->type) {
default:
unreachable("bad VkImageType");
case VK_IMAGE_TYPE_1D:
case VK_IMAGE_TYPE_2D:
assert(range->baseArrayLayer + anv_get_layerCount(image, range) - 1 <= image->array_size);
break;
case VK_IMAGE_TYPE_3D:
assert(range->baseArrayLayer + anv_get_layerCount(image, range) - 1
<= anv_minify(image->extent.depth, range->baseMipLevel));
break;
}
struct anv_surface *surface =
anv_image_get_surface_for_aspect_mask(image, range->aspectMask);
iview->image = image;
iview->bo = image->bo;
iview->offset = image->offset + surface->offset;
iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
iview->vk_format = pCreateInfo->format;
struct anv_format_swizzle swizzle;
enum isl_format format = anv_get_isl_format(pCreateInfo->format,
range->aspectMask,
image->tiling, &swizzle);
iview->base_layer = range->baseArrayLayer;
iview->base_mip = range->baseMipLevel;
struct isl_view isl_view = {
.format = format,
.base_level = range->baseMipLevel,
.levels = anv_get_levelCount(image, range),
.base_array_layer = range->baseArrayLayer,
.array_len = anv_get_layerCount(image, range),
.channel_select = {
remap_swizzle(pCreateInfo->components.r,
VK_COMPONENT_SWIZZLE_R, swizzle),
remap_swizzle(pCreateInfo->components.g,
VK_COMPONENT_SWIZZLE_G, swizzle),
remap_swizzle(pCreateInfo->components.b,
VK_COMPONENT_SWIZZLE_B, swizzle),
remap_swizzle(pCreateInfo->components.a,
VK_COMPONENT_SWIZZLE_A, swizzle),
},
};
iview->extent = (VkExtent3D) {
.width = anv_minify(image->extent.width , range->baseMipLevel),
.height = anv_minify(image->extent.height, range->baseMipLevel),
.depth = anv_minify(image->extent.depth , range->baseMipLevel),
};
isl_surf_usage_flags_t cube_usage;
if (pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE ||
pCreateInfo->viewType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
cube_usage = ISL_SURF_USAGE_CUBE_BIT;
} else {
cube_usage = 0;
}
if (image->usage & usage_mask & VK_IMAGE_USAGE_SAMPLED_BIT) {
iview->sampler_surface_state = alloc_surface_state(device, cmd_buffer);
isl_view.usage = cube_usage | ISL_SURF_USAGE_TEXTURE_BIT;
isl_surf_fill_state(&device->isl_dev,
iview->sampler_surface_state.map,
.surf = &surface->isl,
.view = &isl_view,
.mocs = device->default_mocs);
if (!device->info.has_llc)
anv_state_clflush(iview->sampler_surface_state);
} else {
iview->sampler_surface_state.alloc_size = 0;
}
if (image->usage & usage_mask & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
iview->color_rt_surface_state = alloc_surface_state(device, cmd_buffer);
isl_view.usage = cube_usage | ISL_SURF_USAGE_RENDER_TARGET_BIT;
isl_surf_fill_state(&device->isl_dev,
iview->color_rt_surface_state.map,
.surf = &surface->isl,
.view = &isl_view,
.mocs = device->default_mocs);
if (!device->info.has_llc)
anv_state_clflush(iview->color_rt_surface_state);
} else {
iview->color_rt_surface_state.alloc_size = 0;
}
if (image->usage & usage_mask & VK_IMAGE_USAGE_STORAGE_BIT) {
iview->storage_surface_state = alloc_surface_state(device, cmd_buffer);
if (has_matching_storage_typed_format(device, format)) {
isl_view.usage = cube_usage | ISL_SURF_USAGE_STORAGE_BIT;
isl_surf_fill_state(&device->isl_dev,
iview->storage_surface_state.map,
.surf = &surface->isl,
.view = &isl_view,
.mocs = device->default_mocs);
} else {
anv_fill_buffer_surface_state(device, iview->storage_surface_state,
ISL_FORMAT_RAW,
iview->offset,
iview->bo->size - iview->offset, 1);
}
isl_surf_fill_image_param(&device->isl_dev,
&iview->storage_image_param,
&surface->isl, &isl_view);
if (!device->info.has_llc)
anv_state_clflush(iview->storage_surface_state);
} else {
iview->storage_surface_state.alloc_size = 0;
}
}
VkResult
anv_CreateImageView(VkDevice _device,
const VkImageViewCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkImageView *pView)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_image_view *view;
view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (view == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
anv_image_view_init(view, device, pCreateInfo, NULL, ~0);
*pView = anv_image_view_to_handle(view);
return VK_SUCCESS;
}
void
anv_DestroyImageView(VkDevice _device, VkImageView _iview,
const VkAllocationCallbacks *pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_image_view, iview, _iview);
if (iview->color_rt_surface_state.alloc_size > 0) {
anv_state_pool_free(&device->surface_state_pool,
iview->color_rt_surface_state);
}
if (iview->sampler_surface_state.alloc_size > 0) {
anv_state_pool_free(&device->surface_state_pool,
iview->sampler_surface_state);
}
if (iview->storage_surface_state.alloc_size > 0) {
anv_state_pool_free(&device->surface_state_pool,
iview->storage_surface_state);
}
anv_free2(&device->alloc, pAllocator, iview);
}
void anv_buffer_view_init(struct anv_buffer_view *view,
struct anv_device *device,
const VkBufferViewCreateInfo* pCreateInfo,
struct anv_cmd_buffer *cmd_buffer)
{
ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);
const struct anv_format *format =
anv_format_for_vk_format(pCreateInfo->format);
view->format = format->isl_format;
view->bo = buffer->bo;
view->offset = buffer->offset + pCreateInfo->offset;
view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
buffer->size - view->offset : pCreateInfo->range;
if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT) {
view->surface_state = alloc_surface_state(device, cmd_buffer);
anv_fill_buffer_surface_state(device, view->surface_state,
view->format,
view->offset, view->range,
format->isl_layout->bs);
} else {
view->surface_state = (struct anv_state){ 0 };
}
if (buffer->usage & VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT) {
view->storage_surface_state = alloc_surface_state(device, cmd_buffer);
enum isl_format storage_format =
has_matching_storage_typed_format(device, view->format) ?
isl_lower_storage_image_format(&device->isl_dev, view->format) :
ISL_FORMAT_RAW;
anv_fill_buffer_surface_state(device, view->storage_surface_state,
storage_format,
view->offset, view->range,
(storage_format == ISL_FORMAT_RAW ? 1 :
format->isl_layout->bs));
isl_buffer_fill_image_param(&device->isl_dev,
&view->storage_image_param,
view->format, view->range);
} else {
view->storage_surface_state = (struct anv_state){ 0 };
}
}
VkResult
anv_CreateBufferView(VkDevice _device,
const VkBufferViewCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkBufferView *pView)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_buffer_view *view;
view = anv_alloc2(&device->alloc, pAllocator, sizeof(*view), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!view)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
anv_buffer_view_init(view, device, pCreateInfo, NULL);
*pView = anv_buffer_view_to_handle(view);
return VK_SUCCESS;
}
void
anv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
const VkAllocationCallbacks *pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_buffer_view, view, bufferView);
if (view->surface_state.alloc_size > 0)
anv_state_pool_free(&device->surface_state_pool,
view->surface_state);
if (view->storage_surface_state.alloc_size > 0)
anv_state_pool_free(&device->surface_state_pool,
view->storage_surface_state);
anv_free2(&device->alloc, pAllocator, view);
}
struct anv_surface *
anv_image_get_surface_for_aspect_mask(struct anv_image *image, VkImageAspectFlags aspect_mask)
{
switch (aspect_mask) {
case VK_IMAGE_ASPECT_COLOR_BIT:
/* Dragons will eat you.
*
* Meta attaches all destination surfaces as color render targets. Guess
* what surface the Meta Dragons really want.
*/
if (image->format->has_depth && image->format->has_stencil) {
return &image->depth_surface;
} else if (image->format->has_depth) {
return &image->depth_surface;
} else if (image->format->has_stencil) {
return &image->stencil_surface;
} else {
return &image->color_surface;
}
break;
case VK_IMAGE_ASPECT_DEPTH_BIT:
assert(image->format->has_depth);
return &image->depth_surface;
case VK_IMAGE_ASPECT_STENCIL_BIT:
assert(image->format->has_stencil);
return &image->stencil_surface;
case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
if (image->format->has_depth && image->format->has_stencil) {
/* FINISHME: The Vulkan spec (git a511ba2) requires support for
* combined depth stencil formats. Specifically, it states:
*
* At least one of ename:VK_FORMAT_D24_UNORM_S8_UINT or
* ename:VK_FORMAT_D32_SFLOAT_S8_UINT must be supported.
*
* Image views with both depth and stencil aspects are only valid for
* render target attachments, in which case
* cmd_buffer_emit_depth_stencil() will pick out both the depth and
* stencil surfaces from the underlying surface.
*/
return &image->depth_surface;
} else if (image->format->has_depth) {
return &image->depth_surface;
} else if (image->format->has_stencil) {
return &image->stencil_surface;
}
/* fallthrough */
default:
unreachable("image does not have aspect");
return NULL;
}
}

View file

@@ -0,0 +1,100 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
VkResult anv_CreateDmaBufImageINTEL(
VkDevice _device,
const VkDmaBufImageCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkDeviceMemory* pMem,
VkImage* pImage)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_device_memory *mem;
struct anv_image *image;
VkResult result;
VkImage image_h;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL);
mem = anv_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (mem == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd);
if (!mem->bo.gem_handle) {
result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}
mem->bo.map = NULL;
mem->bo.index = 0;
mem->bo.offset = 0;
mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height;
anv_image_create(_device,
&(struct anv_image_create_info) {
.isl_tiling_flags = ISL_TILING_X_BIT,
.stride = pCreateInfo->strideInBytes,
.vk_info =
&(VkImageCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = pCreateInfo->format,
.extent = pCreateInfo->extent,
.mipLevels = 1,
.arrayLayers = 1,
.samples = 1,
/* FIXME: Need a way to use X tiling to allow scanout */
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.flags = 0,
}},
pAllocator, &image_h);
image = anv_image_from_handle(image_h);
image->bo = &mem->bo;
image->offset = 0;
assert(image->extent.width > 0);
assert(image->extent.height > 0);
assert(image->extent.depth == 1);
*pMem = anv_device_memory_to_handle(mem);
*pImage = anv_image_to_handle(image);
return VK_SUCCESS;
fail:
anv_free2(&device->alloc, pAllocator, mem);
return result;
}

176
src/intel/vulkan/anv_meta.c Normal file
View file

@@ -0,0 +1,176 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_meta.h"
struct anv_render_pass anv_meta_dummy_renderpass = {0};
void
anv_meta_save(struct anv_meta_saved_state *state,
const struct anv_cmd_buffer *cmd_buffer,
uint32_t dynamic_mask)
{
state->old_pipeline = cmd_buffer->state.pipeline;
state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings,
sizeof(state->old_vertex_bindings));
state->dynamic_mask = dynamic_mask;
anv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic,
dynamic_mask);
}
void
anv_meta_restore(const struct anv_meta_saved_state *state,
struct anv_cmd_buffer *cmd_buffer)
{
cmd_buffer->state.pipeline = state->old_pipeline;
cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
sizeof(state->old_vertex_bindings));
cmd_buffer->state.vb_dirty |= (1 << ANV_META_VERTEX_BINDING_COUNT) - 1;
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_PIPELINE;
cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
anv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic,
state->dynamic_mask);
cmd_buffer->state.dirty |= state->dynamic_mask;
/* Since we've used the pipeline with the VS disabled, set
* need_query_wa. See CmdBeginQuery.
*/
cmd_buffer->state.need_query_wa = true;
}
VkImageViewType
anv_meta_get_view_type(const struct anv_image *image)
{
switch (image->type) {
case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
default:
unreachable("bad VkImageType");
}
}
/**
* When creating a destination VkImageView, this function provides the needed
* VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
*/
uint32_t
anv_meta_get_iview_layer(const struct anv_image *dest_image,
const VkImageSubresourceLayers *dest_subresource,
const VkOffset3D *dest_offset)
{
switch (dest_image->type) {
case VK_IMAGE_TYPE_1D:
case VK_IMAGE_TYPE_2D:
return dest_subresource->baseArrayLayer;
case VK_IMAGE_TYPE_3D:
/* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
* but meta does it anyway. When doing so, we translate the
* destination's z offset into an array offset.
*/
return dest_offset->z;
default:
assert(!"bad VkImageType");
return 0;
}
}
static void *
meta_alloc(void* _device, size_t size, size_t alignment,
VkSystemAllocationScope allocationScope)
{
struct anv_device *device = _device;
return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}
static void *
meta_realloc(void* _device, void *original, size_t size, size_t alignment,
VkSystemAllocationScope allocationScope)
{
struct anv_device *device = _device;
return device->alloc.pfnReallocation(device->alloc.pUserData, original,
size, alignment,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}
static void
meta_free(void* _device, void *data)
{
struct anv_device *device = _device;
return device->alloc.pfnFree(device->alloc.pUserData, data);
}
VkResult
anv_device_init_meta(struct anv_device *device)
{
VkResult result;
device->meta_state.alloc = (VkAllocationCallbacks) {
.pUserData = device,
.pfnAllocation = meta_alloc,
.pfnReallocation = meta_realloc,
.pfnFree = meta_free,
};
result = anv_device_init_meta_clear_state(device);
if (result != VK_SUCCESS)
goto fail_clear;
result = anv_device_init_meta_resolve_state(device);
if (result != VK_SUCCESS)
goto fail_resolve;
result = anv_device_init_meta_blit_state(device);
if (result != VK_SUCCESS)
goto fail_blit;
result = anv_device_init_meta_blit2d_state(device);
if (result != VK_SUCCESS)
goto fail_blit2d;
return VK_SUCCESS;
fail_blit2d:
anv_device_finish_meta_blit_state(device);
fail_blit:
anv_device_finish_meta_resolve_state(device);
fail_resolve:
anv_device_finish_meta_clear_state(device);
fail_clear:
return result;
}
void
anv_device_finish_meta(struct anv_device *device)
{
anv_device_finish_meta_resolve_state(device);
anv_device_finish_meta_clear_state(device);
anv_device_finish_meta_blit_state(device);
anv_device_finish_meta_blit2d_state(device);
}

113
src/intel/vulkan/anv_meta.h Normal file
View file

@@ -0,0 +1,113 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "anv_private.h"
#ifdef __cplusplus
extern "C" {
#endif
#define ANV_META_VERTEX_BINDING_COUNT 2
struct anv_meta_saved_state {
struct anv_vertex_binding old_vertex_bindings[ANV_META_VERTEX_BINDING_COUNT];
struct anv_descriptor_set *old_descriptor_set0;
struct anv_pipeline *old_pipeline;
/**
* Bitmask of (1 << VK_DYNAMIC_STATE_*). Defines the set of saved dynamic
* state.
*/
uint32_t dynamic_mask;
struct anv_dynamic_state dynamic;
};
VkResult anv_device_init_meta_clear_state(struct anv_device *device);
void anv_device_finish_meta_clear_state(struct anv_device *device);
VkResult anv_device_init_meta_resolve_state(struct anv_device *device);
void anv_device_finish_meta_resolve_state(struct anv_device *device);
VkResult anv_device_init_meta_blit_state(struct anv_device *device);
void anv_device_finish_meta_blit_state(struct anv_device *device);
VkResult anv_device_init_meta_blit2d_state(struct anv_device *device);
void anv_device_finish_meta_blit2d_state(struct anv_device *device);
void
anv_meta_save(struct anv_meta_saved_state *state,
const struct anv_cmd_buffer *cmd_buffer,
uint32_t dynamic_mask);
void
anv_meta_restore(const struct anv_meta_saved_state *state,
struct anv_cmd_buffer *cmd_buffer);
VkImageViewType
anv_meta_get_view_type(const struct anv_image *image);
uint32_t
anv_meta_get_iview_layer(const struct anv_image *dest_image,
const VkImageSubresourceLayers *dest_subresource,
const VkOffset3D *dest_offset);
struct anv_meta_blit2d_surf {
struct anv_bo *bo;
enum isl_tiling tiling;
/** Base offset to the start of the image */
uint64_t base_offset;
/** The size of an element in bytes. */
uint8_t bs;
/** Pitch between rows in bytes. */
uint32_t pitch;
};
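/* Illustrative only: a 256x256 linear RGBA8 staging surface might be
 * described as
 *
 *    struct anv_meta_blit2d_surf src = {
 *       .bo = staging_bo,              // hypothetical BO
 *       .tiling = ISL_TILING_LINEAR,
 *       .base_offset = 0,
 *       .bs = 4,                       // bytes per RGBA8 element
 *       .pitch = 256 * 4,
 *    };
 */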
struct anv_meta_blit2d_rect {
uint32_t src_x, src_y;
uint32_t dst_x, dst_y;
uint32_t width, height;
};
void
anv_meta_begin_blit2d(struct anv_cmd_buffer *cmd_buffer,
struct anv_meta_saved_state *save);
void
anv_meta_blit2d(struct anv_cmd_buffer *cmd_buffer,
struct anv_meta_blit2d_surf *src,
struct anv_meta_blit2d_surf *dst,
unsigned num_rects,
struct anv_meta_blit2d_rect *rects);
void
anv_meta_end_blit2d(struct anv_cmd_buffer *cmd_buffer,
struct anv_meta_saved_state *save);
#ifdef __cplusplus
}
#endif

View file

@@ -0,0 +1,736 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_meta.h"
#include "nir/nir_builder.h"
struct blit_region {
VkOffset3D src_offset;
VkExtent3D src_extent;
VkOffset3D dest_offset;
VkExtent3D dest_extent;
};
static nir_shader *
build_nir_vertex_shader(void)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "a_pos");
pos_in->data.location = VERT_ATTRIB_GENERIC0;
nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "gl_Position");
pos_out->data.location = VARYING_SLOT_POS;
nir_copy_var(&b, pos_out, pos_in);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "a_tex_pos");
tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "v_tex_pos");
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_QUALIFIER_SMOOTH;
nir_copy_var(&b, tex_pos_out, tex_pos_in);
return b.shader;
}
static nir_shader *
build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_fs");
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
/* Swizzle the array index which comes in as Z coordinate into the right
* position.
*/
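/* (For a 1D array view the coordinate becomes { x, layer } with two
 * components; for 2D/3D it stays { x, y, layer-or-depth } with three, so the
 * unused middle slot is simply dropped.)
 */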
unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
nir_ssa_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
(tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3), false);
const struct glsl_type *sampler_type =
glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
glsl_get_base_type(vec4));
nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
tex->sampler_dim = tex_dim;
tex->op = nir_texop_tex;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(tex_pos);
tex->dest_type = nir_type_float; /* TODO */
tex->is_array = glsl_sampler_type_is_array(sampler_type);
tex->coord_components = tex_pos->num_components;
tex->texture = nir_deref_var_create(tex, sampler);
tex->sampler = nir_deref_var_create(tex, sampler);
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_store_var(&b, color_out, &tex->dest.ssa, 4);
return b.shader;
}
static void
meta_prepare_blit(struct anv_cmd_buffer *cmd_buffer,
struct anv_meta_saved_state *saved_state)
{
anv_meta_save(saved_state, cmd_buffer, 0);
}
static void
meta_emit_blit(struct anv_cmd_buffer *cmd_buffer,
struct anv_image *src_image,
struct anv_image_view *src_iview,
VkOffset3D src_offset,
VkExtent3D src_extent,
struct anv_image *dest_image,
struct anv_image_view *dest_iview,
VkOffset3D dest_offset,
VkExtent3D dest_extent,
VkFilter blit_filter)
{
struct anv_device *device = cmd_buffer->device;
struct blit_vb_data {
float pos[2];
float tex_coord[3];
} *vb_data;
assert(src_image->samples == dest_image->samples);
unsigned vb_size = sizeof(struct anv_vue_header) + 3 * sizeof(*vb_data);
struct anv_state vb_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, vb_size, 16);
memset(vb_state.map, 0, sizeof(struct anv_vue_header));
vb_data = vb_state.map + sizeof(struct anv_vue_header);
vb_data[0] = (struct blit_vb_data) {
.pos = {
dest_offset.x + dest_extent.width,
dest_offset.y + dest_extent.height,
},
.tex_coord = {
(float)(src_offset.x + src_extent.width)
/ (float)src_iview->extent.width,
(float)(src_offset.y + src_extent.height)
/ (float)src_iview->extent.height,
(float)src_offset.z / (float)src_iview->extent.depth,
},
};
vb_data[1] = (struct blit_vb_data) {
.pos = {
dest_offset.x,
dest_offset.y + dest_extent.height,
},
.tex_coord = {
(float)src_offset.x / (float)src_iview->extent.width,
(float)(src_offset.y + src_extent.height) /
(float)src_iview->extent.height,
(float)src_offset.z / (float)src_iview->extent.depth,
},
};
vb_data[2] = (struct blit_vb_data) {
.pos = {
dest_offset.x,
dest_offset.y,
},
.tex_coord = {
(float)src_offset.x / (float)src_iview->extent.width,
(float)src_offset.y / (float)src_iview->extent.height,
(float)src_offset.z / (float)src_iview->extent.depth,
},
};
if (!device->info.has_llc)
anv_state_clflush(vb_state);
struct anv_buffer vertex_buffer = {
.device = device,
.size = vb_size,
.bo = &device->dynamic_state_block_pool.bo,
.offset = vb_state.offset,
};
anv_CmdBindVertexBuffers(anv_cmd_buffer_to_handle(cmd_buffer), 0, 2,
(VkBuffer[]) {
anv_buffer_to_handle(&vertex_buffer),
anv_buffer_to_handle(&vertex_buffer)
},
(VkDeviceSize[]) {
0,
sizeof(struct anv_vue_header),
});
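/* Both bindings alias the same scratch buffer: binding 0 covers the VUE
 * header at offset 0 (instance-rate, stride 0) and binding 1 the three
 * per-vertex pos/tex_coord records that follow it, matching the vertex input
 * layout declared in anv_device_init_meta_blit_state().
 */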
VkSampler sampler;
ANV_CALL(CreateSampler)(anv_device_to_handle(device),
&(VkSamplerCreateInfo) {
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.magFilter = blit_filter,
.minFilter = blit_filter,
}, &cmd_buffer->pool->alloc, &sampler);
VkDescriptorPool desc_pool;
anv_CreateDescriptorPool(anv_device_to_handle(device),
&(const VkDescriptorPoolCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = NULL,
.flags = 0,
.maxSets = 1,
.poolSizeCount = 1,
.pPoolSizes = (VkDescriptorPoolSize[]) {
{
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1
},
}
}, &cmd_buffer->pool->alloc, &desc_pool);
VkDescriptorSet set;
anv_AllocateDescriptorSets(anv_device_to_handle(device),
&(VkDescriptorSetAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = desc_pool,
.descriptorSetCount = 1,
.pSetLayouts = &device->meta_state.blit.ds_layout
}, &set);
anv_UpdateDescriptorSets(anv_device_to_handle(device),
1, /* writeCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = sampler,
.imageView = anv_image_view_to_handle(src_iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
}
}, 0, NULL);
VkFramebuffer fb;
anv_CreateFramebuffer(anv_device_to_handle(device),
&(VkFramebufferCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = (VkImageView[]) {
anv_image_view_to_handle(dest_iview),
},
.width = dest_iview->extent.width,
.height = dest_iview->extent.height,
.layers = 1
}, &cmd_buffer->pool->alloc, &fb);
ANV_CALL(CmdBeginRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer),
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = device->meta_state.blit.render_pass,
.framebuffer = fb,
.renderArea = {
.offset = { dest_offset.x, dest_offset.y },
.extent = { dest_extent.width, dest_extent.height },
},
.clearValueCount = 0,
.pClearValues = NULL,
}, VK_SUBPASS_CONTENTS_INLINE);
VkPipeline pipeline;
switch (src_image->type) {
case VK_IMAGE_TYPE_1D:
pipeline = device->meta_state.blit.pipeline_1d_src;
break;
case VK_IMAGE_TYPE_2D:
pipeline = device->meta_state.blit.pipeline_2d_src;
break;
case VK_IMAGE_TYPE_3D:
pipeline = device->meta_state.blit.pipeline_3d_src;
break;
default:
unreachable("bad VkImageType");
}
if (cmd_buffer->state.pipeline != anv_pipeline_from_handle(pipeline)) {
anv_CmdBindPipeline(anv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
anv_CmdBindDescriptorSets(anv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_GRAPHICS,
device->meta_state.blit.pipeline_layout, 0, 1,
&set, 0, NULL);
ANV_CALL(CmdDraw)(anv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
ANV_CALL(CmdEndRenderPass)(anv_cmd_buffer_to_handle(cmd_buffer));
/* At the point where we emit the draw call, all data from the
* descriptor sets, etc. has been used. We are free to delete it.
*/
anv_DestroyDescriptorPool(anv_device_to_handle(device),
desc_pool, &cmd_buffer->pool->alloc);
anv_DestroySampler(anv_device_to_handle(device), sampler,
&cmd_buffer->pool->alloc);
anv_DestroyFramebuffer(anv_device_to_handle(device), fb,
&cmd_buffer->pool->alloc);
}
static void
meta_finish_blit(struct anv_cmd_buffer *cmd_buffer,
const struct anv_meta_saved_state *saved_state)
{
anv_meta_restore(saved_state, cmd_buffer);
}
void anv_CmdBlitImage(
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkImageBlit* pRegions,
VkFilter filter)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_image, src_image, srcImage);
ANV_FROM_HANDLE(anv_image, dest_image, destImage);
struct anv_meta_saved_state saved_state;
/* From the Vulkan 1.0 spec:
*
* vkCmdBlitImage must not be used for multisampled source or
* destination images. Use vkCmdResolveImage for this purpose.
*/
assert(src_image->samples == 1);
assert(dest_image->samples == 1);
meta_prepare_blit(cmd_buffer, &saved_state);
for (unsigned r = 0; r < regionCount; r++) {
struct anv_image_view src_iview;
anv_image_view_init(&src_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = srcImage,
.viewType = anv_meta_get_view_type(src_image),
.format = src_image->vk_format,
.subresourceRange = {
.aspectMask = pRegions[r].srcSubresource.aspectMask,
.baseMipLevel = pRegions[r].srcSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
.layerCount = 1
},
},
cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
const VkOffset3D dest_offset = {
.x = pRegions[r].dstOffsets[0].x,
.y = pRegions[r].dstOffsets[0].y,
.z = 0,
};
if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
anv_finishme("FINISHME: Allow flipping in blits");
const VkExtent3D dest_extent = {
.width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
.height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
};
const VkExtent3D src_extent = {
.width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
.height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
};
const uint32_t dest_array_slice =
anv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
&pRegions[r].dstOffsets[0]);
if (pRegions[r].srcSubresource.layerCount > 1)
anv_finishme("FINISHME: copy multiple array layers");
if (pRegions[r].srcOffsets[0].z + 1 != pRegions[r].srcOffsets[1].z ||
pRegions[r].dstOffsets[0].z + 1 != pRegions[r].dstOffsets[1].z)
anv_finishme("FINISHME: copy multiple depth layers");
struct anv_image_view dest_iview;
anv_image_view_init(&dest_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = destImage,
.viewType = anv_meta_get_view_type(dest_image),
.format = dest_image->vk_format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = pRegions[r].dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = dest_array_slice,
.layerCount = 1
},
},
cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
meta_emit_blit(cmd_buffer,
src_image, &src_iview,
pRegions[r].srcOffsets[0], src_extent,
dest_image, &dest_iview,
dest_offset, dest_extent,
filter);
}
meta_finish_blit(cmd_buffer, &saved_state);
}
void
anv_device_finish_meta_blit_state(struct anv_device *device)
{
anv_DestroyRenderPass(anv_device_to_handle(device),
device->meta_state.blit.render_pass,
&device->meta_state.alloc);
anv_DestroyPipeline(anv_device_to_handle(device),
device->meta_state.blit.pipeline_1d_src,
&device->meta_state.alloc);
anv_DestroyPipeline(anv_device_to_handle(device),
device->meta_state.blit.pipeline_2d_src,
&device->meta_state.alloc);
anv_DestroyPipeline(anv_device_to_handle(device),
device->meta_state.blit.pipeline_3d_src,
&device->meta_state.alloc);
anv_DestroyPipelineLayout(anv_device_to_handle(device),
device->meta_state.blit.pipeline_layout,
&device->meta_state.alloc);
anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
device->meta_state.blit.ds_layout,
&device->meta_state.alloc);
}
VkResult
anv_device_init_meta_blit_state(struct anv_device *device)
{
VkResult result;
result = anv_CreateRenderPass(anv_device_to_handle(device),
&(VkRenderPassCreateInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &(VkAttachmentDescription) {
.format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
},
.subpassCount = 1,
.pSubpasses = &(VkSubpassDescription) {
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.colorAttachmentCount = 1,
.pColorAttachments = &(VkAttachmentReference) {
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.pResolveAttachments = NULL,
.pDepthStencilAttachment = &(VkAttachmentReference) {
.attachment = VK_ATTACHMENT_UNUSED,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.preserveAttachmentCount = 1,
.pPreserveAttachments = (uint32_t[]) { 0 },
},
.dependencyCount = 0,
}, &device->meta_state.alloc, &device->meta_state.blit.render_pass);
if (result != VK_SUCCESS)
goto fail;
/* We don't use a vertex shader for blitting, but instead build and pass
* the VUEs directly to the rasterization backend. However, we do need
* to provide GLSL source for the vertex shader so that the compiler
* does not dead-code our inputs.
*/
struct anv_shader_module vs = {
.nir = build_nir_vertex_shader(),
};
struct anv_shader_module fs_1d = {
.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D),
};
struct anv_shader_module fs_2d = {
.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D),
};
struct anv_shader_module fs_3d = {
.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D),
};
VkPipelineVertexInputStateCreateInfo vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 2,
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
{
.binding = 0,
.stride = 0,
.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE
},
{
.binding = 1,
.stride = 5 * sizeof(float),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
.vertexAttributeDescriptionCount = 3,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* VUE Header */
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32B32A32_UINT,
.offset = 0
},
{
/* Position */
.location = 1,
.binding = 1,
.format = VK_FORMAT_R32G32_SFLOAT,
.offset = 0
},
{
/* Texture Coordinate */
.location = 2,
.binding = 1,
.format = VK_FORMAT_R32G32B32_SFLOAT,
.offset = 8
}
}
};
VkDescriptorSetLayoutCreateInfo ds_layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.pImmutableSamplers = NULL
},
}
};
result = anv_CreateDescriptorSetLayout(anv_device_to_handle(device),
&ds_layout_info,
&device->meta_state.alloc,
&device->meta_state.blit.ds_layout);
if (result != VK_SUCCESS)
goto fail_render_pass;
result = anv_CreatePipelineLayout(anv_device_to_handle(device),
&(VkPipelineLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.blit.ds_layout,
},
&device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
if (result != VK_SUCCESS)
goto fail_descriptor_set_layout;
VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = anv_shader_module_to_handle(&vs),
.pName = "main",
.pSpecializationInfo = NULL
}, {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
.pName = "main",
.pSpecializationInfo = NULL
},
};
const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = ARRAY_SIZE(pipeline_shader_stages),
.pStages = pipeline_shader_stages,
.pVertexInputState = &vi_create_info,
.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
.primitiveRestartEnable = false,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.rasterizerDiscardEnable = false,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_NONE,
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
},
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.rasterizationSamples = 1,
.sampleShadingEnable = false,
.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
},
.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = (VkPipelineColorBlendAttachmentState []) {
{ .colorWriteMask =
VK_COLOR_COMPONENT_A_BIT |
VK_COLOR_COMPONENT_R_BIT |
VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT },
}
},
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = 9,
.pDynamicStates = (VkDynamicState[]) {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
},
},
.flags = 0,
.layout = device->meta_state.blit.pipeline_layout,
.renderPass = device->meta_state.blit.render_pass,
.subpass = 0,
};
const struct anv_graphics_pipeline_create_info anv_pipeline_info = {
.color_attachment_count = -1,
.use_repclear = false,
.disable_vs = true,
.use_rectlist = true
};
pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_1d);
result = anv_graphics_pipeline_create(anv_device_to_handle(device),
VK_NULL_HANDLE,
&vk_pipeline_info, &anv_pipeline_info,
&device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src);
if (result != VK_SUCCESS)
goto fail_pipeline_layout;
pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_2d);
result = anv_graphics_pipeline_create(anv_device_to_handle(device),
VK_NULL_HANDLE,
&vk_pipeline_info, &anv_pipeline_info,
&device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src);
if (result != VK_SUCCESS)
goto fail_pipeline_1d;
pipeline_shader_stages[1].module = anv_shader_module_to_handle(&fs_3d);
result = anv_graphics_pipeline_create(anv_device_to_handle(device),
VK_NULL_HANDLE,
&vk_pipeline_info, &anv_pipeline_info,
&device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src);
if (result != VK_SUCCESS)
goto fail_pipeline_2d;
ralloc_free(vs.nir);
ralloc_free(fs_1d.nir);
ralloc_free(fs_2d.nir);
ralloc_free(fs_3d.nir);
return VK_SUCCESS;
fail_pipeline_2d:
anv_DestroyPipeline(anv_device_to_handle(device),
device->meta_state.blit.pipeline_2d_src,
&device->meta_state.alloc);
fail_pipeline_1d:
anv_DestroyPipeline(anv_device_to_handle(device),
device->meta_state.blit.pipeline_1d_src,
&device->meta_state.alloc);
fail_pipeline_layout:
anv_DestroyPipelineLayout(anv_device_to_handle(device),
device->meta_state.blit.pipeline_layout,
&device->meta_state.alloc);
fail_descriptor_set_layout:
anv_DestroyDescriptorSetLayout(anv_device_to_handle(device),
device->meta_state.blit.ds_layout,
&device->meta_state.alloc);
fail_render_pass:
anv_DestroyRenderPass(anv_device_to_handle(device),
device->meta_state.blit.render_pass,
&device->meta_state.alloc);
ralloc_free(vs.nir);
ralloc_free(fs_1d.nir);
ralloc_free(fs_2d.nir);
ralloc_free(fs_3d.nir);
fail:
return result;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -0,0 +1,462 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_meta.h"
/* Returns the user-provided VkBufferImageCopy::imageExtent in units of
* elements rather than texels. One element equals one texel or one block
* if Image is uncompressed or compressed, respectively.
*/
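/* e.g. for a BC1-compressed image, whose blocks cover 4x4x1 texels
 * (bw = bh = 4, bd = 1), a 64x64x1 texel extent maps to a 16x16x1 element
 * extent.
 */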
static struct VkExtent3D
meta_region_extent_el(const struct anv_image *image,
const struct VkExtent3D *extent)
{
const struct isl_format_layout *isl_layout =
anv_format_for_vk_format(image->vk_format)->isl_layout;
return anv_sanitize_image_extent(image->type, (VkExtent3D) {
.width = DIV_ROUND_UP(extent->width , isl_layout->bw),
.height = DIV_ROUND_UP(extent->height, isl_layout->bh),
.depth = DIV_ROUND_UP(extent->depth , isl_layout->bd),
});
}
/* Returns the user-provided VkBufferImageCopy::imageOffset in units of
* elements rather than texels. One element equals one texel or one block
* if Image is uncompressed or compressed, respectively.
*/
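/* e.g. with the same 4x4x1 block size, a texel offset of (8, 12, 0) maps to
 * an element offset of (2, 3, 0).
 */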
static struct VkOffset3D
meta_region_offset_el(const struct anv_image *image,
const struct VkOffset3D *offset)
{
const struct isl_format_layout *isl_layout = image->format->isl_layout;
return anv_sanitize_image_offset(image->type, (VkOffset3D) {
.x = offset->x / isl_layout->bw,
.y = offset->y / isl_layout->bh,
.z = offset->z / isl_layout->bd,
});
}
static struct anv_meta_blit2d_surf
blit_surf_for_image(const struct anv_image* image,
const struct isl_surf *img_isl_surf)
{
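   /* Collect just what the 2D blitter needs from the image: its BO and
    * tiling, the base offset into that BO, the bytes per element of the ISL
    * format, and the surface's row pitch.
    */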
return (struct anv_meta_blit2d_surf) {
.bo = image->bo,
.tiling = img_isl_surf->tiling,
.base_offset = image->offset,
.bs = isl_format_get_layout(img_isl_surf->format)->bs,
.pitch = isl_surf_get_row_pitch(img_isl_surf),
};
}
static void
do_buffer_copy(struct anv_cmd_buffer *cmd_buffer,
struct anv_bo *src, uint64_t src_offset,
struct anv_bo *dest, uint64_t dest_offset,
int width, int height, int bs)
{
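   /* Model both buffer ranges as tightly packed, linear width x height
    * "surfaces" of bs-byte elements and let the 2D blitter copy them as a
    * single rectangle.
    */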
struct anv_meta_blit2d_surf b_src = {
.bo = src,
.tiling = ISL_TILING_LINEAR,
.base_offset = src_offset,
.bs = bs,
.pitch = width * bs,
};
struct anv_meta_blit2d_surf b_dst = {
.bo = dest,
.tiling = ISL_TILING_LINEAR,
.base_offset = dest_offset,
.bs = bs,
.pitch = width * bs,
};
struct anv_meta_blit2d_rect rect = {
.width = width,
.height = height,
};
anv_meta_blit2d(cmd_buffer, &b_src, &b_dst, 1, &rect);
}
static void
meta_copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
struct anv_buffer* buffer,
struct anv_image* image,
uint32_t regionCount,
const VkBufferImageCopy* pRegions,
bool forward)
{
struct anv_meta_saved_state saved_state;
/* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
* VK_SAMPLE_COUNT_1_BIT."
*/
assert(image->samples == 1);
anv_meta_begin_blit2d(cmd_buffer, &saved_state);
for (unsigned r = 0; r < regionCount; r++) {
/**
 * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
 * imageExtent is the size in texels of the image to copy in width, height
 * and depth. 1D images use only x and width. 2D images use x, y, width
 * and height. 3D images use x, y, z, width, height and depth.
 *
* Also, convert the offsets and extent from units of texels to units of
* blocks - which is the highest resolution accessible in this command.
*/
const VkOffset3D img_offset_el =
meta_region_offset_el(image, &pRegions[r].imageOffset);
const VkExtent3D bufferExtent = {
.width = pRegions[r].bufferRowLength,
.height = pRegions[r].bufferImageHeight,
};
/* Start creating blit rect */
const VkExtent3D buf_extent_el =
meta_region_extent_el(image, &bufferExtent);
const VkExtent3D img_extent_el =
meta_region_extent_el(image, &pRegions[r].imageExtent);
struct anv_meta_blit2d_rect rect = {
.width = MAX2(buf_extent_el.width, img_extent_el.width),
.height = MAX2(buf_extent_el.height, img_extent_el.height),
};
/* Create blit surfaces */
VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask;
const struct isl_surf *img_isl_surf =
&anv_image_get_surface_for_aspect_mask(image, aspect)->isl;
struct anv_meta_blit2d_surf img_bsurf =
blit_surf_for_image(image, img_isl_surf);
struct anv_meta_blit2d_surf buf_bsurf = {
.bo = buffer->bo,
.tiling = ISL_TILING_LINEAR,
.base_offset = buffer->offset + pRegions[r].bufferOffset,
.bs = forward ? image->format->isl_layout->bs : img_bsurf.bs,
.pitch = rect.width * buf_bsurf.bs,
};
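      /* The buffer side is a linear surface whose row pitch is the blit
       * rectangle's width; a nonzero bufferRowLength/bufferImageHeight can
       * only widen that rectangle through the MAX2() above.
       */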
/* Set direction-dependent variables */
struct anv_meta_blit2d_surf *dst_bsurf = forward ? &img_bsurf : &buf_bsurf;
struct anv_meta_blit2d_surf *src_bsurf = forward ? &buf_bsurf : &img_bsurf;
uint32_t *x_offset = forward ? &rect.dst_x : &rect.src_x;
uint32_t *y_offset = forward ? &rect.dst_y : &rect.src_y;
/* Loop through each 3D or array slice */
unsigned num_slices_3d = img_extent_el.depth;
unsigned num_slices_array = pRegions[r].imageSubresource.layerCount;
unsigned slice_3d = 0;
unsigned slice_array = 0;
while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
/* Finish creating blit rect */
isl_surf_get_image_offset_el(img_isl_surf,
pRegions[r].imageSubresource.mipLevel,
pRegions[r].imageSubresource.baseArrayLayer
+ slice_array,
img_offset_el.z + slice_3d,
x_offset,
y_offset);
*x_offset += img_offset_el.x;
*y_offset += img_offset_el.y;
/* Perform Blit */
anv_meta_blit2d(cmd_buffer, src_bsurf, dst_bsurf, 1, &rect);
      /* Once we've done the blit, all of the actual information about the
       * surface is baked into the command buffer, so we can simply advance
       * the buffer surface's base offset to the next slice's data,
       * effectively re-binding it to different backing memory.
       */
buf_bsurf.base_offset += rect.width * rect.height * buf_bsurf.bs;
if (image->type == VK_IMAGE_TYPE_3D)
slice_3d++;
else
slice_array++;
}
}
anv_meta_end_blit2d(cmd_buffer, &saved_state);
}
void anv_CmdCopyBufferToImage(
VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_image, dest_image, destImage);
ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
regionCount, pRegions, true);
}
void anv_CmdCopyImageToBuffer(
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkBuffer destBuffer,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_image, src_image, srcImage);
ANV_FROM_HANDLE(anv_buffer, dst_buffer, destBuffer);
meta_copy_buffer_to_image(cmd_buffer, dst_buffer, src_image,
regionCount, pRegions, false);
}
void anv_CmdCopyImage(
VkCommandBuffer commandBuffer,
VkImage srcImage,
VkImageLayout srcImageLayout,
VkImage destImage,
VkImageLayout destImageLayout,
uint32_t regionCount,
const VkImageCopy* pRegions)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_image, src_image, srcImage);
ANV_FROM_HANDLE(anv_image, dest_image, destImage);
struct anv_meta_saved_state saved_state;
/* From the Vulkan 1.0 spec:
*
* vkCmdCopyImage can be used to copy image data between multisample
* images, but both images must have the same number of samples.
*/
assert(src_image->samples == dest_image->samples);
anv_meta_begin_blit2d(cmd_buffer, &saved_state);
for (unsigned r = 0; r < regionCount; r++) {
assert(pRegions[r].srcSubresource.aspectMask ==
pRegions[r].dstSubresource.aspectMask);
VkImageAspectFlags aspect = pRegions[r].srcSubresource.aspectMask;
/* Create blit surfaces */
struct isl_surf *src_isl_surf =
&anv_image_get_surface_for_aspect_mask(src_image, aspect)->isl;
struct isl_surf *dst_isl_surf =
&anv_image_get_surface_for_aspect_mask(dest_image, aspect)->isl;
struct anv_meta_blit2d_surf b_src =
blit_surf_for_image(src_image, src_isl_surf);
struct anv_meta_blit2d_surf b_dst =
blit_surf_for_image(dest_image, dst_isl_surf);
/**
 * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
 * extent is the size in texels of the source image to copy in width,
 * height and depth. 1D images use only x and width. 2D images use x, y,
 * width and height. 3D images use x, y, z, width, height and depth.
*
* Also, convert the offsets and extent from units of texels to units of
* blocks - which is the highest resolution accessible in this command.
*/
const VkOffset3D dst_offset_el =
meta_region_offset_el(dest_image, &pRegions[r].dstOffset);
const VkOffset3D src_offset_el =
meta_region_offset_el(src_image, &pRegions[r].srcOffset);
const VkExtent3D img_extent_el =
meta_region_extent_el(src_image, &pRegions[r].extent);
/* Start creating blit rect */
struct anv_meta_blit2d_rect rect = {
.width = img_extent_el.width,
.height = img_extent_el.height,
};
/* Loop through each 3D or array slice */
unsigned num_slices_3d = img_extent_el.depth;
unsigned num_slices_array = pRegions[r].dstSubresource.layerCount;
unsigned slice_3d = 0;
unsigned slice_array = 0;
while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
/* Finish creating blit rect */
isl_surf_get_image_offset_el(dst_isl_surf,
pRegions[r].dstSubresource.mipLevel,
pRegions[r].dstSubresource.baseArrayLayer
+ slice_array,
dst_offset_el.z + slice_3d,
&rect.dst_x,
&rect.dst_y);
isl_surf_get_image_offset_el(src_isl_surf,
pRegions[r].srcSubresource.mipLevel,
pRegions[r].srcSubresource.baseArrayLayer
+ slice_array,
src_offset_el.z + slice_3d,
&rect.src_x,
&rect.src_y);
rect.dst_x += dst_offset_el.x;
rect.dst_y += dst_offset_el.y;
rect.src_x += src_offset_el.x;
rect.src_y += src_offset_el.y;
/* Perform Blit */
anv_meta_blit2d(cmd_buffer, &b_src, &b_dst, 1, &rect);
if (dest_image->type == VK_IMAGE_TYPE_3D)
slice_3d++;
else
slice_array++;
}
}
anv_meta_end_blit2d(cmd_buffer, &saved_state);
}
void anv_CmdCopyBuffer(
VkCommandBuffer commandBuffer,
VkBuffer srcBuffer,
VkBuffer destBuffer,
uint32_t regionCount,
const VkBufferCopy* pRegions)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, src_buffer, srcBuffer);
ANV_FROM_HANDLE(anv_buffer, dest_buffer, destBuffer);
struct anv_meta_saved_state saved_state;
anv_meta_begin_blit2d(cmd_buffer, &saved_state);
for (unsigned r = 0; r < regionCount; r++) {
uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset;
uint64_t dest_offset = dest_buffer->offset + pRegions[r].dstOffset;
uint64_t copy_size = pRegions[r].size;
/* First, we compute the biggest format that can be used with the
* given offsets and size.
*/
int bs = 16;
int fs = ffs(src_offset) - 1;
if (fs != -1)
bs = MIN2(bs, 1 << fs);
assert(src_offset % bs == 0);
fs = ffs(dest_offset) - 1;
if (fs != -1)
bs = MIN2(bs, 1 << fs);
assert(dest_offset % bs == 0);
fs = ffs(pRegions[r].size) - 1;
if (fs != -1)
bs = MIN2(bs, 1 << fs);
assert(pRegions[r].size % bs == 0);
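      /* Illustrative arithmetic: with src_offset = 48, dest_offset = 32 and
       * size = 64, the candidate block sizes are 16, 32 and 64, so bs stays
       * at its 16-byte maximum; a dest_offset of 12 would instead force
       * bs = 4.
       */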
/* This is maximum possible width/height our HW can handle */
uint64_t max_surface_dim = 1 << 14;
/* First, we make a bunch of max-sized copies */
uint64_t max_copy_size = max_surface_dim * max_surface_dim * bs;
while (copy_size >= max_copy_size) {
do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
dest_buffer->bo, dest_offset,
max_surface_dim, max_surface_dim, bs);
copy_size -= max_copy_size;
src_offset += max_copy_size;
dest_offset += max_copy_size;
}
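      /* Whatever remains is smaller than one 16384 x 16384 x bs copy: at
       * most one full-width rectangle of `height` rows plus a final partial
       * row, handled below.
       */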
uint64_t height = copy_size / (max_surface_dim * bs);
assert(height < max_surface_dim);
if (height != 0) {
uint64_t rect_copy_size = height * max_surface_dim * bs;
do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
dest_buffer->bo, dest_offset,
max_surface_dim, height, bs);
copy_size -= rect_copy_size;
src_offset += rect_copy_size;
dest_offset += rect_copy_size;
}
if (copy_size != 0) {
do_buffer_copy(cmd_buffer, src_buffer->bo, src_offset,
dest_buffer->bo, dest_offset,
copy_size / bs, 1, bs);
}
}
anv_meta_end_blit2d(cmd_buffer, &saved_state);
}
void anv_CmdUpdateBuffer(
VkCommandBuffer commandBuffer,
VkBuffer dstBuffer,
VkDeviceSize dstOffset,
VkDeviceSize dataSize,
const uint32_t* pData)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
struct anv_meta_saved_state saved_state;
anv_meta_begin_blit2d(cmd_buffer, &saved_state);
/* We can't quite grab a full block because the state stream needs a
* little data at the top to build its linked list.
*/
const uint32_t max_update_size =
cmd_buffer->device->dynamic_state_block_pool.block_size - 64;
assert(max_update_size < (1 << 14) * 4);
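   /* With bs >= 4 (chosen below), each iteration's row of copy_size / bs
    * elements is guaranteed to fit within the 2^14 surface-width limit
    * checked above.
    */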
while (dataSize) {
const uint32_t copy_size = MIN2(dataSize, max_update_size);
struct anv_state tmp_data =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);
memcpy(tmp_data.map, pData, copy_size);
int bs;
if ((copy_size & 15) == 0 && (dstOffset & 15) == 0) {
bs = 16;
} else if ((copy_size & 7) == 0 && (dstOffset & 7) == 0) {
bs = 8;
} else {
assert((copy_size & 3) == 0 && (dstOffset & 3) == 0);
bs = 4;
}
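      /* e.g. dstOffset = 0x1004 with copy_size = 0x20: both are multiples
       * of 4 but dstOffset is not a multiple of 8, so bs = 4.
       */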
do_buffer_copy(cmd_buffer,
&cmd_buffer->device->dynamic_state_block_pool.bo,
tmp_data.offset,
dst_buffer->bo, dst_buffer->offset + dstOffset,
copy_size / bs, 1, bs);
dataSize -= copy_size;
dstOffset += copy_size;
pData = (void *)pData + copy_size;
}
anv_meta_end_blit2d(cmd_buffer, &saved_state);
}

View file

@@ -0,0 +1,870 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include "anv_meta.h"
#include "anv_private.h"
#include "nir/nir_builder.h"
/**
* Vertex attributes used by all pipelines.
*/
struct vertex_attrs {
struct anv_vue_header vue_header;
float position[2]; /**< 3DPRIM_RECTLIST */
float tex_position[2];
};
static void
meta_resolve_save(struct anv_meta_saved_state *saved_state,
struct anv_cmd_buffer *cmd_buffer)
{
anv_meta_save(saved_state, cmd_buffer, 0);
}
static void
meta_resolve_restore(struct anv_meta_saved_state *saved_state,
struct anv_cmd_buffer *cmd_buffer)
{
anv_meta_restore(saved_state, cmd_buffer);
}
static VkPipeline *
get_pipeline_h(struct anv_device *device, uint32_t samples)
{
uint32_t i = ffs(samples) - 2; /* log2(samples) - 1 */
assert(samples >= 2);
assert(i < ARRAY_SIZE(device->meta_state.resolve.pipelines));
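   /* Power-of-two sample counts map to consecutive slots:
    * 2x -> 0, 4x -> 1, 8x -> 2, 16x -> 3.
    */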
return &device->meta_state.resolve.pipelines[i];
}
static nir_shader *
build_nir_vs(void)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b;
nir_variable *a_position;
nir_variable *v_position;
nir_variable *a_tex_position;
nir_variable *v_tex_position;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
"a_position");
a_position->data.location = VERT_ATTRIB_GENERIC0;
v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
"gl_Position");
v_position->data.location = VARYING_SLOT_POS;
a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
"a_tex_position");
a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
"v_tex_position");
v_tex_position->data.location = VARYING_SLOT_VAR0;
nir_copy_var(&b, v_position, a_position);
nir_copy_var(&b, v_tex_position, a_tex_position);
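   /* A pure pass-through shader: as on the blit path, the VS is disabled at
    * pipeline-creation time (disable_vs = true), so this NIR mostly exists
    * so the compiler doesn't dead-code the vertex inputs.
    */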
return b.shader;
}
static nir_shader *
build_nir_fs(uint32_t num_samples)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *sampler2DMS =
glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
/*is_shadow*/ false,
/*is_array*/ false,
GLSL_TYPE_FLOAT);
nir_builder b;
nir_variable *u_tex; /* uniform sampler */
nir_variable *v_position; /* vec4, varying fragment position */
nir_variable *v_tex_position; /* vec4, varying texture coordinate */
nir_variable *f_color; /* vec4, fragment output color */
nir_ssa_def *accum; /* vec4, accumulation of sample values */
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_asprintf(b.shader,
"meta_resolve_fs_samples%02d",
num_samples);
u_tex = nir_variable_create(b.shader, nir_var_uniform, sampler2DMS,
"u_tex");
u_tex->data.descriptor_set = 0;
u_tex->data.binding = 0;
v_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
"v_position");
v_position->data.location = VARYING_SLOT_POS;
v_position->data.origin_upper_left = true;
v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
"v_tex_position");
v_tex_position->data.location = VARYING_SLOT_VAR0;
f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
"f_color");
f_color->data.location = FRAG_RESULT_DATA0;
accum = nir_imm_vec4(&b, 0, 0, 0, 0);
nir_ssa_def *tex_position_ivec =
nir_f2i(&b, nir_load_var(&b, v_tex_position));
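   /* Fetch every sample of the texel with txf_ms and sum them; dividing by
    * the sample count below gives a simple box-filter (average) resolve.
    */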
for (uint32_t i = 0; i < num_samples; ++i) {
nir_tex_instr *tex;
tex = nir_tex_instr_create(b.shader, /*num_srcs*/ 2);
tex->texture = nir_deref_var_create(tex, u_tex);
tex->sampler = nir_deref_var_create(tex, u_tex);
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex->op = nir_texop_txf_ms;
tex->src[0].src = nir_src_for_ssa(tex_position_ivec);
tex->src[0].src_type = nir_tex_src_coord;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
tex->src[1].src_type = nir_tex_src_ms_index;
tex->dest_type = nir_type_float;
tex->is_array = false;
tex->coord_components = 3;
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
accum = nir_fadd(&b, accum, &tex->dest.ssa);
}
accum = nir_fdiv(&b, accum, nir_imm_float(&b, num_samples));
nir_store_var(&b, f_color, accum, /*writemask*/ 4);
return b.shader;
}
static VkResult
create_pass(struct anv_device *device)
{
VkResult result;
VkDevice device_h = anv_device_to_handle(device);
const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
result = anv_CreateRenderPass(device_h,
&(VkRenderPassCreateInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &(VkAttachmentDescription) {
.format = VK_FORMAT_UNDEFINED, /* Our shaders don't care */
.samples = 1,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
},
.subpassCount = 1,
.pSubpasses = &(VkSubpassDescription) {
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.colorAttachmentCount = 1,
.pColorAttachments = &(VkAttachmentReference) {
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.pResolveAttachments = NULL,
.pDepthStencilAttachment = &(VkAttachmentReference) {
.attachment = VK_ATTACHMENT_UNUSED,
},
.preserveAttachmentCount = 0,
.pPreserveAttachments = NULL,
},
.dependencyCount = 0,
},
alloc,
&device->meta_state.resolve.pass);
return result;
}
static VkResult
create_pipeline(struct anv_device *device,
uint32_t num_samples,
VkShaderModule vs_module_h)
{
VkResult result;
VkDevice device_h = anv_device_to_handle(device);
struct anv_shader_module fs_module = {
.nir = build_nir_fs(num_samples),
};
if (!fs_module.nir) {
/* XXX: Need more accurate error */
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto cleanup;
}
result = anv_graphics_pipeline_create(device_h,
VK_NULL_HANDLE,
&(VkGraphicsPipelineCreateInfo) {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = 2,
.pStages = (VkPipelineShaderStageCreateInfo[]) {
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = vs_module_h,
.pName = "main",
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = anv_shader_module_to_handle(&fs_module),
.pName = "main",
},
},
.pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
{
.binding = 0,
.stride = sizeof(struct vertex_attrs),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
.vertexAttributeDescriptionCount = 3,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* VUE Header */
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32B32A32_UINT,
.offset = offsetof(struct vertex_attrs, vue_header),
},
{
/* Position */
.location = 1,
.binding = 0,
.format = VK_FORMAT_R32G32_SFLOAT,
.offset = offsetof(struct vertex_attrs, position),
},
{
/* Texture Coordinate */
.location = 2,
.binding = 0,
.format = VK_FORMAT_R32G32_SFLOAT,
.offset = offsetof(struct vertex_attrs, tex_position),
},
},
},
.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
.primitiveRestartEnable = false,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_NONE,
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
},
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.rasterizationSamples = 1,
.sampleShadingEnable = false,
.pSampleMask = (VkSampleMask[]) { 0x1 },
.alphaToCoverageEnable = false,
.alphaToOneEnable = false,
},
.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.logicOpEnable = false,
.attachmentCount = 1,
.pAttachments = (VkPipelineColorBlendAttachmentState []) {
{
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT |
VK_COLOR_COMPONENT_A_BIT,
},
},
},
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = 2,
.pDynamicStates = (VkDynamicState[]) {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
},
},
.layout = device->meta_state.resolve.pipeline_layout,
.renderPass = device->meta_state.resolve.pass,
.subpass = 0,
},
&(struct anv_graphics_pipeline_create_info) {
.color_attachment_count = -1,
.use_repclear = false,
.disable_vs = true,
.use_rectlist = true
},
&device->meta_state.alloc,
get_pipeline_h(device, num_samples));
if (result != VK_SUCCESS)
goto cleanup;
goto cleanup;
cleanup:
ralloc_free(fs_module.nir);
return result;
}
void
anv_device_finish_meta_resolve_state(struct anv_device *device)
{
struct anv_meta_state *state = &device->meta_state;
VkDevice device_h = anv_device_to_handle(device);
VkRenderPass pass_h = device->meta_state.resolve.pass;
VkPipelineLayout pipeline_layout_h = device->meta_state.resolve.pipeline_layout;
VkDescriptorSetLayout ds_layout_h = device->meta_state.resolve.ds_layout;
const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
if (pass_h)
ANV_CALL(DestroyRenderPass)(device_h, pass_h,
&device->meta_state.alloc);
if (pipeline_layout_h)
ANV_CALL(DestroyPipelineLayout)(device_h, pipeline_layout_h, alloc);
if (ds_layout_h)
ANV_CALL(DestroyDescriptorSetLayout)(device_h, ds_layout_h, alloc);
for (uint32_t i = 0; i < ARRAY_SIZE(state->resolve.pipelines); ++i) {
VkPipeline pipeline_h = state->resolve.pipelines[i];
if (pipeline_h) {
ANV_CALL(DestroyPipeline)(device_h, pipeline_h, alloc);
}
}
}
VkResult
anv_device_init_meta_resolve_state(struct anv_device *device)
{
VkResult res = VK_SUCCESS;
VkDevice device_h = anv_device_to_handle(device);
const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
const isl_sample_count_mask_t sample_count_mask =
isl_device_get_sample_counts(&device->isl_dev);
zero(device->meta_state.resolve);
struct anv_shader_module vs_module = { .nir = build_nir_vs() };
if (!vs_module.nir) {
/* XXX: Need more accurate error */
res = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
VkShaderModule vs_module_h = anv_shader_module_to_handle(&vs_module);
res = anv_CreateDescriptorSetLayout(device_h,
&(VkDescriptorSetLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
},
},
},
alloc,
&device->meta_state.resolve.ds_layout);
if (res != VK_SUCCESS)
goto fail;
res = anv_CreatePipelineLayout(device_h,
&(VkPipelineLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = (VkDescriptorSetLayout[]) {
device->meta_state.resolve.ds_layout,
},
},
alloc,
&device->meta_state.resolve.pipeline_layout);
if (res != VK_SUCCESS)
goto fail;
res = create_pass(device);
if (res != VK_SUCCESS)
goto fail;
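   /* Create one resolve pipeline per power-of-two sample count (2x, 4x, ...),
    * skipping any count the sample-count mask reports as unsupported.
    */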
for (uint32_t i = 0;
i < ARRAY_SIZE(device->meta_state.resolve.pipelines); ++i) {
uint32_t sample_count = 1 << (1 + i);
if (!(sample_count_mask & sample_count))
continue;
res = create_pipeline(device, sample_count, vs_module_h);
if (res != VK_SUCCESS)
goto fail;
}
goto cleanup;
fail:
anv_device_finish_meta_resolve_state(device);
cleanup:
ralloc_free(vs_module.nir);
return res;
}
static void
emit_resolve(struct anv_cmd_buffer *cmd_buffer,
struct anv_image_view *src_iview,
const VkOffset2D *src_offset,
struct anv_image_view *dest_iview,
const VkOffset2D *dest_offset,
const VkExtent2D *resolve_extent)
{
struct anv_device *device = cmd_buffer->device;
VkDevice device_h = anv_device_to_handle(device);
VkCommandBuffer cmd_buffer_h = anv_cmd_buffer_to_handle(cmd_buffer);
const struct anv_image *src_image = src_iview->image;
const struct vertex_attrs vertex_data[3] = {
{
.vue_header = {0},
.position = {
dest_offset->x + resolve_extent->width,
dest_offset->y + resolve_extent->height,
},
.tex_position = {
src_offset->x + resolve_extent->width,
src_offset->y + resolve_extent->height,
},
},
{
.vue_header = {0},
.position = {
dest_offset->x,
dest_offset->y + resolve_extent->height,
},
.tex_position = {
src_offset->x,
src_offset->y + resolve_extent->height,
},
},
{
.vue_header = {0},
.position = {
dest_offset->x,
dest_offset->y,
},
.tex_position = {
src_offset->x,
src_offset->y,
},
},
};
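   /* Three vertices suffice because the pipeline draws a 3DPRIM_RECTLIST:
    * the destination rectangle is described by three of its corners, with
    * matching source texture coordinates.
    */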
struct anv_state vertex_mem =
anv_cmd_buffer_emit_dynamic(cmd_buffer, vertex_data,
sizeof(vertex_data), 16);
struct anv_buffer vertex_buffer = {
.device = device,
.size = sizeof(vertex_data),
.bo = &cmd_buffer->dynamic_state_stream.block_pool->bo,
.offset = vertex_mem.offset,
};
VkBuffer vertex_buffer_h = anv_buffer_to_handle(&vertex_buffer);
anv_CmdBindVertexBuffers(cmd_buffer_h,
/*firstBinding*/ 0,
/*bindingCount*/ 1,
(VkBuffer[]) { vertex_buffer_h },
(VkDeviceSize[]) { 0 });
VkSampler sampler_h;
ANV_CALL(CreateSampler)(device_h,
&(VkSamplerCreateInfo) {
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.magFilter = VK_FILTER_NEAREST,
.minFilter = VK_FILTER_NEAREST,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
.mipLodBias = 0.0,
.anisotropyEnable = false,
.compareEnable = false,
.minLod = 0.0,
.maxLod = 0.0,
.unnormalizedCoordinates = false,
},
&cmd_buffer->pool->alloc,
&sampler_h);
VkDescriptorPool desc_pool;
anv_CreateDescriptorPool(anv_device_to_handle(device),
&(const VkDescriptorPoolCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.pNext = NULL,
.flags = 0,
.maxSets = 1,
.poolSizeCount = 1,
.pPoolSizes = (VkDescriptorPoolSize[]) {
{
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1
},
}
}, &cmd_buffer->pool->alloc, &desc_pool);
VkDescriptorSet desc_set_h;
anv_AllocateDescriptorSets(device_h,
&(VkDescriptorSetAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = desc_pool,
.descriptorSetCount = 1,
.pSetLayouts = (VkDescriptorSetLayout[]) {
device->meta_state.resolve.ds_layout,
},
},
&desc_set_h);
anv_UpdateDescriptorSets(device_h,
/*writeCount*/ 1,
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = desc_set_h,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = sampler_h,
.imageView = anv_image_view_to_handle(src_iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
},
},
},
/*copyCount*/ 0,
/*copies */ NULL);
VkPipeline pipeline_h = *get_pipeline_h(device, src_image->samples);
ANV_FROM_HANDLE(anv_pipeline, pipeline, pipeline_h);
if (cmd_buffer->state.pipeline != pipeline) {
anv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline_h);
}
anv_CmdBindDescriptorSets(cmd_buffer_h,
VK_PIPELINE_BIND_POINT_GRAPHICS,
device->meta_state.resolve.pipeline_layout,
/*firstSet*/ 0,
/* setCount */ 1,
(VkDescriptorSet[]) {
desc_set_h,
},
/*copyCount*/ 0,
/*copies */ NULL);
ANV_CALL(CmdDraw)(cmd_buffer_h, 3, 1, 0, 0);
/* All objects below are consumed by the draw call. We may safely destroy
* them.
*/
anv_DestroyDescriptorPool(anv_device_to_handle(device),
desc_pool, &cmd_buffer->pool->alloc);
anv_DestroySampler(device_h, sampler_h,
&cmd_buffer->pool->alloc);
}
void anv_CmdResolveImage(
VkCommandBuffer cmd_buffer_h,
VkImage src_image_h,
VkImageLayout src_image_layout,
VkImage dest_image_h,
VkImageLayout dest_image_layout,
uint32_t region_count,
const VkImageResolve* regions)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmd_buffer_h);
ANV_FROM_HANDLE(anv_image, src_image, src_image_h);
ANV_FROM_HANDLE(anv_image, dest_image, dest_image_h);
struct anv_device *device = cmd_buffer->device;
struct anv_meta_saved_state state;
VkDevice device_h = anv_device_to_handle(device);
meta_resolve_save(&state, cmd_buffer);
assert(src_image->samples > 1);
assert(dest_image->samples == 1);
if (src_image->samples >= 16) {
/* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
* glBlitFramebuffer workaround for samples >= 16.
*/
anv_finishme("vkCmdResolveImage: need interpolation workaround when "
"samples >= 16");
}
if (src_image->array_size > 1)
anv_finishme("vkCmdResolveImage: multisample array images");
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
/* From the Vulkan 1.0 spec:
*
* - The aspectMask member of srcSubresource and dstSubresource must
* only contain VK_IMAGE_ASPECT_COLOR_BIT
*
* - The layerCount member of srcSubresource and dstSubresource must
* match
*/
assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->srcSubresource.layerCount ==
region->dstSubresource.layerCount);
const uint32_t src_base_layer =
anv_meta_get_iview_layer(src_image, &region->srcSubresource,
&region->srcOffset);
const uint32_t dest_base_layer =
anv_meta_get_iview_layer(dest_image, &region->dstSubresource,
&region->dstOffset);
/**
* From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
*
* extent is the size in texels of the source image to resolve in width,
* height and depth. 1D images use only x and width. 2D images use x, y,
* width and height. 3D images use x, y, z, width, height and depth.
*
* srcOffset and dstOffset select the initial x, y, and z offsets in
* texels of the sub-regions of the source and destination image data.
*/
const struct VkExtent3D extent =
anv_sanitize_image_extent(src_image->type, region->extent);
const struct VkOffset3D srcOffset =
anv_sanitize_image_offset(src_image->type, region->srcOffset);
const struct VkOffset3D dstOffset =
anv_sanitize_image_offset(dest_image->type, region->dstOffset);
for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
++layer) {
struct anv_image_view src_iview;
anv_image_view_init(&src_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = src_image_h,
.viewType = anv_meta_get_view_type(src_image),
.format = src_image->format->vk_format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = region->srcSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = src_base_layer + layer,
.layerCount = 1,
},
},
cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
struct anv_image_view dest_iview;
anv_image_view_init(&dest_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = dest_image_h,
.viewType = anv_meta_get_view_type(dest_image),
.format = dest_image->format->vk_format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = region->dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = dest_base_layer + layer,
.layerCount = 1,
},
},
cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
VkFramebuffer fb_h;
anv_CreateFramebuffer(device_h,
&(VkFramebufferCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = (VkImageView[]) {
anv_image_view_to_handle(&dest_iview),
},
.width = anv_minify(dest_image->extent.width,
region->dstSubresource.mipLevel),
.height = anv_minify(dest_image->extent.height,
region->dstSubresource.mipLevel),
.layers = 1
},
&cmd_buffer->pool->alloc,
&fb_h);
ANV_CALL(CmdBeginRenderPass)(cmd_buffer_h,
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = device->meta_state.resolve.pass,
.framebuffer = fb_h,
.renderArea = {
.offset = {
dstOffset.x,
dstOffset.y,
},
.extent = {
extent.width,
extent.height,
}
},
.clearValueCount = 0,
.pClearValues = NULL,
},
VK_SUBPASS_CONTENTS_INLINE);
emit_resolve(cmd_buffer,
&src_iview,
&(VkOffset2D) {
.x = srcOffset.x,
.y = srcOffset.y,
},
&dest_iview,
&(VkOffset2D) {
.x = dstOffset.x,
.y = dstOffset.y,
},
&(VkExtent2D) {
.width = extent.width,
.height = extent.height,
});
ANV_CALL(CmdEndRenderPass)(cmd_buffer_h);
anv_DestroyFramebuffer(device_h, fb_h,
&cmd_buffer->pool->alloc);
}
}
meta_resolve_restore(&state, cmd_buffer);
}
/**
* Emit any needed resolves for the current subpass.
*/
void
anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
struct anv_meta_saved_state saved_state;
/* FINISHME(perf): Skip clears for resolve attachments.
*
* From the Vulkan 1.0 spec:
*
* If the first use of an attachment in a render pass is as a resolve
* attachment, then the loadOp is effectively ignored as the resolve is
* guaranteed to overwrite all pixels in the render area.
*/
if (!subpass->has_resolve)
return;
meta_resolve_save(&saved_state, cmd_buffer);
for (uint32_t i = 0; i < subpass->color_count; ++i) {
uint32_t src_att = subpass->color_attachments[i];
uint32_t dest_att = subpass->resolve_attachments[i];
if (dest_att == VK_ATTACHMENT_UNUSED)
continue;
struct anv_image_view *src_iview = fb->attachments[src_att];
struct anv_image_view *dest_iview = fb->attachments[dest_att];
struct anv_subpass resolve_subpass = {
.color_count = 1,
.color_attachments = (uint32_t[]) { dest_att },
.depth_stencil_attachment = VK_ATTACHMENT_UNUSED,
};
anv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
/* Subpass resolves must respect the render area. We can ignore the
* render area here because vkCmdBeginRenderPass set the render area
* with 3DSTATE_DRAWING_RECTANGLE.
*
* XXX(chadv): Does the hardware really respect
* 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
*/
emit_resolve(cmd_buffer,
src_iview,
&(VkOffset2D) { 0, 0 },
dest_iview,
&(VkOffset2D) { 0, 0 },
&(VkExtent2D) { fb->width, fb->height });
}
cmd_buffer->state.subpass = subpass;
meta_resolve_restore(&saved_state, cmd_buffer);
}

View file

@@ -0,0 +1,45 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "nir/nir.h"
#include "anv_private.h"
#ifdef __cplusplus
extern "C" {
#endif
void anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar);
void anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
nir_shader *shader,
struct brw_stage_prog_data *prog_data);
void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
nir_shader *shader,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map);
#ifdef __cplusplus
}
#endif

View file

@@ -0,0 +1,172 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir/nir_builder.h"
struct apply_dynamic_offsets_state {
nir_shader *shader;
nir_builder builder;
const struct anv_pipeline_layout *layout;
uint32_t indices_start;
};
static bool
apply_dynamic_offsets_block(nir_block *block, void *void_state)
{
struct apply_dynamic_offsets_state *state = void_state;
struct anv_descriptor_set_layout *set_layout;
nir_builder *b = &state->builder;
nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
unsigned block_idx_src;
switch (intrin->intrinsic) {
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
block_idx_src = 0;
break;
case nir_intrinsic_store_ssbo:
block_idx_src = 1;
break;
default:
continue; /* the loop */
}
nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
assert(res_instr->type == nir_instr_type_intrinsic);
nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
unsigned set = res_intrin->const_index[0];
unsigned binding = res_intrin->const_index[1];
set_layout = state->layout->set[set].layout;
if (set_layout->binding[binding].dynamic_offset_index < 0)
continue;
b->cursor = nir_before_instr(&intrin->instr);
/* First, we need to generate the uniform load for the buffer offset */
uint32_t index = state->layout->set[set].dynamic_offset_start +
set_layout->binding[binding].dynamic_offset_index;
nir_intrinsic_instr *offset_load =
nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
offset_load->num_components = 2;
offset_load->const_index[0] = state->indices_start + index * 8;
offset_load->src[0] = nir_src_for_ssa(nir_imul(b, res_intrin->src[0].ssa,
nir_imm_int(b, 8)));
nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, 32, NULL);
nir_builder_instr_insert(b, &offset_load->instr);
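      /* offset_load produces a uvec2 per dynamic buffer: component 0 is the
       * dynamic offset and component 1 the bound range, taken from the
       * push-constant slots set up at the end of this pass.
       */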
nir_src *offset_src = nir_get_io_offset_src(intrin);
nir_ssa_def *new_offset = nir_iadd(b, offset_src->ssa,
&offset_load->dest.ssa);
/* In order to avoid out-of-bounds access, we predicate */
nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
offset_src->ssa);
nir_if *if_stmt = nir_if_create(b->shader);
if_stmt->condition = nir_src_for_ssa(pred);
nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
nir_instr_remove(&intrin->instr);
*offset_src = nir_src_for_ssa(new_offset);
nir_instr_insert_after_cf_list(&if_stmt->then_list, &intrin->instr);
if (intrin->intrinsic != nir_intrinsic_store_ssbo) {
/* It's a load, we need a phi node */
nir_phi_instr *phi = nir_phi_instr_create(b->shader);
nir_ssa_dest_init(&phi->instr, &phi->dest,
intrin->num_components,
intrin->dest.ssa.bit_size, NULL);
nir_phi_src *src1 = ralloc(phi, nir_phi_src);
struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
src1->src = nir_src_for_ssa(&intrin->dest.ssa);
exec_list_push_tail(&phi->srcs, &src1->node);
b->cursor = nir_after_cf_list(&if_stmt->else_list);
nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
(nir_const_value) { .u32 = { 0, 0, 0, 0 } });
nir_phi_src *src2 = ralloc(phi, nir_phi_src);
struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
src2->pred = exec_node_data(nir_block, enode, cf_node.node);
src2->src = nir_src_for_ssa(zero);
exec_list_push_tail(&phi->srcs, &src2->node);
assert(intrin->dest.is_ssa);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(&phi->dest.ssa));
nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
}
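      /* The access is now predicated, roughly:
       *
       *    if (range >= original_offset)
       *       result = access(original_offset + dynamic_offset);
       *    else
       *       result = 0;   (loads only; an out-of-bounds store is dropped)
       */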
}
return true;
}
void
anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
nir_shader *shader,
struct brw_stage_prog_data *prog_data)
{
struct apply_dynamic_offsets_state state = {
.shader = shader,
.layout = pipeline->layout,
.indices_start = shader->num_uniforms,
};
if (!state.layout || !state.layout->stage[shader->stage].has_dynamic_offsets)
return;
nir_foreach_function(shader, function) {
if (function->impl) {
nir_builder_init(&state.builder, function->impl);
nir_foreach_block(function->impl, apply_dynamic_offsets_block, &state);
nir_metadata_preserve(function->impl, nir_metadata_block_index |
nir_metadata_dominance);
}
}
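   /* null_data is never dereferenced: taking &null_data->dynamic[i].offset
    * is just a way to turn each (offset, range) member into its byte offset
    * within anv_push_constants, which is what the param pointers encode.
    */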
struct anv_push_constants *null_data = NULL;
for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
prog_data->param[i * 2 + shader->num_uniforms / 4] =
(const union gl_constant_value *)&null_data->dynamic[i].offset;
prog_data->param[i * 2 + 1 + shader->num_uniforms / 4] =
(const union gl_constant_value *)&null_data->dynamic[i].range;
}
shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 8;
}

View file

@@ -0,0 +1,387 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
struct apply_pipeline_layout_state {
nir_shader *shader;
nir_builder builder;
struct {
BITSET_WORD *used;
uint8_t *surface_offsets;
uint8_t *sampler_offsets;
uint8_t *image_offsets;
} set[MAX_SETS];
};
static void
add_binding(struct apply_pipeline_layout_state *state,
uint32_t set, uint32_t binding)
{
BITSET_SET(state->set[set].used, binding);
}
static void
add_var_binding(struct apply_pipeline_layout_state *state, nir_variable *var)
{
add_binding(state, var->data.descriptor_set, var->data.binding);
}
static bool
get_used_bindings_block(nir_block *block, void *void_state)
{
struct apply_pipeline_layout_state *state = void_state;
nir_foreach_instr_safe(block, instr) {
switch (instr->type) {
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
switch (intrin->intrinsic) {
case nir_intrinsic_vulkan_resource_index:
add_binding(state, nir_intrinsic_desc_set(intrin),
nir_intrinsic_binding(intrin));
break;
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_min:
case nir_intrinsic_image_atomic_max:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_image_size:
case nir_intrinsic_image_samples:
add_var_binding(state, intrin->variables[0]->var);
break;
default:
break;
}
break;
}
case nir_instr_type_tex: {
nir_tex_instr *tex = nir_instr_as_tex(instr);
assert(tex->texture);
add_var_binding(state, tex->texture->var);
if (tex->sampler)
add_var_binding(state, tex->sampler->var);
break;
}
default:
continue;
}
}
return true;
}
static void
lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
struct apply_pipeline_layout_state *state)
{
nir_builder *b = &state->builder;
b->cursor = nir_before_instr(&intrin->instr);
uint32_t set = nir_intrinsic_desc_set(intrin);
uint32_t binding = nir_intrinsic_binding(intrin);
uint32_t surface_index = state->set[set].surface_offsets[binding];
nir_const_value *const_block_idx =
nir_src_as_const_value(intrin->src[0]);
nir_ssa_def *block_index;
if (const_block_idx) {
block_index = nir_imm_int(b, surface_index + const_block_idx->u32[0]);
} else {
block_index = nir_iadd(b, nir_imm_int(b, surface_index),
nir_ssa_for_src(b, intrin->src[0], 1));
}
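   /* The resource index now reduces to a plain binding-table entry: the
    * binding's surface offset plus the array index, constant-folded when the
    * index is immediate.
    */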
assert(intrin->dest.is_ssa);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(block_index));
nir_instr_remove(&intrin->instr);
}
static void
lower_tex_deref(nir_tex_instr *tex, nir_deref_var *deref,
unsigned *const_index, nir_tex_src_type src_type,
struct apply_pipeline_layout_state *state)
{
if (deref->deref.child) {
assert(deref->deref.child->deref_type == nir_deref_type_array);
nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
*const_index += deref_array->base_offset;
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
tex->num_srcs + 1);
for (unsigned i = 0; i < tex->num_srcs; i++) {
new_srcs[i].src_type = tex->src[i].src_type;
nir_instr_move_src(&tex->instr, &new_srcs[i].src, &tex->src[i].src);
}
ralloc_free(tex->src);
tex->src = new_srcs;
/* Now we can go ahead and move the source over to being a
* first-class texture source.
*/
tex->src[tex->num_srcs].src_type = src_type;
tex->num_srcs++;
assert(deref_array->indirect.is_ssa);
nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs - 1].src,
deref_array->indirect);
}
}
}
static void
cleanup_tex_deref(nir_tex_instr *tex, nir_deref_var *deref)
{
if (deref->deref.child == NULL)
return;
nir_deref_array *deref_array = nir_deref_as_array(deref->deref.child);
if (deref_array->deref_array_type != nir_deref_array_type_indirect)
return;
nir_instr_rewrite_src(&tex->instr, &deref_array->indirect, NIR_SRC_INIT);
}
static void
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
{
/* No one should have come by and lowered it already */
assert(tex->texture);
unsigned set = tex->texture->var->data.descriptor_set;
unsigned binding = tex->texture->var->data.binding;
tex->texture_index = state->set[set].surface_offsets[binding];
lower_tex_deref(tex, tex->texture, &tex->texture_index,
nir_tex_src_texture_offset, state);
if (tex->sampler) {
unsigned set = tex->sampler->var->data.descriptor_set;
unsigned binding = tex->sampler->var->data.binding;
tex->sampler_index = state->set[set].sampler_offsets[binding];
lower_tex_deref(tex, tex->sampler, &tex->sampler_index,
nir_tex_src_sampler_offset, state);
}
/* The backend only ever uses this to mark used surfaces. We don't care
* about that little optimization so it just needs to be non-zero.
*/
tex->texture_array_size = 1;
cleanup_tex_deref(tex, tex->texture);
if (tex->sampler)
cleanup_tex_deref(tex, tex->sampler);
tex->texture = NULL;
tex->sampler = NULL;
}
static bool
apply_pipeline_layout_block(nir_block *block, void *void_state)
{
struct apply_pipeline_layout_state *state = void_state;
nir_foreach_instr_safe(block, instr) {
switch (instr->type) {
case nir_instr_type_intrinsic: {
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
lower_res_index_intrinsic(intrin, state);
}
break;
}
case nir_instr_type_tex:
lower_tex(nir_instr_as_tex(instr), state);
break;
default:
continue;
}
}
return true;
}
static void
setup_vec4_uniform_value(const union gl_constant_value **params,
const union gl_constant_value *values,
unsigned n)
{
static const gl_constant_value zero = { 0 };
for (unsigned i = 0; i < n; ++i)
params[i] = &values[i];
for (unsigned i = n; i < 4; ++i)
params[i] = &zero;
}
void
anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
nir_shader *shader,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map)
{
struct anv_pipeline_layout *layout = pipeline->layout;
struct apply_pipeline_layout_state state = {
.shader = shader,
};
void *mem_ctx = ralloc_context(NULL);
for (unsigned s = 0; s < layout->num_sets; s++) {
const unsigned count = layout->set[s].layout->binding_count;
const unsigned words = BITSET_WORDS(count);
state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
}
nir_foreach_function(shader, function) {
if (function->impl)
nir_foreach_block(function->impl, get_used_bindings_block, &state);
}
for (uint32_t set = 0; set < layout->num_sets; set++) {
struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
BITSET_WORD b, _tmp;
BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
set_layout->binding_count) {
if (set_layout->binding[b].stage[shader->stage].surface_index >= 0)
map->surface_count += set_layout->binding[b].array_size;
if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0)
map->sampler_count += set_layout->binding[b].array_size;
if (set_layout->binding[b].stage[shader->stage].image_index >= 0)
map->image_count += set_layout->binding[b].array_size;
}
}
unsigned surface = 0;
unsigned sampler = 0;
unsigned image = 0;
for (uint32_t set = 0; set < layout->num_sets; set++) {
struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
BITSET_WORD b, _tmp;
BITSET_FOREACH_SET(b, _tmp, state.set[set].used,
set_layout->binding_count) {
unsigned array_size = set_layout->binding[b].array_size;
unsigned set_offset = set_layout->binding[b].descriptor_index;
if (set_layout->binding[b].stage[shader->stage].surface_index >= 0) {
state.set[set].surface_offsets[b] = surface;
for (unsigned i = 0; i < array_size; i++) {
map->surface_to_descriptor[surface + i].set = set;
map->surface_to_descriptor[surface + i].offset = set_offset + i;
}
surface += array_size;
}
if (set_layout->binding[b].stage[shader->stage].sampler_index >= 0) {
state.set[set].sampler_offsets[b] = sampler;
for (unsigned i = 0; i < array_size; i++) {
map->sampler_to_descriptor[sampler + i].set = set;
map->sampler_to_descriptor[sampler + i].offset = set_offset + i;
}
sampler += array_size;
}
if (set_layout->binding[b].stage[shader->stage].image_index >= 0) {
state.set[set].image_offsets[b] = image;
image += array_size;
}
}
}
nir_foreach_function(shader, function) {
if (function->impl) {
nir_builder_init(&state.builder, function->impl);
nir_foreach_block(function->impl, apply_pipeline_layout_block, &state);
nir_metadata_preserve(function->impl, nir_metadata_block_index |
nir_metadata_dominance);
}
}
if (map->image_count > 0) {
assert(map->image_count <= MAX_IMAGES);
nir_foreach_variable(var, &shader->uniforms) {
if (glsl_type_is_image(var->type) ||
(glsl_type_is_array(var->type) &&
glsl_type_is_image(glsl_get_array_element(var->type)))) {
/* Images are represented as uniform push constants and the actual
* information required for reading/writing to/from the image is
          * stored in the uniform.
*/
unsigned set = var->data.descriptor_set;
unsigned binding = var->data.binding;
unsigned image_index = state.set[set].image_offsets[binding];
var->data.driver_location = shader->num_uniforms +
image_index * BRW_IMAGE_PARAM_SIZE * 4;
}
}
struct anv_push_constants *null_data = NULL;
const gl_constant_value **param =
prog_data->param + (shader->num_uniforms / 4);
const struct brw_image_param *image_param = null_data->images;
for (uint32_t i = 0; i < map->image_count; i++) {
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
(const union gl_constant_value *)&image_param->surface_idx, 1);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
(const union gl_constant_value *)image_param->offset, 2);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
(const union gl_constant_value *)image_param->size, 3);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
(const union gl_constant_value *)image_param->stride, 4);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
(const union gl_constant_value *)image_param->tiling, 3);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
(const union gl_constant_value *)image_param->swizzling, 2);
param += BRW_IMAGE_PARAM_SIZE;
image_param ++;
}
shader->num_uniforms += map->image_count * BRW_IMAGE_PARAM_SIZE * 4;
}
ralloc_free(mem_ctx);
}
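/* Illustrative sketch, not part of this patch: the (set, binding, array index)
 * -> flat binding table index mapping that lower_res_index_intrinsic() applies
 * above, written out as a standalone helper. The struct is the one defined in
 * this file; the helper itself is hypothetical.
 */
static inline uint32_t
example_flat_surface_index(const struct apply_pipeline_layout_state *state,
                           uint32_t set, uint32_t binding,
                           uint32_t array_index)
{
   /* Each used (set, binding) pair gets a base offset into the flat surface
    * table in anv_nir_apply_pipeline_layout(); elements of an arrayed binding
    * follow consecutively after that base.
    */
   return state->set[set].surface_offsets[binding] + array_index;
}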

View file

@@ -0,0 +1,77 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
struct lower_push_constants_state {
nir_shader *shader;
bool is_scalar;
};
static bool
lower_push_constants_block(nir_block *block, void *void_state)
{
struct lower_push_constants_state *state = void_state;
nir_foreach_instr(block, instr) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
/* TODO: Handle indirect push constants */
if (intrin->intrinsic != nir_intrinsic_load_push_constant)
continue;
      /* This won't work for vec4 stages. */
assert(state->is_scalar);
assert(intrin->const_index[0] % 4 == 0);
assert(intrin->const_index[1] == 128);
      /* We just turn them into uniform loads with the appropriate offset */
intrin->intrinsic = nir_intrinsic_load_uniform;
}
return true;
}
void
anv_nir_lower_push_constants(nir_shader *shader, bool is_scalar)
{
struct lower_push_constants_state state = {
.shader = shader,
.is_scalar = is_scalar,
};
nir_foreach_function(shader, function) {
if (function->impl)
nir_foreach_block(function->impl, lower_push_constants_block, &state);
}
assert(shader->num_uniforms % 4 == 0);
if (is_scalar)
shader->num_uniforms /= 4;
else
shader->num_uniforms = DIV_ROUND_UP(shader->num_uniforms, 16);
}
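/* Illustrative sketch, not part of this patch: the unit conversion performed at
 * the end of anv_nir_lower_push_constants() above, factored out for clarity. It
 * assumes num_uniforms starts out counted in bytes; the helper is hypothetical.
 */
static inline unsigned
example_uniform_slots(unsigned num_uniform_bytes, bool is_scalar)
{
   /* Scalar stages count uniforms in 32-bit components; vec4 stages count
    * them in 16-byte vec4 slots, rounded up.
    */
   return is_scalar ? num_uniform_bytes / 4
                    : DIV_ROUND_UP(num_uniform_bytes, 16);
}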

160
src/intel/vulkan/anv_pass.c Normal file
View file

@@ -0,0 +1,160 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
VkResult anv_CreateRenderPass(
VkDevice _device,
const VkRenderPassCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkRenderPass* pRenderPass)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_render_pass *pass;
size_t size;
size_t attachments_offset;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
size = sizeof(*pass);
size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
attachments_offset = size;
size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
pass = anv_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   /* Clear the subpasses along with the parent pass. This is required because
* each array member of anv_subpass must be a valid pointer if not NULL.
*/
memset(pass, 0, size);
pass->attachment_count = pCreateInfo->attachmentCount;
pass->subpass_count = pCreateInfo->subpassCount;
pass->attachments = (void *) pass + attachments_offset;
for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
struct anv_render_pass_attachment *att = &pass->attachments[i];
att->format = anv_format_for_vk_format(pCreateInfo->pAttachments[i].format);
att->samples = pCreateInfo->pAttachments[i].samples;
att->load_op = pCreateInfo->pAttachments[i].loadOp;
att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
// att->store_op = pCreateInfo->pAttachments[i].storeOp;
// att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
}
uint32_t subpass_attachment_count = 0, *p;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
subpass_attachment_count +=
desc->inputAttachmentCount +
desc->colorAttachmentCount +
/* Count colorAttachmentCount again for resolve_attachments */
desc->colorAttachmentCount;
}
pass->subpass_attachments =
anv_alloc2(&device->alloc, pAllocator,
subpass_attachment_count * sizeof(uint32_t), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pass->subpass_attachments == NULL) {
anv_free2(&device->alloc, pAllocator, pass);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
p = pass->subpass_attachments;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
struct anv_subpass *subpass = &pass->subpasses[i];
subpass->input_count = desc->inputAttachmentCount;
subpass->color_count = desc->colorAttachmentCount;
if (desc->inputAttachmentCount > 0) {
subpass->input_attachments = p;
p += desc->inputAttachmentCount;
for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
subpass->input_attachments[j]
= desc->pInputAttachments[j].attachment;
}
}
if (desc->colorAttachmentCount > 0) {
subpass->color_attachments = p;
p += desc->colorAttachmentCount;
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->color_attachments[j]
= desc->pColorAttachments[j].attachment;
}
}
subpass->has_resolve = false;
if (desc->pResolveAttachments) {
subpass->resolve_attachments = p;
p += desc->colorAttachmentCount;
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
uint32_t a = desc->pResolveAttachments[j].attachment;
subpass->resolve_attachments[j] = a;
if (a != VK_ATTACHMENT_UNUSED)
subpass->has_resolve = true;
}
}
if (desc->pDepthStencilAttachment) {
subpass->depth_stencil_attachment =
desc->pDepthStencilAttachment->attachment;
} else {
subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED;
}
}
*pRenderPass = anv_render_pass_to_handle(pass);
return VK_SUCCESS;
}
void anv_DestroyRenderPass(
VkDevice _device,
VkRenderPass _pass,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_render_pass, pass, _pass);
anv_free2(&device->alloc, pAllocator, pass->subpass_attachments);
anv_free2(&device->alloc, pAllocator, pass);
}
void anv_GetRenderAreaGranularity(
VkDevice device,
VkRenderPass renderPass,
VkExtent2D* pGranularity)
{
*pGranularity = (VkExtent2D) { 1, 1 };
}
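/* Illustrative usage sketch, not part of this patch: a minimal render pass with
 * one color attachment and one subpass, as an application would request it
 * through the core entry point that lands in anv_CreateRenderPass() above. The
 * device handle and the chosen format are assumptions.
 */
static void
example_create_simple_pass(VkDevice device, VkRenderPass *out_pass)
{
   vkCreateRenderPass(device,
      &(VkRenderPassCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
         .attachmentCount = 1,
         .pAttachments = &(VkAttachmentDescription) {
            .format = VK_FORMAT_B8G8R8A8_SRGB,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
            .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
         },
         .subpassCount = 1,
         .pSubpasses = &(VkSubpassDescription) {
            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
            .colorAttachmentCount = 1,
            .pColorAttachments = &(VkAttachmentReference) {
               .attachment = 0,
               .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            },
         },
      }, NULL, out_pass);
}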

File diff suppressed because it is too large

View file

@@ -0,0 +1,518 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "anv_private.h"
/* Remaining work:
*
* - Compact binding table layout so it's tight and not dependent on
* descriptor set layout.
*
* - Review prog_data struct for size and cacheability: struct
* brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
* bit quantities etc; param, pull_param, and image_params are pointers, we
 * just need the compaction map. Use bit fields for all bools, e.g.
* dual_src_blend.
*/
void
anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
struct anv_device *device)
{
cache->device = device;
anv_state_stream_init(&cache->program_stream,
&device->instruction_block_pool);
pthread_mutex_init(&cache->mutex, NULL);
cache->kernel_count = 0;
cache->total_size = 0;
cache->table_size = 1024;
const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
cache->hash_table = malloc(byte_size);
   /* We don't consider allocation failure fatal; we just start with a 0-sized
* cache. */
if (cache->hash_table == NULL ||
!env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true))
cache->table_size = 0;
else
memset(cache->hash_table, 0xff, byte_size);
}
void
anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
{
anv_state_stream_finish(&cache->program_stream);
pthread_mutex_destroy(&cache->mutex);
free(cache->hash_table);
}
struct cache_entry {
unsigned char sha1[20];
uint32_t prog_data_size;
uint32_t kernel_size;
uint32_t surface_count;
uint32_t sampler_count;
uint32_t image_count;
char prog_data[0];
   /* kernel follows prog_data at the next 64-byte-aligned address */
};
static uint32_t
entry_size(struct cache_entry *entry)
{
/* This returns the number of bytes needed to serialize an entry, which
* doesn't include the alignment padding bytes.
*/
const uint32_t map_size =
entry->surface_count * sizeof(struct anv_pipeline_binding) +
entry->sampler_count * sizeof(struct anv_pipeline_binding);
return sizeof(*entry) + entry->prog_data_size + map_size;
}
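/* Illustrative sketch, not part of this patch: the offset at which a kernel is
 * stored relative to its cache_entry, per the layout comment above. The same
 * expression appears in the search and serialization paths below; the helper
 * name is hypothetical.
 */
static inline uint32_t
example_kernel_offset(struct cache_entry *entry)
{
   /* prog_data and the surface/sampler binding maps are packed directly after
    * the header; the kernel then starts at the next 64-byte boundary.
    */
   return align_u32(entry_size(entry), 64);
}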
void
anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
struct anv_shader_module *module,
const char *entrypoint,
const VkSpecializationInfo *spec_info)
{
struct mesa_sha1 *ctx;
ctx = _mesa_sha1_init();
_mesa_sha1_update(ctx, key, key_size);
_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
/* hash in shader stage, pipeline layout? */
if (spec_info) {
_mesa_sha1_update(ctx, spec_info->pMapEntries,
spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
}
_mesa_sha1_final(ctx, hash);
}
static uint32_t
anv_pipeline_cache_search_unlocked(struct anv_pipeline_cache *cache,
const unsigned char *sha1,
const struct brw_stage_prog_data **prog_data,
struct anv_pipeline_bind_map *map)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = (*(uint32_t *) sha1);
for (uint32_t i = 0; i < cache->table_size; i++) {
const uint32_t index = (start + i) & mask;
const uint32_t offset = cache->hash_table[index];
if (offset == ~0)
return NO_KERNEL;
struct cache_entry *entry =
cache->program_stream.block_pool->map + offset;
if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
if (prog_data) {
assert(map);
void *p = entry->prog_data;
*prog_data = p;
p += entry->prog_data_size;
map->surface_count = entry->surface_count;
map->sampler_count = entry->sampler_count;
map->image_count = entry->image_count;
map->surface_to_descriptor = p;
p += map->surface_count * sizeof(struct anv_pipeline_binding);
map->sampler_to_descriptor = p;
}
return offset + align_u32(entry_size(entry), 64);
}
}
unreachable("hash table should never be full");
}
uint32_t
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
const unsigned char *sha1,
const struct brw_stage_prog_data **prog_data,
struct anv_pipeline_bind_map *map)
{
uint32_t kernel;
pthread_mutex_lock(&cache->mutex);
kernel = anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
pthread_mutex_unlock(&cache->mutex);
return kernel;
}
static void
anv_pipeline_cache_set_entry(struct anv_pipeline_cache *cache,
struct cache_entry *entry, uint32_t entry_offset)
{
const uint32_t mask = cache->table_size - 1;
const uint32_t start = (*(uint32_t *) entry->sha1);
/* We'll always be able to insert when we get here. */
assert(cache->kernel_count < cache->table_size / 2);
for (uint32_t i = 0; i < cache->table_size; i++) {
const uint32_t index = (start + i) & mask;
if (cache->hash_table[index] == ~0) {
cache->hash_table[index] = entry_offset;
break;
}
}
cache->total_size += entry_size(entry) + entry->kernel_size;
cache->kernel_count++;
}
static VkResult
anv_pipeline_cache_grow(struct anv_pipeline_cache *cache)
{
const uint32_t table_size = cache->table_size * 2;
const uint32_t old_table_size = cache->table_size;
const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
uint32_t *table;
uint32_t *old_table = cache->hash_table;
table = malloc(byte_size);
if (table == NULL)
return VK_ERROR_OUT_OF_HOST_MEMORY;
cache->hash_table = table;
cache->table_size = table_size;
cache->kernel_count = 0;
cache->total_size = 0;
memset(cache->hash_table, 0xff, byte_size);
for (uint32_t i = 0; i < old_table_size; i++) {
const uint32_t offset = old_table[i];
if (offset == ~0)
continue;
struct cache_entry *entry =
cache->program_stream.block_pool->map + offset;
anv_pipeline_cache_set_entry(cache, entry, offset);
}
free(old_table);
return VK_SUCCESS;
}
static void
anv_pipeline_cache_add_entry(struct anv_pipeline_cache *cache,
struct cache_entry *entry, uint32_t entry_offset)
{
if (cache->kernel_count == cache->table_size / 2)
anv_pipeline_cache_grow(cache);
   /* Failing to grow the hash table isn't fatal, but it may mean we don't
* have enough space to add this new kernel. Only add it if there's room.
*/
if (cache->kernel_count < cache->table_size / 2)
anv_pipeline_cache_set_entry(cache, entry, entry_offset);
}
uint32_t
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
const unsigned char *sha1,
const void *kernel, size_t kernel_size,
const struct brw_stage_prog_data **prog_data,
size_t prog_data_size,
struct anv_pipeline_bind_map *map)
{
pthread_mutex_lock(&cache->mutex);
/* Before uploading, check again that another thread didn't upload this
* shader while we were compiling it.
*/
if (sha1) {
uint32_t cached_kernel =
anv_pipeline_cache_search_unlocked(cache, sha1, prog_data, map);
if (cached_kernel != NO_KERNEL) {
pthread_mutex_unlock(&cache->mutex);
return cached_kernel;
}
}
struct cache_entry *entry;
const uint32_t map_size =
map->surface_count * sizeof(struct anv_pipeline_binding) +
map->sampler_count * sizeof(struct anv_pipeline_binding);
const uint32_t preamble_size =
align_u32(sizeof(*entry) + prog_data_size + map_size, 64);
const uint32_t size = preamble_size + kernel_size;
assert(size < cache->program_stream.block_pool->block_size);
const struct anv_state state =
anv_state_stream_alloc(&cache->program_stream, size, 64);
entry = state.map;
entry->prog_data_size = prog_data_size;
entry->surface_count = map->surface_count;
entry->sampler_count = map->sampler_count;
entry->image_count = map->image_count;
entry->kernel_size = kernel_size;
void *p = entry->prog_data;
memcpy(p, *prog_data, prog_data_size);
p += prog_data_size;
memcpy(p, map->surface_to_descriptor,
map->surface_count * sizeof(struct anv_pipeline_binding));
map->surface_to_descriptor = p;
p += map->surface_count * sizeof(struct anv_pipeline_binding);
memcpy(p, map->sampler_to_descriptor,
map->sampler_count * sizeof(struct anv_pipeline_binding));
map->sampler_to_descriptor = p;
if (sha1) {
assert(anv_pipeline_cache_search_unlocked(cache, sha1,
NULL, NULL) == NO_KERNEL);
memcpy(entry->sha1, sha1, sizeof(entry->sha1));
anv_pipeline_cache_add_entry(cache, entry, state.offset);
}
pthread_mutex_unlock(&cache->mutex);
memcpy(state.map + preamble_size, kernel, kernel_size);
if (!cache->device->info.has_llc)
anv_state_clflush(state);
*prog_data = (const struct brw_stage_prog_data *) entry->prog_data;
return state.offset + preamble_size;
}
struct cache_header {
uint32_t header_size;
uint32_t header_version;
uint32_t vendor_id;
uint32_t device_id;
uint8_t uuid[VK_UUID_SIZE];
};
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
const void *data, size_t size)
{
struct anv_device *device = cache->device;
struct cache_header header;
uint8_t uuid[VK_UUID_SIZE];
if (size < sizeof(header))
return;
memcpy(&header, data, sizeof(header));
if (header.header_size < sizeof(header))
return;
if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
return;
if (header.vendor_id != 0x8086)
return;
if (header.device_id != device->chipset_id)
return;
anv_device_get_cache_uuid(uuid);
if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
return;
void *end = (void *) data + size;
void *p = (void *) data + header.header_size;
while (p < end) {
struct cache_entry *entry = p;
void *data = entry->prog_data;
const struct brw_stage_prog_data *prog_data = data;
data += entry->prog_data_size;
struct anv_pipeline_binding *surface_to_descriptor = data;
data += entry->surface_count * sizeof(struct anv_pipeline_binding);
struct anv_pipeline_binding *sampler_to_descriptor = data;
data += entry->sampler_count * sizeof(struct anv_pipeline_binding);
void *kernel = data;
struct anv_pipeline_bind_map map = {
.surface_count = entry->surface_count,
.sampler_count = entry->sampler_count,
.image_count = entry->image_count,
.surface_to_descriptor = surface_to_descriptor,
.sampler_to_descriptor = sampler_to_descriptor
};
anv_pipeline_cache_upload_kernel(cache, entry->sha1,
kernel, entry->kernel_size,
&prog_data,
entry->prog_data_size, &map);
p = kernel + entry->kernel_size;
}
}
VkResult anv_CreatePipelineCache(
VkDevice _device,
const VkPipelineCacheCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipelineCache* pPipelineCache)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_pipeline_cache *cache;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
assert(pCreateInfo->flags == 0);
cache = anv_alloc2(&device->alloc, pAllocator,
sizeof(*cache), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cache == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
anv_pipeline_cache_init(cache, device);
if (pCreateInfo->initialDataSize > 0)
anv_pipeline_cache_load(cache,
pCreateInfo->pInitialData,
pCreateInfo->initialDataSize);
*pPipelineCache = anv_pipeline_cache_to_handle(cache);
return VK_SUCCESS;
}
void anv_DestroyPipelineCache(
VkDevice _device,
VkPipelineCache _cache,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
anv_pipeline_cache_finish(cache);
anv_free2(&device->alloc, pAllocator, cache);
}
VkResult anv_GetPipelineCacheData(
VkDevice _device,
VkPipelineCache _cache,
size_t* pDataSize,
void* pData)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
struct cache_header *header;
const size_t size = sizeof(*header) + cache->total_size;
if (pData == NULL) {
*pDataSize = size;
return VK_SUCCESS;
}
if (*pDataSize < sizeof(*header)) {
*pDataSize = 0;
return VK_INCOMPLETE;
}
void *p = pData, *end = pData + *pDataSize;
header = p;
header->header_size = sizeof(*header);
header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
header->vendor_id = 0x8086;
header->device_id = device->chipset_id;
anv_device_get_cache_uuid(header->uuid);
p += header->header_size;
struct cache_entry *entry;
for (uint32_t i = 0; i < cache->table_size; i++) {
if (cache->hash_table[i] == ~0)
continue;
entry = cache->program_stream.block_pool->map + cache->hash_table[i];
const uint32_t size = entry_size(entry);
if (end < p + size + entry->kernel_size)
break;
memcpy(p, entry, size);
p += size;
void *kernel = (void *) entry + align_u32(size, 64);
memcpy(p, kernel, entry->kernel_size);
p += entry->kernel_size;
}
*pDataSize = p - pData;
return VK_SUCCESS;
}
static void
anv_pipeline_cache_merge(struct anv_pipeline_cache *dst,
struct anv_pipeline_cache *src)
{
for (uint32_t i = 0; i < src->table_size; i++) {
const uint32_t offset = src->hash_table[i];
if (offset == ~0)
continue;
struct cache_entry *entry =
src->program_stream.block_pool->map + offset;
if (anv_pipeline_cache_search(dst, entry->sha1, NULL, NULL) != NO_KERNEL)
continue;
anv_pipeline_cache_add_entry(dst, entry, offset);
}
}
VkResult anv_MergePipelineCaches(
VkDevice _device,
VkPipelineCache destCache,
uint32_t srcCacheCount,
const VkPipelineCache* pSrcCaches)
{
ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
for (uint32_t i = 0; i < srcCacheCount; i++) {
ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
anv_pipeline_cache_merge(dst, src);
}
return VK_SUCCESS;
}
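/* Illustrative usage sketch, not part of this patch: the standard two-call
 * idiom for serializing a cache, which anv_GetPipelineCacheData() above
 * supports by reporting the required size when pData is NULL. The device and
 * cache handles are assumed to be valid.
 */
static void
example_serialize_cache(VkDevice device, VkPipelineCache cache)
{
   size_t size = 0;
   vkGetPipelineCacheData(device, cache, &size, NULL);
   void *data = malloc(size);
   if (data != NULL &&
       vkGetPipelineCacheData(device, cache, &size, data) == VK_SUCCESS) {
      /* 'size' bytes at 'data' can be written to disk and handed back as
       * pInitialData when re-creating the cache on a later run.
       */
   }
   free(data);
}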

File diff suppressed because it is too large

View file

@@ -0,0 +1,187 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
VkResult anv_CreateQueryPool(
VkDevice _device,
const VkQueryPoolCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkQueryPool* pQueryPool)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_query_pool *pool;
VkResult result;
uint32_t slot_size;
uint64_t size;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
switch (pCreateInfo->queryType) {
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP:
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
return VK_ERROR_INCOMPATIBLE_DRIVER;
default:
assert(!"Invalid query type");
}
slot_size = sizeof(struct anv_query_pool_slot);
pool = anv_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pool == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
pool->type = pCreateInfo->queryType;
pool->slots = pCreateInfo->queryCount;
size = pCreateInfo->queryCount * slot_size;
result = anv_bo_init_new(&pool->bo, device, size);
if (result != VK_SUCCESS)
goto fail;
pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);
*pQueryPool = anv_query_pool_to_handle(pool);
return VK_SUCCESS;
fail:
anv_free2(&device->alloc, pAllocator, pool);
return result;
}
void anv_DestroyQueryPool(
VkDevice _device,
VkQueryPool _pool,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
anv_gem_munmap(pool->bo.map, pool->bo.size);
anv_gem_close(device, pool->bo.gem_handle);
anv_free2(&device->alloc, pAllocator, pool);
}
VkResult anv_GetQueryPoolResults(
VkDevice _device,
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount,
size_t dataSize,
void* pData,
VkDeviceSize stride,
VkQueryResultFlags flags)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
int64_t timeout = INT64_MAX;
uint64_t result;
int ret;
assert(pool->type == VK_QUERY_TYPE_OCCLUSION ||
pool->type == VK_QUERY_TYPE_TIMESTAMP);
if (pData == NULL)
return VK_SUCCESS;
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
ret = anv_gem_wait(device, pool->bo.gem_handle, &timeout);
if (ret == -1) {
/* We don't know the real error. */
return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
"gem_wait failed %m");
}
}
void *data_end = pData + dataSize;
struct anv_query_pool_slot *slot = pool->bo.map;
for (uint32_t i = 0; i < queryCount; i++) {
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION: {
result = slot[firstQuery + i].end - slot[firstQuery + i].begin;
break;
}
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
unreachable("pipeline stats not supported");
case VK_QUERY_TYPE_TIMESTAMP: {
result = slot[firstQuery + i].begin;
break;
}
default:
unreachable("invalid pool type");
}
if (flags & VK_QUERY_RESULT_64_BIT) {
uint64_t *dst = pData;
dst[0] = result;
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
dst[1] = slot[firstQuery + i].available;
} else {
uint32_t *dst = pData;
if (result > UINT32_MAX)
result = UINT32_MAX;
dst[0] = result;
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
dst[1] = slot[firstQuery + i].available;
}
pData += stride;
if (pData >= data_end)
break;
}
return VK_SUCCESS;
}
void anv_CmdResetQueryPool(
VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t firstQuery,
uint32_t queryCount)
{
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
for (uint32_t i = 0; i < queryCount; i++) {
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP: {
struct anv_query_pool_slot *slot = pool->bo.map;
slot[firstQuery + i].available = 0;
break;
}
default:
assert(!"Invalid query type");
}
}
}
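/* Illustrative usage sketch, not part of this patch: reading back two occlusion
 * queries as 64-bit values with an availability word each, matching the layout
 * written by the result loop above. The handles are assumed to be valid and the
 * queries to have been recorded elsewhere.
 */
static void
example_read_occlusion_results(VkDevice device, VkQueryPool pool)
{
   uint64_t results[2][2];   /* [query][value, availability] */
   vkGetQueryPoolResults(device, pool, 0, 2,
                         sizeof(results), results, sizeof(results[0]),
                         VK_QUERY_RESULT_64_BIT |
                         VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
}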

202
src/intel/vulkan/anv_util.c Normal file
View file

@@ -0,0 +1,202 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include "anv_private.h"
/** Log an error message. */
void anv_printflike(1, 2)
anv_loge(const char *format, ...)
{
va_list va;
va_start(va, format);
anv_loge_v(format, va);
va_end(va);
}
/** \see anv_loge() */
void
anv_loge_v(const char *format, va_list va)
{
fprintf(stderr, "vk: error: ");
vfprintf(stderr, format, va);
fprintf(stderr, "\n");
}
void anv_printflike(3, 4)
__anv_finishme(const char *file, int line, const char *format, ...)
{
va_list ap;
char buffer[256];
va_start(ap, format);
vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
}
void anv_noreturn anv_printflike(1, 2)
anv_abortf(const char *format, ...)
{
va_list va;
va_start(va, format);
anv_abortfv(format, va);
va_end(va);
}
void anv_noreturn
anv_abortfv(const char *format, va_list va)
{
fprintf(stderr, "vk: error: ");
vfprintf(stderr, format, va);
fprintf(stderr, "\n");
abort();
}
VkResult
__vk_errorf(VkResult error, const char *file, int line, const char *format, ...)
{
va_list ap;
char buffer[256];
#define ERROR_CASE(error) case error: error_str = #error; break;
const char *error_str;
switch ((int32_t)error) {
/* Core errors */
ERROR_CASE(VK_ERROR_OUT_OF_HOST_MEMORY)
ERROR_CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY)
ERROR_CASE(VK_ERROR_INITIALIZATION_FAILED)
ERROR_CASE(VK_ERROR_DEVICE_LOST)
ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED)
ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT)
ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT)
ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER)
/* Extension errors */
ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR)
default:
assert(!"Unknown error");
error_str = "unknown error";
}
#undef ERROR_CASE
if (format) {
va_start(ap, format);
vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str);
} else {
fprintf(stderr, "%s:%d: %s\n", file, line, error_str);
}
return error;
}
int
anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size)
{
assert(util_is_power_of_two(size));
assert(element_size < size && util_is_power_of_two(element_size));
vector->head = 0;
vector->tail = 0;
vector->element_size = element_size;
vector->size = size;
vector->data = malloc(size);
return vector->data != NULL;
}
void *
anv_vector_add(struct anv_vector *vector)
{
uint32_t offset, size, split, src_tail, dst_tail;
void *data;
if (vector->head - vector->tail == vector->size) {
size = vector->size * 2;
data = malloc(size);
if (data == NULL)
return NULL;
src_tail = vector->tail & (vector->size - 1);
dst_tail = vector->tail & (size - 1);
if (src_tail == 0) {
/* Since we know that the vector is full, this means that it's
* linear from start to end so we can do one copy.
*/
memcpy(data + dst_tail, vector->data, vector->size);
} else {
/* In this case, the vector is split into two pieces and we have
* to do two copies. We have to be careful to make sure each
* piece goes to the right locations. Thanks to the change in
* size, it may or may not still wrap around.
*/
split = align_u32(vector->tail, vector->size);
assert(vector->tail <= split && split < vector->head);
memcpy(data + dst_tail, vector->data + src_tail,
split - vector->tail);
memcpy(data + (split & (size - 1)), vector->data,
vector->head - split);
}
free(vector->data);
vector->data = data;
vector->size = size;
}
assert(vector->head - vector->tail < vector->size);
offset = vector->head & (vector->size - 1);
vector->head += vector->element_size;
return vector->data + offset;
}
void *
anv_vector_remove(struct anv_vector *vector)
{
uint32_t offset;
if (vector->head == vector->tail)
return NULL;
assert(vector->head - vector->tail <= vector->size);
offset = vector->tail & (vector->size - 1);
vector->tail += vector->element_size;
return vector->data + offset;
}
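/* Illustrative usage sketch, not part of this patch: anv_vector is a
 * power-of-two ring buffer. _add() hands back the next free slot, doubling the
 * backing store when full, and _remove() returns the oldest slot or NULL when
 * the vector is empty. The element type and sizes here are arbitrary.
 */
static void
example_vector_usage(void)
{
   struct anv_vector fifo;
   anv_vector_init(&fifo, sizeof(uint32_t), 64);   /* 64 bytes = 16 slots */
   uint32_t *slot = anv_vector_add(&fifo);
   if (slot != NULL)
      *slot = 42;
   uint32_t *oldest = anv_vector_remove(&fifo);    /* NULL once drained */
   (void)oldest;
}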

234
src/intel/vulkan/anv_wsi.c Normal file
View file

@@ -0,0 +1,234 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_wsi.h"
VkResult
anv_init_wsi(struct anv_instance *instance)
{
VkResult result;
result = anv_x11_init_wsi(instance);
if (result != VK_SUCCESS)
return result;
#ifdef HAVE_WAYLAND_PLATFORM
result = anv_wl_init_wsi(instance);
if (result != VK_SUCCESS) {
anv_x11_finish_wsi(instance);
return result;
}
#endif
return VK_SUCCESS;
}
void
anv_finish_wsi(struct anv_instance *instance)
{
#ifdef HAVE_WAYLAND_PLATFORM
anv_wl_finish_wsi(instance);
#endif
anv_x11_finish_wsi(instance);
}
void anv_DestroySurfaceKHR(
VkInstance _instance,
VkSurfaceKHR _surface,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_instance, instance, _instance);
ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
anv_free2(&instance->alloc, pAllocator, surface);
}
VkResult anv_GetPhysicalDeviceSurfaceSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
VkSurfaceKHR _surface,
VkBool32* pSupported)
{
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];
return iface->get_support(surface, device, queueFamilyIndex, pSupported);
}
VkResult anv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR _surface,
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
{
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];
return iface->get_capabilities(surface, device, pSurfaceCapabilities);
}
VkResult anv_GetPhysicalDeviceSurfaceFormatsKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR _surface,
uint32_t* pSurfaceFormatCount,
VkSurfaceFormatKHR* pSurfaceFormats)
{
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];
return iface->get_formats(surface, device, pSurfaceFormatCount,
pSurfaceFormats);
}
VkResult anv_GetPhysicalDeviceSurfacePresentModesKHR(
VkPhysicalDevice physicalDevice,
VkSurfaceKHR _surface,
uint32_t* pPresentModeCount,
VkPresentModeKHR* pPresentModes)
{
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];
return iface->get_present_modes(surface, device, pPresentModeCount,
pPresentModes);
}
VkResult anv_CreateSwapchainKHR(
VkDevice _device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSwapchainKHR* pSwapchain)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface);
struct anv_wsi_interface *iface = device->instance->wsi[surface->platform];
struct anv_swapchain *swapchain;
VkResult result = iface->create_swapchain(surface, device, pCreateInfo,
pAllocator, &swapchain);
if (result != VK_SUCCESS)
return result;
if (pAllocator)
swapchain->alloc = *pAllocator;
else
swapchain->alloc = device->alloc;
for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++)
swapchain->fences[i] = VK_NULL_HANDLE;
*pSwapchain = anv_swapchain_to_handle(swapchain);
return VK_SUCCESS;
}
void anv_DestroySwapchainKHR(
VkDevice device,
VkSwapchainKHR _swapchain,
const VkAllocationCallbacks* pAllocator)
{
ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);
for (unsigned i = 0; i < ARRAY_SIZE(swapchain->fences); i++) {
if (swapchain->fences[i] != VK_NULL_HANDLE)
anv_DestroyFence(device, swapchain->fences[i], pAllocator);
}
swapchain->destroy(swapchain, pAllocator);
}
VkResult anv_GetSwapchainImagesKHR(
VkDevice device,
VkSwapchainKHR _swapchain,
uint32_t* pSwapchainImageCount,
VkImage* pSwapchainImages)
{
ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);
return swapchain->get_images(swapchain, pSwapchainImageCount,
pSwapchainImages);
}
VkResult anv_AcquireNextImageKHR(
VkDevice device,
VkSwapchainKHR _swapchain,
uint64_t timeout,
VkSemaphore semaphore,
VkFence fence,
uint32_t* pImageIndex)
{
ANV_FROM_HANDLE(anv_swapchain, swapchain, _swapchain);
return swapchain->acquire_next_image(swapchain, timeout, semaphore,
pImageIndex);
}
VkResult anv_QueuePresentKHR(
VkQueue _queue,
const VkPresentInfoKHR* pPresentInfo)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
VkResult result;
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
ANV_FROM_HANDLE(anv_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
assert(swapchain->device == queue->device);
if (swapchain->fences[0] == VK_NULL_HANDLE) {
result = anv_CreateFence(anv_device_to_handle(queue->device),
&(VkFenceCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.flags = 0,
}, &swapchain->alloc, &swapchain->fences[0]);
if (result != VK_SUCCESS)
return result;
} else {
anv_ResetFences(anv_device_to_handle(queue->device),
1, &swapchain->fences[0]);
}
anv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]);
result = swapchain->queue_present(swapchain, queue,
pPresentInfo->pImageIndices[i]);
/* TODO: What if one of them returns OUT_OF_DATE? */
if (result != VK_SUCCESS)
return result;
VkFence last = swapchain->fences[2];
swapchain->fences[2] = swapchain->fences[1];
swapchain->fences[1] = swapchain->fences[0];
swapchain->fences[0] = last;
if (last != VK_NULL_HANDLE) {
anv_WaitForFences(anv_device_to_handle(queue->device),
1, &last, true, 1);
}
}
return VK_SUCCESS;
}

View file

@@ -0,0 +1,78 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#pragma once
#include "anv_private.h"
struct anv_swapchain;
struct anv_wsi_interface {
VkResult (*get_support)(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
uint32_t queueFamilyIndex,
VkBool32* pSupported);
VkResult (*get_capabilities)(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities);
VkResult (*get_formats)(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
uint32_t* pSurfaceFormatCount,
VkSurfaceFormatKHR* pSurfaceFormats);
VkResult (*get_present_modes)(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
uint32_t* pPresentModeCount,
VkPresentModeKHR* pPresentModes);
VkResult (*create_swapchain)(VkIcdSurfaceBase *surface,
struct anv_device *device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
struct anv_swapchain **swapchain);
};
struct anv_swapchain {
struct anv_device *device;
VkAllocationCallbacks alloc;
VkFence fences[3];
VkResult (*destroy)(struct anv_swapchain *swapchain,
const VkAllocationCallbacks *pAllocator);
VkResult (*get_images)(struct anv_swapchain *swapchain,
uint32_t *pCount, VkImage *pSwapchainImages);
VkResult (*acquire_next_image)(struct anv_swapchain *swap_chain,
uint64_t timeout, VkSemaphore semaphore,
uint32_t *image_index);
VkResult (*queue_present)(struct anv_swapchain *swap_chain,
struct anv_queue *queue,
uint32_t image_index);
};
ANV_DEFINE_NONDISP_HANDLE_CASTS(_VkIcdSurfaceBase, VkSurfaceKHR)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_swapchain, VkSwapchainKHR)
VkResult anv_x11_init_wsi(struct anv_instance *instance);
void anv_x11_finish_wsi(struct anv_instance *instance);
VkResult anv_wl_init_wsi(struct anv_instance *instance);
void anv_wl_finish_wsi(struct anv_instance *instance);

View file

@@ -0,0 +1,871 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <wayland-client.h>
#include <wayland-drm-client-protocol.h>
#include "anv_wsi.h"
#include <util/hash_table.h>
#define MIN_NUM_IMAGES 2
struct wsi_wl_display {
struct wl_display * display;
struct wl_drm * drm;
/* Vector of VkFormats supported */
struct anv_vector formats;
uint32_t capabilities;
};
struct wsi_wayland {
struct anv_wsi_interface base;
struct anv_instance * instance;
pthread_mutex_t mutex;
/* Hash table of wl_display -> wsi_wl_display mappings */
struct hash_table * displays;
};
static void
wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format)
{
/* Don't add a format that's already in the list */
VkFormat *f;
anv_vector_foreach(f, &display->formats)
if (*f == format)
return;
/* Don't add formats which aren't supported by the driver */
if (anv_format_for_vk_format(format)->isl_format ==
ISL_FORMAT_UNSUPPORTED) {
return;
}
f = anv_vector_add(&display->formats);
if (f)
*f = format;
}
static void
drm_handle_device(void *data, struct wl_drm *drm, const char *name)
{
fprintf(stderr, "wl_drm.device(%s)\n", name);
}
static uint32_t
wl_drm_format_for_vk_format(VkFormat vk_format, bool alpha)
{
switch (vk_format) {
/* TODO: Figure out what all the formats mean and make this table
* correct.
*/
#if 0
case VK_FORMAT_R4G4B4A4_UNORM:
return alpha ? WL_DRM_FORMAT_ABGR4444 : WL_DRM_FORMAT_XBGR4444;
case VK_FORMAT_R5G6B5_UNORM:
return WL_DRM_FORMAT_BGR565;
case VK_FORMAT_R5G5B5A1_UNORM:
return alpha ? WL_DRM_FORMAT_ABGR1555 : WL_DRM_FORMAT_XBGR1555;
case VK_FORMAT_R8G8B8_UNORM:
return WL_DRM_FORMAT_XBGR8888;
case VK_FORMAT_R8G8B8A8_UNORM:
return alpha ? WL_DRM_FORMAT_ABGR8888 : WL_DRM_FORMAT_XBGR8888;
case VK_FORMAT_R10G10B10A2_UNORM:
return alpha ? WL_DRM_FORMAT_ABGR2101010 : WL_DRM_FORMAT_XBGR2101010;
case VK_FORMAT_B4G4R4A4_UNORM:
return alpha ? WL_DRM_FORMAT_ARGB4444 : WL_DRM_FORMAT_XRGB4444;
case VK_FORMAT_B5G6R5_UNORM:
return WL_DRM_FORMAT_RGB565;
case VK_FORMAT_B5G5R5A1_UNORM:
      return alpha ? WL_DRM_FORMAT_ARGB1555 : WL_DRM_FORMAT_XRGB1555;
#endif
case VK_FORMAT_B8G8R8_SRGB:
return WL_DRM_FORMAT_BGRX8888;
case VK_FORMAT_B8G8R8A8_SRGB:
return alpha ? WL_DRM_FORMAT_ARGB8888 : WL_DRM_FORMAT_XRGB8888;
#if 0
case VK_FORMAT_B10G10R10A2_UNORM:
return alpha ? WL_DRM_FORMAT_ARGB2101010 : WL_DRM_FORMAT_XRGB2101010;
#endif
default:
assert("!Unsupported Vulkan format");
return 0;
}
}
static void
drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format)
{
struct wsi_wl_display *display = data;
switch (wl_format) {
#if 0
case WL_DRM_FORMAT_ABGR4444:
case WL_DRM_FORMAT_XBGR4444:
wsi_wl_display_add_vk_format(display, VK_FORMAT_R4G4B4A4_UNORM);
break;
case WL_DRM_FORMAT_BGR565:
wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G6B5_UNORM);
break;
case WL_DRM_FORMAT_ABGR1555:
case WL_DRM_FORMAT_XBGR1555:
wsi_wl_display_add_vk_format(display, VK_FORMAT_R5G5B5A1_UNORM);
break;
case WL_DRM_FORMAT_XBGR8888:
wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8_UNORM);
/* fallthrough */
case WL_DRM_FORMAT_ABGR8888:
wsi_wl_display_add_vk_format(display, VK_FORMAT_R8G8B8A8_UNORM);
break;
case WL_DRM_FORMAT_ABGR2101010:
case WL_DRM_FORMAT_XBGR2101010:
wsi_wl_display_add_vk_format(display, VK_FORMAT_R10G10B10A2_UNORM);
break;
case WL_DRM_FORMAT_ARGB4444:
case WL_DRM_FORMAT_XRGB4444:
wsi_wl_display_add_vk_format(display, VK_FORMAT_B4G4R4A4_UNORM);
break;
case WL_DRM_FORMAT_RGB565:
wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G6R5_UNORM);
break;
case WL_DRM_FORMAT_ARGB1555:
case WL_DRM_FORMAT_XRGB1555:
wsi_wl_display_add_vk_format(display, VK_FORMAT_B5G5R5A1_UNORM);
break;
#endif
case WL_DRM_FORMAT_XRGB8888:
wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8_SRGB);
/* fallthrough */
case WL_DRM_FORMAT_ARGB8888:
wsi_wl_display_add_vk_format(display, VK_FORMAT_B8G8R8A8_SRGB);
break;
#if 0
case WL_DRM_FORMAT_ARGB2101010:
case WL_DRM_FORMAT_XRGB2101010:
wsi_wl_display_add_vk_format(display, VK_FORMAT_B10G10R10A2_UNORM);
break;
#endif
}
}
static void
drm_handle_authenticated(void *data, struct wl_drm *drm)
{
}
static void
drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t capabilities)
{
struct wsi_wl_display *display = data;
display->capabilities = capabilities;
}
static const struct wl_drm_listener drm_listener = {
drm_handle_device,
drm_handle_format,
drm_handle_authenticated,
drm_handle_capabilities,
};
static void
registry_handle_global(void *data, struct wl_registry *registry,
uint32_t name, const char *interface, uint32_t version)
{
struct wsi_wl_display *display = data;
if (strcmp(interface, "wl_drm") == 0) {
assert(display->drm == NULL);
assert(version >= 2);
display->drm = wl_registry_bind(registry, name, &wl_drm_interface, 2);
if (display->drm)
wl_drm_add_listener(display->drm, &drm_listener, display);
}
}
static void
registry_handle_global_remove(void *data, struct wl_registry *registry,
uint32_t name)
{ /* No-op */ }
static const struct wl_registry_listener registry_listener = {
registry_handle_global,
registry_handle_global_remove
};
static void
wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display)
{
anv_vector_finish(&display->formats);
if (display->drm)
wl_drm_destroy(display->drm);
anv_free(&wsi->instance->alloc, display);
}
static struct wsi_wl_display *
wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display)
{
struct wsi_wl_display *display =
anv_alloc(&wsi->instance->alloc, sizeof(*display), 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!display)
return NULL;
memset(display, 0, sizeof(*display));
display->display = wl_display;
if (!anv_vector_init(&display->formats, sizeof(VkFormat), 8))
goto fail;
struct wl_registry *registry = wl_display_get_registry(wl_display);
if (!registry)
return NULL;
wl_registry_add_listener(registry, &registry_listener, display);
   /* Round-trip to get the wl_drm global */
wl_display_roundtrip(wl_display);
if (!display->drm)
goto fail;
   /* Round-trip to get wl_drm formats and capabilities */
wl_display_roundtrip(wl_display);
/* We need prime support */
if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME))
goto fail;
/* We don't need this anymore */
wl_registry_destroy(registry);
return display;
fail:
if (registry)
wl_registry_destroy(registry);
wsi_wl_display_destroy(wsi, display);
return NULL;
}
static struct wsi_wl_display *
wsi_wl_get_display(struct anv_instance *instance, struct wl_display *wl_display)
{
struct wsi_wayland *wsi =
(struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
pthread_mutex_lock(&wsi->mutex);
struct hash_entry *entry = _mesa_hash_table_search(wsi->displays,
wl_display);
if (!entry) {
/* We're about to make a bunch of blocking calls. Let's drop the
* mutex for now so we don't block up too badly.
*/
pthread_mutex_unlock(&wsi->mutex);
struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display);
pthread_mutex_lock(&wsi->mutex);
entry = _mesa_hash_table_search(wsi->displays, wl_display);
if (entry) {
/* Oops, someone raced us to it */
wsi_wl_display_destroy(wsi, display);
} else {
entry = _mesa_hash_table_insert(wsi->displays, wl_display, display);
}
}
pthread_mutex_unlock(&wsi->mutex);
return entry->data;
}
VkBool32 anv_GetPhysicalDeviceWaylandPresentationSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
struct wl_display* display)
{
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
return wsi_wl_get_display(physical_device->instance, display) != NULL;
}
static VkResult
wsi_wl_surface_get_support(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
uint32_t queueFamilyIndex,
VkBool32* pSupported)
{
*pSupported = true;
return VK_SUCCESS;
}
static const VkPresentModeKHR present_modes[] = {
VK_PRESENT_MODE_MAILBOX_KHR,
VK_PRESENT_MODE_FIFO_KHR,
};
static VkResult
wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
VkSurfaceCapabilitiesKHR* caps)
{
caps->minImageCount = MIN_NUM_IMAGES;
caps->maxImageCount = 4;
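/* A currentExtent of -1 wraps to 0xFFFFFFFF, the Vulkan sentinel meaning
 * "the extent is determined by the swapchain", since a Wayland surface has
 * no fixed size of its own.
 */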
caps->currentExtent = (VkExtent2D) { -1, -1 };
caps->minImageExtent = (VkExtent2D) { 1, 1 };
caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX };
caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
caps->maxImageArrayLayers = 1;
caps->supportedCompositeAlpha =
VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR |
VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR;
caps->supportedUsageFlags =
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
return VK_SUCCESS;
}
static VkResult
wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface,
struct anv_physical_device *device,
uint32_t* pSurfaceFormatCount,
VkSurfaceFormatKHR* pSurfaceFormats)
{
VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
struct wsi_wl_display *display =
wsi_wl_get_display(device->instance, surface->display);
uint32_t count = anv_vector_length(&display->formats);
if (pSurfaceFormats == NULL) {
*pSurfaceFormatCount = count;
return VK_SUCCESS;
}
assert(*pSurfaceFormatCount >= count);
*pSurfaceFormatCount = count;
VkFormat *f;
anv_vector_foreach(f, &display->formats) {
*(pSurfaceFormats++) = (VkSurfaceFormatKHR) {
.format = *f,
/* TODO: We should get this from the compositor somehow */
.colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR,
};
}
return VK_SUCCESS;
}
static VkResult
wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
uint32_t* pPresentModeCount,
VkPresentModeKHR* pPresentModes)
{
if (pPresentModes == NULL) {
*pPresentModeCount = ARRAY_SIZE(present_modes);
return VK_SUCCESS;
}
assert(*pPresentModeCount >= ARRAY_SIZE(present_modes));
/* Copy at most ARRAY_SIZE(present_modes) entries so we never read past the
 * end of the static array.
 */
typed_memcpy(pPresentModes, present_modes, ARRAY_SIZE(present_modes));
*pPresentModeCount = ARRAY_SIZE(present_modes);
return VK_SUCCESS;
}
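/* The format and present-mode queries above follow the standard Vulkan
 * two-call pattern: call once with a NULL array to learn the count, then
 * again with storage for that many entries. A minimal sketch of the
 * application side (variable names chosen purely for illustration):
 *
 *    uint32_t count = 0;
 *    vkGetPhysicalDeviceSurfacePresentModesKHR(phys_dev, surface, &count, NULL);
 *    VkPresentModeKHR *modes = malloc(count * sizeof(*modes));
 *    vkGetPhysicalDeviceSurfacePresentModesKHR(phys_dev, surface, &count, modes);
 */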
static VkResult
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *surface,
struct anv_device *device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
struct anv_swapchain **swapchain);
VkResult anv_CreateWaylandSurfaceKHR(
VkInstance _instance,
const VkWaylandSurfaceCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface)
{
ANV_FROM_HANDLE(anv_instance, instance, _instance);
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR);
VkIcdSurfaceWayland *surface;
surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (surface == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
surface->base.platform = VK_ICD_WSI_PLATFORM_WAYLAND;
surface->display = pCreateInfo->display;
surface->surface = pCreateInfo->surface;
*pSurface = _VkIcdSurfaceBase_to_handle(&surface->base);
return VK_SUCCESS;
}
struct wsi_wl_image {
struct anv_image * image;
struct anv_device_memory * memory;
struct wl_buffer * buffer;
bool busy;
};
struct wsi_wl_swapchain {
struct anv_swapchain base;
struct wsi_wl_display * display;
struct wl_event_queue * queue;
struct wl_surface * surface;
VkExtent2D extent;
VkFormat vk_format;
uint32_t drm_format;
VkPresentModeKHR present_mode;
bool fifo_ready;
uint32_t image_count;
struct wsi_wl_image images[0];
};
static VkResult
wsi_wl_swapchain_get_images(struct anv_swapchain *anv_chain,
uint32_t *pCount, VkImage *pSwapchainImages)
{
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain;
if (pSwapchainImages == NULL) {
*pCount = chain->image_count;
return VK_SUCCESS;
}
assert(chain->image_count <= *pCount);
for (uint32_t i = 0; i < chain->image_count; i++)
pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image);
*pCount = chain->image_count;
return VK_SUCCESS;
}
static VkResult
wsi_wl_swapchain_acquire_next_image(struct anv_swapchain *anv_chain,
uint64_t timeout,
VkSemaphore semaphore,
uint32_t *image_index)
{
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain;
int ret = wl_display_dispatch_queue_pending(chain->display->display,
chain->queue);
/* XXX: I'm not sure if out-of-date is the right error here. If
* wl_display_dispatch_queue_pending fails it most likely means we got
* kicked by the server so this seems more-or-less correct.
*/
if (ret < 0)
return vk_error(VK_ERROR_OUT_OF_DATE_KHR);
while (1) {
for (uint32_t i = 0; i < chain->image_count; i++) {
if (!chain->images[i].busy) {
/* We found a non-busy image */
*image_index = i;
return VK_SUCCESS;
}
}
/* This time we do a blocking dispatch because we can't go
* anywhere until we get an event.
*/
int ret = wl_display_roundtrip_queue(chain->display->display,
chain->queue);
if (ret < 0)
return vk_error(VK_ERROR_OUT_OF_DATE_KHR);
}
}
static void
frame_handle_done(void *data, struct wl_callback *callback, uint32_t serial)
{
struct wsi_wl_swapchain *chain = data;
chain->fifo_ready = true;
wl_callback_destroy(callback);
}
static const struct wl_callback_listener frame_listener = {
frame_handle_done,
};
static VkResult
wsi_wl_swapchain_queue_present(struct anv_swapchain *anv_chain,
struct anv_queue *queue,
uint32_t image_index)
{
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain;
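/* FIFO presentation is throttled by the compositor's frame callback: we do
 * not attach a new buffer until the callback for the previously committed
 * frame has fired (see frame_handle_done above).
 */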
if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) {
while (!chain->fifo_ready) {
int ret = wl_display_dispatch_queue(chain->display->display,
chain->queue);
if (ret < 0)
return vk_error(VK_ERROR_OUT_OF_DATE_KHR);
}
}
assert(image_index < chain->image_count);
wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0);
wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX);
if (chain->present_mode == VK_PRESENT_MODE_FIFO_KHR) {
struct wl_callback *frame = wl_surface_frame(chain->surface);
wl_proxy_set_queue((struct wl_proxy *)frame, chain->queue);
wl_callback_add_listener(frame, &frame_listener, chain);
chain->fifo_ready = false;
}
chain->images[image_index].busy = true;
wl_surface_commit(chain->surface);
wl_display_flush(chain->display->display);
return VK_SUCCESS;
}
static void
wsi_wl_image_finish(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image,
const VkAllocationCallbacks* pAllocator)
{
VkDevice vk_device = anv_device_to_handle(chain->base.device);
anv_FreeMemory(vk_device, anv_device_memory_to_handle(image->memory),
pAllocator);
anv_DestroyImage(vk_device, anv_image_to_handle(image->image),
pAllocator);
}
static void
buffer_handle_release(void *data, struct wl_buffer *buffer)
{
struct wsi_wl_image *image = data;
assert(image->buffer == buffer);
image->busy = false;
}
static const struct wl_buffer_listener buffer_listener = {
buffer_handle_release,
};
static VkResult
wsi_wl_image_init(struct wsi_wl_swapchain *chain, struct wsi_wl_image *image,
const VkAllocationCallbacks* pAllocator)
{
VkDevice vk_device = anv_device_to_handle(chain->base.device);
VkResult result;
VkImage vk_image;
result = anv_image_create(vk_device,
&(struct anv_image_create_info) {
.isl_tiling_flags = ISL_TILING_X_BIT,
.stride = 0,
.vk_info =
&(VkImageCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = chain->vk_format,
.extent = {
.width = chain->extent.width,
.height = chain->extent.height,
.depth = 1
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = 1,
/* FIXME: Need a way to use X tiling to allow scanout */
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.flags = 0,
}},
pAllocator,
&vk_image);
if (result != VK_SUCCESS)
return result;
image->image = anv_image_from_handle(vk_image);
assert(anv_format_is_color(image->image->format));
struct anv_surface *surface = &image->image->color_surface;
VkDeviceMemory vk_memory;
result = anv_AllocateMemory(vk_device,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = image->image->size,
.memoryTypeIndex = 0,
},
pAllocator,
&vk_memory);
if (result != VK_SUCCESS)
goto fail_image;
image->memory = anv_device_memory_from_handle(vk_memory);
image->memory->bo.is_winsys_bo = true;
result = anv_BindImageMemory(vk_device, vk_image, vk_memory, 0);
if (result != VK_SUCCESS)
goto fail_mem;
int ret = anv_gem_set_tiling(chain->base.device,
image->memory->bo.gem_handle,
surface->isl.row_pitch, I915_TILING_X);
if (ret) {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_mem;
}
int fd = anv_gem_handle_to_fd(chain->base.device,
image->memory->bo.gem_handle);
if (fd == -1) {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_mem;
}
image->buffer = wl_drm_create_prime_buffer(chain->display->drm,
fd, /* name */
chain->extent.width,
chain->extent.height,
chain->drm_format,
surface->offset,
surface->isl.row_pitch,
0, 0, 0, 0 /* unused */);
wl_display_roundtrip(chain->display->display);
close(fd);
wl_proxy_set_queue((struct wl_proxy *)image->buffer, chain->queue);
wl_buffer_add_listener(image->buffer, &buffer_listener, image);
return VK_SUCCESS;
fail_mem:
anv_FreeMemory(vk_device, vk_memory, pAllocator);
fail_image:
anv_DestroyImage(vk_device, vk_image, pAllocator);
return result;
}
static VkResult
wsi_wl_swapchain_destroy(struct anv_swapchain *anv_chain,
const VkAllocationCallbacks *pAllocator)
{
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)anv_chain;
for (uint32_t i = 0; i < chain->image_count; i++) {
if (chain->images[i].buffer)
wsi_wl_image_finish(chain, &chain->images[i], pAllocator);
}
anv_free2(&chain->base.device->alloc, pAllocator, chain);
return VK_SUCCESS;
}
static VkResult
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
struct anv_device *device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
struct anv_swapchain **swapchain_out)
{
VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
struct wsi_wl_swapchain *chain;
VkResult result;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR);
int num_images = pCreateInfo->minImageCount;
assert(num_images >= MIN_NUM_IMAGES);
/* For true mailbox mode, we need at least 4 images:
* 1) One to scan out from
* 2) One to have queued for scan-out
* 3) One to be currently held by the Wayland compositor
* 4) One to render to
*/
if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR)
num_images = MAX2(num_images, 4);
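/* For example, an application requesting minImageCount == 2 still gets 4
 * images in mailbox mode, while FIFO keeps the requested 2.
 */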
size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]);
chain = anv_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (chain == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
chain->base.device = device;
chain->base.destroy = wsi_wl_swapchain_destroy;
chain->base.get_images = wsi_wl_swapchain_get_images;
chain->base.acquire_next_image = wsi_wl_swapchain_acquire_next_image;
chain->base.queue_present = wsi_wl_swapchain_queue_present;
chain->surface = surface->surface;
chain->extent = pCreateInfo->imageExtent;
chain->vk_format = pCreateInfo->imageFormat;
chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, false);
chain->present_mode = pCreateInfo->presentMode;
chain->fifo_ready = true;
chain->image_count = num_images;
/* Mark a bunch of stuff as NULL. This way we can just call
* destroy_swapchain for cleanup.
*/
for (uint32_t i = 0; i < chain->image_count; i++)
chain->images[i].buffer = NULL;
chain->queue = NULL;
chain->display = wsi_wl_get_display(device->instance, surface->display);
if (!chain->display) {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
chain->queue = wl_display_create_queue(chain->display->display);
if (!chain->queue) {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
for (uint32_t i = 0; i < chain->image_count; i++) {
result = wsi_wl_image_init(chain, &chain->images[i], pAllocator);
if (result != VK_SUCCESS)
goto fail;
chain->images[i].busy = false;
}
*swapchain_out = &chain->base;
return VK_SUCCESS;
fail:
wsi_wl_swapchain_destroy(&chain->base, pAllocator);
return result;
}
VkResult
anv_wl_init_wsi(struct anv_instance *instance)
{
struct wsi_wayland *wsi;
VkResult result;
wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!wsi) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
wsi->instance = instance;
int ret = pthread_mutex_init(&wsi->mutex, NULL);
if (ret != 0) {
if (ret == ENOMEM) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
} else {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
goto fail_alloc;
}
wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
if (!wsi->displays) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_mutex;
}
wsi->base.get_support = wsi_wl_surface_get_support;
wsi->base.get_capabilities = wsi_wl_surface_get_capabilities;
wsi->base.get_formats = wsi_wl_surface_get_formats;
wsi->base.get_present_modes = wsi_wl_surface_get_present_modes;
wsi->base.create_swapchain = wsi_wl_surface_create_swapchain;
instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base;
return VK_SUCCESS;
fail_mutex:
pthread_mutex_destroy(&wsi->mutex);
fail_alloc:
anv_free(&instance->alloc, wsi);
fail:
instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL;
return result;
}
void
anv_wl_finish_wsi(struct anv_instance *instance)
{
struct wsi_wayland *wsi =
(struct wsi_wayland *)instance->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
if (wsi) {
_mesa_hash_table_destroy(wsi->displays, NULL);
pthread_mutex_destroy(&wsi->mutex);
anv_free(&instance->alloc, wsi);
}
}

View file

@@ -0,0 +1,902 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <X11/xshmfence.h>
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>
#include "anv_wsi.h"
#include "util/hash_table.h"
struct wsi_x11_connection {
bool has_dri3;
bool has_present;
};
struct wsi_x11 {
struct anv_wsi_interface base;
pthread_mutex_t mutex;
/* Hash table of xcb_connection -> wsi_x11_connection mappings */
struct hash_table *connections;
};
static struct wsi_x11_connection *
wsi_x11_connection_create(struct anv_instance *instance, xcb_connection_t *conn)
{
xcb_query_extension_cookie_t dri3_cookie, pres_cookie;
xcb_query_extension_reply_t *dri3_reply, *pres_reply;
struct wsi_x11_connection *wsi_conn =
anv_alloc(&instance->alloc, sizeof(*wsi_conn), 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!wsi_conn)
return NULL;
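/* The literals 4 and 7 below are the lengths of the extension name strings
 * "DRI3" and "PRESENT" that follow them.
 */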
dri3_cookie = xcb_query_extension(conn, 4, "DRI3");
pres_cookie = xcb_query_extension(conn, 7, "PRESENT");
dri3_reply = xcb_query_extension_reply(conn, dri3_cookie, NULL);
pres_reply = xcb_query_extension_reply(conn, pres_cookie, NULL);
if (dri3_reply == NULL || pres_reply == NULL) {
free(dri3_reply);
free(pres_reply);
anv_free(&instance->alloc, wsi_conn);
return NULL;
}
wsi_conn->has_dri3 = dri3_reply->present != 0;
wsi_conn->has_present = pres_reply->present != 0;
free(dri3_reply);
free(pres_reply);
return wsi_conn;
}
static void
wsi_x11_connection_destroy(struct anv_instance *instance,
struct wsi_x11_connection *conn)
{
anv_free(&instance->alloc, conn);
}
static struct wsi_x11_connection *
wsi_x11_get_connection(struct anv_instance *instance, xcb_connection_t *conn)
{
struct wsi_x11 *wsi =
(struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB];
pthread_mutex_lock(&wsi->mutex);
struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn);
if (!entry) {
/* We're about to make a bunch of blocking calls. Let's drop the
* mutex for now so we don't block up too badly.
*/
pthread_mutex_unlock(&wsi->mutex);
struct wsi_x11_connection *wsi_conn =
wsi_x11_connection_create(instance, conn);
pthread_mutex_lock(&wsi->mutex);
entry = _mesa_hash_table_search(wsi->connections, conn);
if (entry) {
/* Oops, someone raced us to it */
wsi_x11_connection_destroy(instance, wsi_conn);
} else {
entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn);
}
}
pthread_mutex_unlock(&wsi->mutex);
return entry->data;
}
static const VkSurfaceFormatKHR formats[] = {
{ .format = VK_FORMAT_B8G8R8A8_SRGB, },
};
static const VkPresentModeKHR present_modes[] = {
VK_PRESENT_MODE_MAILBOX_KHR,
};
static xcb_screen_t *
get_screen_for_root(xcb_connection_t *conn, xcb_window_t root)
{
xcb_screen_iterator_t screen_iter =
xcb_setup_roots_iterator(xcb_get_setup(conn));
for (; screen_iter.rem; xcb_screen_next (&screen_iter)) {
if (screen_iter.data->root == root)
return screen_iter.data;
}
return NULL;
}
static xcb_visualtype_t *
screen_get_visualtype(xcb_screen_t *screen, xcb_visualid_t visual_id,
unsigned *depth)
{
xcb_depth_iterator_t depth_iter =
xcb_screen_allowed_depths_iterator(screen);
for (; depth_iter.rem; xcb_depth_next (&depth_iter)) {
xcb_visualtype_iterator_t visual_iter =
xcb_depth_visuals_iterator (depth_iter.data);
for (; visual_iter.rem; xcb_visualtype_next (&visual_iter)) {
if (visual_iter.data->visual_id == visual_id) {
if (depth)
*depth = depth_iter.data->depth;
return visual_iter.data;
}
}
}
return NULL;
}
static xcb_visualtype_t *
connection_get_visualtype(xcb_connection_t *conn, xcb_visualid_t visual_id,
unsigned *depth)
{
xcb_screen_iterator_t screen_iter =
xcb_setup_roots_iterator(xcb_get_setup(conn));
/* For this we have to iterate over all of the screens which is rather
* annoying. Fortunately, there is probably only 1.
*/
for (; screen_iter.rem; xcb_screen_next (&screen_iter)) {
xcb_visualtype_t *visual = screen_get_visualtype(screen_iter.data,
visual_id, depth);
if (visual)
return visual;
}
return NULL;
}
static xcb_visualtype_t *
get_visualtype_for_window(xcb_connection_t *conn, xcb_window_t window,
unsigned *depth)
{
xcb_query_tree_cookie_t tree_cookie;
xcb_get_window_attributes_cookie_t attrib_cookie;
xcb_query_tree_reply_t *tree;
xcb_get_window_attributes_reply_t *attrib;
tree_cookie = xcb_query_tree(conn, window);
attrib_cookie = xcb_get_window_attributes(conn, window);
tree = xcb_query_tree_reply(conn, tree_cookie, NULL);
attrib = xcb_get_window_attributes_reply(conn, attrib_cookie, NULL);
if (attrib == NULL || tree == NULL) {
free(attrib);
free(tree);
return NULL;
}
xcb_window_t root = tree->root;
xcb_visualid_t visual_id = attrib->visual;
free(attrib);
free(tree);
xcb_screen_t *screen = get_screen_for_root(conn, root);
if (screen == NULL)
return NULL;
return screen_get_visualtype(screen, visual_id, depth);
}
static bool
visual_has_alpha(xcb_visualtype_t *visual, unsigned depth)
{
uint32_t rgb_mask = visual->red_mask |
visual->green_mask |
visual->blue_mask;
uint32_t all_mask = 0xffffffff >> (32 - depth);
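/* For example, a depth-32 visual with an 8-8-8 RGB layout has
 * rgb_mask == 0x00ffffff and all_mask == 0xffffffff, leaving 0xff000000
 * available for alpha.
 */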
/* Do we have bits left over after RGB? */
return (all_mask & ~rgb_mask) != 0;
}
VkBool32 anv_GetPhysicalDeviceXcbPresentationSupportKHR(
VkPhysicalDevice physicalDevice,
uint32_t queueFamilyIndex,
xcb_connection_t* connection,
xcb_visualid_t visual_id)
{
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection(device->instance, connection);
if (!wsi_conn->has_dri3) {
fprintf(stderr, "vulkan: No DRI3 support\n");
return false;
}
unsigned visual_depth;
if (!connection_get_visualtype(connection, visual_id, &visual_depth))
return false;
if (visual_depth != 24 && visual_depth != 32)
return false;
return true;
}
static VkResult
x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
struct anv_physical_device *device,
uint32_t queueFamilyIndex,
VkBool32* pSupported)
{
VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface;
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection(device->instance, surface->connection);
if (!wsi_conn)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
if (!wsi_conn->has_dri3) {
fprintf(stderr, "vulkan: No DRI3 support\n");
*pSupported = false;
return VK_SUCCESS;
}
unsigned visual_depth;
if (!get_visualtype_for_window(surface->connection, surface->window,
&visual_depth)) {
*pSupported = false;
return VK_SUCCESS;
}
if (visual_depth != 24 && visual_depth != 32) {
*pSupported = false;
return VK_SUCCESS;
}
*pSupported = true;
return VK_SUCCESS;
}
static VkResult
x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
struct anv_physical_device *device,
VkSurfaceCapabilitiesKHR *caps)
{
VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface;
xcb_get_geometry_cookie_t geom_cookie;
xcb_generic_error_t *err;
xcb_get_geometry_reply_t *geom;
unsigned visual_depth;
geom_cookie = xcb_get_geometry(surface->connection, surface->window);
/* get_visualtype_for_window() below does a round-trip of its own. That is
 * why we send the get_geometry request first and only read its reply once
 * we have the visual.
 */
xcb_visualtype_t *visual =
get_visualtype_for_window(surface->connection, surface->window,
&visual_depth);
geom = xcb_get_geometry_reply(surface->connection, geom_cookie, &err);
if (geom) {
VkExtent2D extent = { geom->width, geom->height };
caps->currentExtent = extent;
caps->minImageExtent = extent;
caps->maxImageExtent = extent;
} else {
/* This can happen if the client didn't wait for the configure event
* to come back from the compositor. In that case, we don't know the
* size of the window so we just return valid "I don't know" stuff.
*/
caps->currentExtent = (VkExtent2D) { -1, -1 };
caps->minImageExtent = (VkExtent2D) { 1, 1 };
caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX };
}
free(err);
free(geom);
if (visual_has_alpha(visual, visual_depth)) {
caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR |
VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR;
} else {
caps->supportedCompositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR |
VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
}
caps->minImageCount = 2;
caps->maxImageCount = 4;
caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
caps->maxImageArrayLayers = 1;
caps->supportedUsageFlags =
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
return VK_SUCCESS;
}
static VkResult
x11_surface_get_formats(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
uint32_t *pSurfaceFormatCount,
VkSurfaceFormatKHR *pSurfaceFormats)
{
if (pSurfaceFormats == NULL) {
*pSurfaceFormatCount = ARRAY_SIZE(formats);
return VK_SUCCESS;
}
assert(*pSurfaceFormatCount >= ARRAY_SIZE(formats));
typed_memcpy(pSurfaceFormats, formats, ARRAY_SIZE(formats));
*pSurfaceFormatCount = ARRAY_SIZE(formats);
return VK_SUCCESS;
}
static VkResult
x11_surface_get_present_modes(VkIcdSurfaceBase *surface,
struct anv_physical_device *device,
uint32_t *pPresentModeCount,
VkPresentModeKHR *pPresentModes)
{
if (pPresentModes == NULL) {
*pPresentModeCount = ARRAY_SIZE(present_modes);
return VK_SUCCESS;
}
assert(*pPresentModeCount >= ARRAY_SIZE(present_modes));
typed_memcpy(pPresentModes, present_modes, ARRAY_SIZE(present_modes));
*pPresentModeCount = ARRAY_SIZE(present_modes);
return VK_SUCCESS;
}
static VkResult
x11_surface_create_swapchain(VkIcdSurfaceBase *surface,
struct anv_device *device,
const VkSwapchainCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
struct anv_swapchain **swapchain);
VkResult anv_CreateXcbSurfaceKHR(
VkInstance _instance,
const VkXcbSurfaceCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSurfaceKHR* pSurface)
{
ANV_FROM_HANDLE(anv_instance, instance, _instance);
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR);
VkIcdSurfaceXcb *surface;
surface = anv_alloc2(&instance->alloc, pAllocator, sizeof *surface, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (surface == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
surface->base.platform = VK_ICD_WSI_PLATFORM_XCB;
surface->connection = pCreateInfo->connection;
surface->window = pCreateInfo->window;
*pSurface = _VkIcdSurfaceBase_to_handle(&surface->base);
return VK_SUCCESS;
}
struct x11_image {
struct anv_image * image;
struct anv_device_memory * memory;
xcb_pixmap_t pixmap;
bool busy;
struct xshmfence * shm_fence;
uint32_t sync_fence;
};
struct x11_swapchain {
struct anv_swapchain base;
xcb_connection_t * conn;
xcb_window_t window;
xcb_gc_t gc;
VkExtent2D extent;
uint32_t image_count;
xcb_present_event_t event_id;
xcb_special_event_t * special_event;
uint64_t send_sbc;
uint32_t stamp;
struct x11_image images[0];
};
static VkResult
x11_get_images(struct anv_swapchain *anv_chain,
uint32_t* pCount, VkImage *pSwapchainImages)
{
struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
if (pSwapchainImages == NULL) {
*pCount = chain->image_count;
return VK_SUCCESS;
}
assert(chain->image_count <= *pCount);
for (uint32_t i = 0; i < chain->image_count; i++)
pSwapchainImages[i] = anv_image_to_handle(chain->images[i].image);
*pCount = chain->image_count;
return VK_SUCCESS;
}
static VkResult
x11_handle_dri3_present_event(struct x11_swapchain *chain,
xcb_present_generic_event_t *event)
{
switch (event->evtype) {
case XCB_PRESENT_CONFIGURE_NOTIFY: {
xcb_present_configure_notify_event_t *config = (void *) event;
if (config->width != chain->extent.width ||
config->height != chain->extent.height)
return vk_error(VK_ERROR_OUT_OF_DATE_KHR);
break;
}
case XCB_PRESENT_EVENT_IDLE_NOTIFY: {
xcb_present_idle_notify_event_t *idle = (void *) event;
for (unsigned i = 0; i < chain->image_count; i++) {
if (chain->images[i].pixmap == idle->pixmap) {
chain->images[i].busy = false;
break;
}
}
break;
}
case XCB_PRESENT_COMPLETE_NOTIFY:
default:
break;
}
return VK_SUCCESS;
}
static VkResult
x11_acquire_next_image(struct anv_swapchain *anv_chain,
uint64_t timeout,
VkSemaphore semaphore,
uint32_t *image_index)
{
struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
while (1) {
for (uint32_t i = 0; i < chain->image_count; i++) {
if (!chain->images[i].busy) {
/* We found a non-busy image */
xshmfence_await(chain->images[i].shm_fence);
*image_index = i;
return VK_SUCCESS;
}
}
xcb_flush(chain->conn);
xcb_generic_event_t *event =
xcb_wait_for_special_event(chain->conn, chain->special_event);
if (!event)
return vk_error(VK_ERROR_OUT_OF_DATE_KHR);
VkResult result = x11_handle_dri3_present_event(chain, (void *)event);
free(event);
if (result != VK_SUCCESS)
return result;
}
}
static VkResult
x11_queue_present(struct anv_swapchain *anv_chain,
struct anv_queue *queue,
uint32_t image_index)
{
struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
struct x11_image *image = &chain->images[image_index];
assert(image_index < chain->image_count);
uint32_t options = XCB_PRESENT_OPTION_NONE;
int64_t target_msc = 0;
int64_t divisor = 0;
int64_t remainder = 0;
options |= XCB_PRESENT_OPTION_ASYNC;
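/* ASYNC asks the X server to present as soon as possible instead of waiting
 * for vblank, matching the mailbox present mode we advertise.
 */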
xshmfence_reset(image->shm_fence);
xcb_void_cookie_t cookie =
xcb_present_pixmap(chain->conn,
chain->window,
image->pixmap,
(uint32_t) chain->send_sbc,
0, /* valid */
0, /* update */
0, /* x_off */
0, /* y_off */
XCB_NONE, /* target_crtc */
XCB_NONE,
image->sync_fence,
options,
target_msc,
divisor,
remainder, 0, NULL);
xcb_discard_reply(chain->conn, cookie.sequence);
image->busy = true;
xcb_flush(chain->conn);
return VK_SUCCESS;
}
static VkResult
x11_image_init(struct anv_device *device, struct x11_swapchain *chain,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
struct x11_image *image)
{
xcb_void_cookie_t cookie;
VkResult result;
VkImage image_h;
result = anv_image_create(anv_device_to_handle(device),
&(struct anv_image_create_info) {
.isl_tiling_flags = ISL_TILING_X_BIT,
.stride = 0,
.vk_info =
&(VkImageCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.format = pCreateInfo->imageFormat,
.extent = {
.width = pCreateInfo->imageExtent.width,
.height = pCreateInfo->imageExtent.height,
.depth = 1
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = 1,
/* FIXME: Need a way to use X tiling to allow scanout */
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.flags = 0,
}},
NULL,
&image_h);
if (result != VK_SUCCESS)
return result;
image->image = anv_image_from_handle(image_h);
assert(anv_format_is_color(image->image->format));
VkDeviceMemory memory_h;
result = anv_AllocateMemory(anv_device_to_handle(device),
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = image->image->size,
.memoryTypeIndex = 0,
},
NULL /* XXX: pAllocator */,
&memory_h);
if (result != VK_SUCCESS)
goto fail_create_image;
image->memory = anv_device_memory_from_handle(memory_h);
image->memory->bo.is_winsys_bo = true;
anv_BindImageMemory(anv_device_to_handle(device), image_h, memory_h, 0);
struct anv_surface *surface = &image->image->color_surface;
assert(surface->isl.tiling == ISL_TILING_X);
int ret = anv_gem_set_tiling(device, image->memory->bo.gem_handle,
surface->isl.row_pitch, I915_TILING_X);
if (ret) {
/* FINISHME: Choose a better error. */
result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
"set_tiling failed: %m");
goto fail_alloc_memory;
}
int fd = anv_gem_handle_to_fd(device, image->memory->bo.gem_handle);
if (fd == -1) {
/* FINISHME: Choose a better error. */
result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
"handle_to_fd failed: %m");
goto fail_alloc_memory;
}
uint32_t bpp = 32;
uint32_t depth = 24;
image->pixmap = xcb_generate_id(chain->conn);
cookie =
xcb_dri3_pixmap_from_buffer_checked(chain->conn,
image->pixmap,
chain->window,
image->image->size,
pCreateInfo->imageExtent.width,
pCreateInfo->imageExtent.height,
surface->isl.row_pitch,
depth, bpp, fd);
xcb_discard_reply(chain->conn, cookie.sequence);
int fence_fd = xshmfence_alloc_shm();
if (fence_fd < 0) {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_pixmap;
}
image->shm_fence = xshmfence_map_shm(fence_fd);
if (image->shm_fence == NULL) {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_shmfence_alloc;
}
image->sync_fence = xcb_generate_id(chain->conn);
xcb_dri3_fence_from_fd(chain->conn,
image->pixmap,
image->sync_fence,
false,
fence_fd);
image->busy = false;
xshmfence_trigger(image->shm_fence);
return VK_SUCCESS;
fail_shmfence_alloc:
close(fence_fd);
fail_pixmap:
cookie = xcb_free_pixmap(chain->conn, image->pixmap);
xcb_discard_reply(chain->conn, cookie.sequence);
fail_alloc_memory:
anv_FreeMemory(anv_device_to_handle(chain->base.device),
anv_device_memory_to_handle(image->memory), pAllocator);
fail_create_image:
anv_DestroyImage(anv_device_to_handle(chain->base.device),
anv_image_to_handle(image->image), pAllocator);
return result;
}
static void
x11_image_finish(struct x11_swapchain *chain,
const VkAllocationCallbacks* pAllocator,
struct x11_image *image)
{
xcb_void_cookie_t cookie;
cookie = xcb_sync_destroy_fence(chain->conn, image->sync_fence);
xcb_discard_reply(chain->conn, cookie.sequence);
xshmfence_unmap_shm(image->shm_fence);
cookie = xcb_free_pixmap(chain->conn, image->pixmap);
xcb_discard_reply(chain->conn, cookie.sequence);
anv_DestroyImage(anv_device_to_handle(chain->base.device),
anv_image_to_handle(image->image), pAllocator);
anv_FreeMemory(anv_device_to_handle(chain->base.device),
anv_device_memory_to_handle(image->memory), pAllocator);
}
static VkResult
x11_swapchain_destroy(struct anv_swapchain *anv_chain,
const VkAllocationCallbacks *pAllocator)
{
struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
for (uint32_t i = 0; i < chain->image_count; i++)
x11_image_finish(chain, pAllocator, &chain->images[i]);
xcb_unregister_for_special_event(chain->conn, chain->special_event);
anv_free2(&chain->base.device->alloc, pAllocator, chain);
return VK_SUCCESS;
}
static VkResult
x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
struct anv_device *device,
const VkSwapchainCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks* pAllocator,
struct anv_swapchain **swapchain_out)
{
VkIcdSurfaceXcb *surface = (VkIcdSurfaceXcb *)icd_surface;
struct x11_swapchain *chain;
xcb_void_cookie_t cookie;
VkResult result;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR);
int num_images = pCreateInfo->minImageCount;
/* For true mailbox mode, we need at least 4 images:
* 1) One to scan out from
* 2) One to have queued for scan-out
* 3) One to be currently held by the X server
* 4) One to render to
*/
if (pCreateInfo->presentMode == VK_PRESENT_MODE_MAILBOX_KHR)
num_images = MAX2(num_images, 4);
size_t size = sizeof(*chain) + num_images * sizeof(chain->images[0]);
chain = anv_alloc2(&device->alloc, pAllocator, size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (chain == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
chain->base.device = device;
chain->base.destroy = x11_swapchain_destroy;
chain->base.get_images = x11_get_images;
chain->base.acquire_next_image = x11_acquire_next_image;
chain->base.queue_present = x11_queue_present;
chain->conn = surface->connection;
chain->window = surface->window;
chain->extent = pCreateInfo->imageExtent;
chain->image_count = num_images;
chain->send_sbc = 0;
chain->event_id = xcb_generate_id(chain->conn);
xcb_present_select_input(chain->conn, chain->event_id, chain->window,
XCB_PRESENT_EVENT_MASK_CONFIGURE_NOTIFY |
XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY |
XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY);
/* Create an XCB event queue to hold present events outside of the usual
* application event queue
*/
chain->special_event =
xcb_register_for_special_xge(chain->conn, &xcb_present_id,
chain->event_id, NULL);
chain->gc = xcb_generate_id(chain->conn);
if (!chain->gc) {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_register;
}
cookie = xcb_create_gc(chain->conn,
chain->gc,
chain->window,
XCB_GC_GRAPHICS_EXPOSURES,
(uint32_t []) { 0 });
xcb_discard_reply(chain->conn, cookie.sequence);
uint32_t image = 0;
for (; image < chain->image_count; image++) {
result = x11_image_init(device, chain, pCreateInfo, pAllocator,
&chain->images[image]);
if (result != VK_SUCCESS)
goto fail_init_images;
}
*swapchain_out = &chain->base;
return VK_SUCCESS;
fail_init_images:
for (uint32_t j = 0; j < image; j++)
x11_image_finish(chain, pAllocator, &chain->images[j]);
fail_register:
xcb_unregister_for_special_event(chain->conn, chain->special_event);
anv_free2(&device->alloc, pAllocator, chain);
return result;
}
VkResult
anv_x11_init_wsi(struct anv_instance *instance)
{
struct wsi_x11 *wsi;
VkResult result;
wsi = anv_alloc(&instance->alloc, sizeof(*wsi), 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!wsi) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail;
}
int ret = pthread_mutex_init(&wsi->mutex, NULL);
if (ret != 0) {
if (ret == ENOMEM) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
} else {
/* FINISHME: Choose a better error. */
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
goto fail_alloc;
}
wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
if (!wsi->connections) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_mutex;
}
wsi->base.get_support = x11_surface_get_support;
wsi->base.get_capabilities = x11_surface_get_capabilities;
wsi->base.get_formats = x11_surface_get_formats;
wsi->base.get_present_modes = x11_surface_get_present_modes;
wsi->base.create_swapchain = x11_surface_create_swapchain;
instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = &wsi->base;
return VK_SUCCESS;
fail_mutex:
pthread_mutex_destroy(&wsi->mutex);
fail_alloc:
anv_free(&instance->alloc, wsi);
fail:
instance->wsi[VK_ICD_WSI_PLATFORM_XCB] = NULL;
return result;
}
void
anv_x11_finish_wsi(struct anv_instance *instance)
{
struct wsi_x11 *wsi =
(struct wsi_x11 *)instance->wsi[VK_ICD_WSI_PLATFORM_XCB];
if (wsi) {
_mesa_hash_table_destroy(wsi->connections, NULL);
pthread_mutex_destroy(&wsi->mutex);
anv_free(&instance->alloc, wsi);
}
}

View file

@@ -0,0 +1,7 @@
{
"file_format_version": "1.0.0",
"ICD": {
"library_path": "@build_libdir@/libvulkan_intel.so",
"abi_versions": "1.0.3"
}
}

View file

@@ -0,0 +1,533 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#if GEN_GEN == 7 && !GEN_IS_HASWELL
void
gen7_cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer *cmd_buffer,
uint32_t stages)
{
static const uint32_t sampler_state_opcodes[] = {
[MESA_SHADER_VERTEX] = 43,
[MESA_SHADER_TESS_CTRL] = 44, /* HS */
[MESA_SHADER_TESS_EVAL] = 45, /* DS */
[MESA_SHADER_GEOMETRY] = 46,
[MESA_SHADER_FRAGMENT] = 47,
[MESA_SHADER_COMPUTE] = 0,
};
static const uint32_t binding_table_opcodes[] = {
[MESA_SHADER_VERTEX] = 38,
[MESA_SHADER_TESS_CTRL] = 39,
[MESA_SHADER_TESS_EVAL] = 40,
[MESA_SHADER_GEOMETRY] = 41,
[MESA_SHADER_FRAGMENT] = 42,
[MESA_SHADER_COMPUTE] = 0,
};
anv_foreach_stage(s, stages) {
if (cmd_buffer->state.samplers[s].alloc_size > 0) {
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS),
._3DCommandSubOpcode = sampler_state_opcodes[s],
.PointertoVSSamplerState = cmd_buffer->state.samplers[s].offset);
}
/* Always emit binding table pointers if we're asked to, since on SKL
* this is what flushes push constants. */
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_BINDING_TABLE_POINTERS_VS),
._3DCommandSubOpcode = binding_table_opcodes[s],
.PointertoVSBindingTable = cmd_buffer->state.binding_tables[s].offset);
}
}
uint32_t
gen7_cmd_buffer_flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
{
VkShaderStageFlags dirty = cmd_buffer->state.descriptors_dirty &
cmd_buffer->state.pipeline->active_stages;
VkResult result = VK_SUCCESS;
anv_foreach_stage(s, dirty) {
result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
&cmd_buffer->state.samplers[s]);
if (result != VK_SUCCESS)
break;
result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
&cmd_buffer->state.binding_tables[s]);
if (result != VK_SUCCESS)
break;
}
if (result != VK_SUCCESS) {
assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
assert(result == VK_SUCCESS);
/* Re-emit state base addresses so we get the new surface state base
* address before we start emitting binding tables etc.
*/
anv_cmd_buffer_emit_state_base_address(cmd_buffer);
/* Re-emit all active binding tables */
dirty |= cmd_buffer->state.pipeline->active_stages;
anv_foreach_stage(s, dirty) {
result = anv_cmd_buffer_emit_samplers(cmd_buffer, s,
&cmd_buffer->state.samplers[s]);
if (result != VK_SUCCESS)
return result;
result = anv_cmd_buffer_emit_binding_table(cmd_buffer, s,
&cmd_buffer->state.binding_tables[s]);
if (result != VK_SUCCESS)
return result;
}
}
cmd_buffer->state.descriptors_dirty &= ~dirty;
return dirty;
}
#endif /* GEN_GEN == 7 && !GEN_IS_HASWELL */
static inline int64_t
clamp_int64(int64_t x, int64_t min, int64_t max)
{
if (x < min)
return min;
else if (x < max)
return x;
else
return max;
}
#if GEN_GEN == 7 && !GEN_IS_HASWELL
void
gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
{
uint32_t count = cmd_buffer->state.dynamic.scissor.count;
const VkRect2D *scissors = cmd_buffer->state.dynamic.scissor.scissors;
struct anv_state scissor_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
for (uint32_t i = 0; i < count; i++) {
const VkRect2D *s = &scissors[i];
/* Since xmax and ymax are inclusive, we have to have xmax < xmin or
* ymax < ymin for empty clips. In case clip x, y, width height are all
* 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
* what we want. Just special case empty clips and produce a canonical
* empty clip. */
static const struct GEN7_SCISSOR_RECT empty_scissor = {
.ScissorRectangleYMin = 1,
.ScissorRectangleXMin = 1,
.ScissorRectangleYMax = 0,
.ScissorRectangleXMax = 0
};
const int max = 0xffff;
struct GEN7_SCISSOR_RECT scissor = {
/* Do this math using int64_t so overflow gets clamped correctly. */
.ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
.ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
.ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
.ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
};
if (s->extent.width <= 0 || s->extent.height <= 0) {
GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8,
&empty_scissor);
} else {
GEN7_SCISSOR_RECT_pack(NULL, scissor_state.map + i * 8, &scissor);
}
}
anv_batch_emit(&cmd_buffer->batch, GEN7_3DSTATE_SCISSOR_STATE_POINTERS,
.ScissorRectPointer = scissor_state.offset);
if (!cmd_buffer->device->info.has_llc)
anv_state_clflush(scissor_state);
}
#endif
static const uint32_t vk_to_gen_index_type[] = {
[VK_INDEX_TYPE_UINT16] = INDEX_WORD,
[VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
};
static const uint32_t restart_index_for_type[] = {
[VK_INDEX_TYPE_UINT16] = UINT16_MAX,
[VK_INDEX_TYPE_UINT32] = UINT32_MAX,
};
void genX(CmdBindIndexBuffer)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
VkIndexType indexType)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
if (GEN_IS_HASWELL)
cmd_buffer->state.restart_index = restart_index_for_type[indexType];
cmd_buffer->state.gen7.index_buffer = buffer;
cmd_buffer->state.gen7.index_type = vk_to_gen_index_type[indexType];
cmd_buffer->state.gen7.index_offset = offset;
}
static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
struct anv_state surfaces = { 0, }, samplers = { 0, };
VkResult result;
result = anv_cmd_buffer_emit_samplers(cmd_buffer,
MESA_SHADER_COMPUTE, &samplers);
if (result != VK_SUCCESS)
return result;
result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
MESA_SHADER_COMPUTE, &surfaces);
if (result != VK_SUCCESS)
return result;
struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
unsigned push_constant_data_size =
(prog_data->nr_params + local_id_dwords) * 4;
unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
unsigned push_constant_regs = reg_aligned_constant_size / 32;
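/* Worked example with illustrative numbers: nr_params == 6 and
 * local_invocation_id_regs == 3 give local_id_dwords == 24, so the push
 * data is (6 + 24) * 4 == 120 bytes, which aligns up to 128 bytes, i.e.
 * 4 registers.
 */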
if (push_state.alloc_size) {
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
.CURBETotalDataLength = push_state.alloc_size,
.CURBEDataStartAddress = push_state.offset);
}
assert(prog_data->total_shared <= 64 * 1024);
uint32_t slm_size = 0;
if (prog_data->total_shared > 0) {
/* slm_size is in 4k increments, but must be a power of 2. */
slm_size = 4 * 1024;
while (slm_size < prog_data->total_shared)
slm_size <<= 1;
slm_size /= 4 * 1024;
}
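/* Example: total_shared == 9 KiB starts at 4 KiB, doubles to 16 KiB, and
 * the final division encodes that as 4 (four 4 KiB units).
 */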
struct anv_state state =
anv_state_pool_emit(&device->dynamic_state_pool,
GENX(INTERFACE_DESCRIPTOR_DATA), 64,
.KernelStartPointer = pipeline->cs_simd,
.BindingTablePointer = surfaces.offset,
.SamplerStatePointer = samplers.offset,
.ConstantURBEntryReadLength =
push_constant_regs,
#if !GEN_IS_HASWELL
.ConstantURBEntryReadOffset = 0,
#endif
.BarrierEnable = cs_prog_data->uses_barrier,
.SharedLocalMemorySize = slm_size,
.NumberofThreadsinGPGPUThreadGroup =
pipeline->cs_thread_width_max);
const uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
.InterfaceDescriptorTotalLength = size,
.InterfaceDescriptorDataStartAddress = state.offset);
return VK_SUCCESS;
}
#define emit_lri(batch, reg, imm) \
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), \
.RegisterOffset = __anv_reg_num(reg), \
.DataDWord = imm)
void
genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
{
/* References for GL state:
*
* - commits e307cfa..228d5a3
* - src/mesa/drivers/dri/i965/gen7_l3_state.c
*/
uint32_t l3cr2_slm, l3cr2_noslm;
anv_pack_struct(&l3cr2_noslm, GENX(L3CNTLREG2),
.URBAllocation = 24,
.ROAllocation = 0,
.DCAllocation = 16);
anv_pack_struct(&l3cr2_slm, GENX(L3CNTLREG2),
.SLMEnable = 1,
.URBAllocation = 16,
.URBLowBandwidth = 1,
.ROAllocation = 0,
.DCAllocation = 8);
const uint32_t l3cr2_val = enable_slm ? l3cr2_slm : l3cr2_noslm;
bool changed = cmd_buffer->state.current_l3_config != l3cr2_val;
if (changed) {
/* According to the hardware docs, the L3 partitioning can only be
* changed while the pipeline is completely drained and the caches are
* flushed, which involves a first PIPE_CONTROL flush which stalls the
* pipeline...
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.DCFlushEnable = true,
.PostSyncOperation = NoWrite,
.CommandStreamerStallEnable = true);
/* ...followed by a second pipelined PIPE_CONTROL that initiates
* invalidation of the relevant caches. Note that because RO
* invalidation happens at the top of the pipeline (i.e. right away as
* the PIPE_CONTROL command is processed by the CS) we cannot combine it
* with the previous stalling flush as the hardware documentation
* suggests, because that would cause the CS to stall on previous
* rendering *after* RO invalidation and wouldn't prevent the RO caches
* from being polluted by concurrent rendering before the stall
* completes. This intentionally doesn't implement the SKL+ hardware
* workaround suggesting to enable CS stall on PIPE_CONTROLs with the
* texture cache invalidation bit set for GPGPU workloads because the
* previous and subsequent PIPE_CONTROLs already guarantee that there is
* no concurrent GPGPU kernel execution (see SKL HSD 2132585).
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.TextureCacheInvalidationEnable = true,
.ConstantCacheInvalidationEnable = true,
.InstructionCacheInvalidateEnable = true,
.StateCacheInvalidationEnable = true,
.PostSyncOperation = NoWrite);
/* Now send a third stalling flush to make sure that invalidation is
* complete when the L3 configuration registers are modified.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.DCFlushEnable = true,
.PostSyncOperation = NoWrite,
.CommandStreamerStallEnable = true);
anv_finishme("write GEN7_L3SQCREG1");
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG2), l3cr2_val);
uint32_t l3cr3_slm, l3cr3_noslm;
anv_pack_struct(&l3cr3_noslm, GENX(L3CNTLREG3),
.ISAllocation = 8,
.CAllocation = 4,
.TAllocation = 8);
anv_pack_struct(&l3cr3_slm, GENX(L3CNTLREG3),
.ISAllocation = 8,
.CAllocation = 8,
.TAllocation = 8);
const uint32_t l3cr3_val = enable_slm ? l3cr3_slm : l3cr3_noslm;
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG3), l3cr3_val);
cmd_buffer->state.current_l3_config = l3cr2_val;
}
}
void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
VkResult result;
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
bool needs_slm = cs_prog_data->base.total_shared > 0;
genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm);
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
(cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
/* FIXME: figure out descriptors for gen7 */
result = flush_compute_descriptor_set(cmd_buffer);
assert(result == VK_SUCCESS);
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
cmd_buffer->state.compute_dirty = 0;
}
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS |
ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
const struct anv_image_view *iview =
anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
const struct anv_image *image = iview ? iview->image : NULL;
const struct anv_format *anv_format =
iview ? anv_format_for_vk_format(iview->vk_format) : NULL;
const bool has_depth = iview && anv_format->has_depth;
const uint32_t depth_format = has_depth ?
isl_surf_get_depth_format(&cmd_buffer->device->isl_dev,
&image->depth_surface.isl) : D16_UNORM;
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
.DepthBufferSurfaceFormat = depth_format,
.LineWidth = cmd_buffer->state.dynamic.line_width,
.GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
.GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
.GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
};
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen7.sf);
}
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
.BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
.BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
.BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
.StencilReferenceValue = d->stencil_reference.front & 0xff,
.BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
if (!cmd_buffer->device->info.has_llc)
anv_state_clflush(cc_state);
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_CC_STATE_POINTERS),
.ColorCalcStatePointer = cc_state.offset);
}
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_RENDER_TARGETS |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
.StencilTestMask = d->stencil_compare_mask.front & 0xff,
.StencilWriteMask = d->stencil_write_mask.front & 0xff,
.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
.BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
};
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
struct anv_state ds_state =
anv_cmd_buffer_merge_dynamic(cmd_buffer, depth_stencil_dw,
pipeline->gen7.depth_stencil_state,
GENX(DEPTH_STENCIL_STATE_length), 64);
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS),
.PointertoDEPTH_STENCIL_STATE = ds_state.offset);
}
if (cmd_buffer->state.gen7.index_buffer &&
cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_INDEX_BUFFER)) {
struct anv_buffer *buffer = cmd_buffer->state.gen7.index_buffer;
uint32_t offset = cmd_buffer->state.gen7.index_offset;
#if GEN_IS_HASWELL
anv_batch_emit(&cmd_buffer->batch, GEN75_3DSTATE_VF,
.IndexedDrawCutIndexEnable = pipeline->primitive_restart,
.CutIndex = cmd_buffer->state.restart_index);
#endif
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER),
#if !GEN_IS_HASWELL
.CutIndexEnable = pipeline->primitive_restart,
#endif
.IndexFormat = cmd_buffer->state.gen7.index_type,
.MemoryObjectControlState = GENX(MOCS),
.BufferStartingAddress = { buffer->bo, buffer->offset + offset },
.BufferEndingAddress = { buffer->bo, buffer->offset + buffer->size });
}
cmd_buffer->state.dirty = 0;
}
void genX(CmdSetEvent)(
VkCommandBuffer commandBuffer,
VkEvent event,
VkPipelineStageFlags stageMask)
{
stub();
}
void genX(CmdResetEvent)(
VkCommandBuffer commandBuffer,
VkEvent event,
VkPipelineStageFlags stageMask)
{
stub();
}
void genX(CmdWaitEvents)(
VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags destStageMask,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
stub();
}

View file

@@ -0,0 +1,402 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genX_pipeline_util.h"
static void
gen7_emit_rs_state(struct anv_pipeline *pipeline,
const VkPipelineRasterizationStateCreateInfo *info,
const struct anv_graphics_pipeline_create_info *extra)
{
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
/* LegacyGlobalDepthBiasEnable */
.StatisticsEnable = true,
.FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
.BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
.ViewTransformEnable = !(extra && extra->use_rectlist),
.FrontWinding = vk_to_gen_front_face[info->frontFace],
/* bool AntiAliasingEnable; */
.CullMode = vk_to_gen_cullmode[info->cullMode],
/* uint32_t LineEndCapAntialiasingRegionWidth; */
.ScissorRectangleEnable = !(extra && extra->use_rectlist),
/* uint32_t MultisampleRasterizationMode; */
/* bool LastPixelEnable; */
.TriangleStripListProvokingVertexSelect = 0,
.LineStripListProvokingVertexSelect = 0,
.TriangleFanProvokingVertexSelect = 1,
/* uint32_t AALineDistanceMode; */
/* uint32_t VertexSubPixelPrecisionSelect; */
.UsePointWidthState = false,
.PointWidth = 1.0,
.GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
.GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
.GlobalDepthOffsetEnablePoint = info->depthBiasEnable,
};
GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
}
static void
gen7_emit_ds_state(struct anv_pipeline *pipeline,
const VkPipelineDepthStencilStateCreateInfo *info)
{
if (info == NULL) {
/* We're going to OR this together with the dynamic state. We need
* to make sure it's initialized to something useful.
*/
memset(pipeline->gen7.depth_stencil_state, 0,
sizeof(pipeline->gen7.depth_stencil_state));
return;
}
struct GENX(DEPTH_STENCIL_STATE) state = {
.DepthTestEnable = info->depthTestEnable,
.DepthBufferWriteEnable = info->depthWriteEnable,
.DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp],
.DoubleSidedStencilEnable = true,
.StencilTestEnable = info->stencilTestEnable,
.StencilBufferWriteEnable = info->stencilTestEnable,
.StencilFailOp = vk_to_gen_stencil_op[info->front.failOp],
.StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp],
.StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp],
.StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp],
.BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp],
.BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp],
.BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp],
.BackFaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp],
};
GENX(DEPTH_STENCIL_STATE_pack)(NULL, &pipeline->gen7.depth_stencil_state, &state);
}
static void
gen7_emit_cb_state(struct anv_pipeline *pipeline,
const VkPipelineColorBlendStateCreateInfo *info,
const VkPipelineMultisampleStateCreateInfo *ms_info)
{
struct anv_device *device = pipeline->device;
if (info == NULL || info->attachmentCount == 0) {
pipeline->blend_state =
anv_state_pool_emit(&device->dynamic_state_pool,
GENX(BLEND_STATE), 64,
.ColorBufferBlendEnable = false,
.WriteDisableAlpha = true,
.WriteDisableRed = true,
.WriteDisableGreen = true,
.WriteDisableBlue = true);
} else {
const VkPipelineColorBlendAttachmentState *a = &info->pAttachments[0];
struct GENX(BLEND_STATE) blend = {
.AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
.AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
.LogicOpEnable = info->logicOpEnable,
.LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
.ColorBufferBlendEnable = a->blendEnable,
.ColorClampRange = COLORCLAMP_RTFORMAT,
.PreBlendColorClampEnable = true,
.PostBlendColorClampEnable = true,
.SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
.DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
.ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
.SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
.DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
.AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
.WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
.WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
.WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
.WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
};
/* Our hardware applies the blend factor prior to the blend function
* regardless of what function is used. Technically, this means the
* hardware can do MORE than GL or Vulkan specify. However, it also
* means that, for MIN and MAX, we have to stomp the blend factor to
* ONE to make it a no-op.
*/
if (a->colorBlendOp == VK_BLEND_OP_MIN ||
a->colorBlendOp == VK_BLEND_OP_MAX) {
blend.SourceBlendFactor = BLENDFACTOR_ONE;
blend.DestinationBlendFactor = BLENDFACTOR_ONE;
}
if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
a->alphaBlendOp == VK_BLEND_OP_MAX) {
blend.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
blend.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
}
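/* Worked example (illustrative values, not from the spec): with
* colorBlendOp = VK_BLEND_OP_MIN and srcColorBlendFactor =
* VK_BLEND_FACTOR_SRC_ALPHA, the hardware would evaluate
* min(srcFactor * src, dstFactor * dst) while Vulkan asks for
* min(src, dst); stomping both factors to BLENDFACTOR_ONE above makes
* the factor multiply a no-op so the MIN/MAX result matches the API.
*/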
pipeline->blend_state = anv_state_pool_alloc(&device->dynamic_state_pool,
GENX(BLEND_STATE_length) * 4,
64);
GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend);
if (pipeline->device->info.has_llc)
anv_state_clflush(pipeline->blend_state);
}
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS),
.BlendStatePointer = pipeline->blend_state.offset);
}
VkResult
genX(graphics_pipeline_create)(
VkDevice _device,
struct anv_pipeline_cache * cache,
const VkGraphicsPipelineCreateInfo* pCreateInfo,
const struct anv_graphics_pipeline_create_info *extra,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_pipeline *pipeline;
VkResult result;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
result = anv_pipeline_init(pipeline, device, cache,
pCreateInfo, extra, pAllocator);
if (result != VK_SUCCESS) {
anv_free2(&device->alloc, pAllocator, pipeline);
return result;
}
assert(pCreateInfo->pVertexInputState);
emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra);
assert(pCreateInfo->pRasterizationState);
gen7_emit_rs_state(pipeline, pCreateInfo->pRasterizationState, extra);
gen7_emit_ds_state(pipeline, pCreateInfo->pDepthStencilState);
gen7_emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
pCreateInfo->pMultisampleState);
emit_urb_setup(pipeline);
const VkPipelineRasterizationStateCreateInfo *rs_info =
pCreateInfo->pRasterizationState;
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP),
.FrontWinding = vk_to_gen_front_face[rs_info->frontFace],
.CullMode = vk_to_gen_cullmode[rs_info->cullMode],
.ClipEnable = !(extra && extra->use_rectlist),
.APIMode = APIMODE_OGL,
.ViewportXYClipTestEnable = true,
.ClipMode = CLIPMODE_NORMAL,
.TriangleStripListProvokingVertexSelect = 0,
.LineStripListProvokingVertexSelect = 0,
.TriangleFanProvokingVertexSelect = 1,
.MinimumPointWidth = 0.125,
.MaximumPointWidth = 255.875,
.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1);
if (pCreateInfo->pMultisampleState &&
pCreateInfo->pMultisampleState->rasterizationSamples > 1)
anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO");
uint32_t samples = 1;
uint32_t log2_samples = __builtin_ffs(samples) - 1;
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE),
.PixelLocation = PIXLOC_CENTER,
.NumberofMultisamples = log2_samples);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK),
.SampleMask = 0xff);
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
#if 0
/* From gen7_vs_state.c */
/**
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
* Geometry > Geometry Shader > State:
*
* "Note: Because of corruption in IVB:GT2, software needs to flush the
* whole fixed function pipeline when the GS enable changes value in
* the 3DSTATE_GS."
*
* The hardware architects have clarified that in this context "flush the
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
* Stall" bit set.
*/
if (!brw->is_haswell && !brw->is_baytrail)
gen7_emit_vs_workaround_flush(brw);
#endif
if (pipeline->vs_vec4 == NO_KERNEL || (extra && extra->disable_vs))
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), .VSFunctionEnable = false);
else
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS),
.KernelStartPointer = pipeline->vs_vec4,
.ScratchSpaceBaseOffset = pipeline->scratch_start[MESA_SHADER_VERTEX],
.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base),
.DispatchGRFStartRegisterforURBData =
vs_prog_data->base.base.dispatch_grf_start_reg,
.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length,
.VertexURBEntryReadOffset = 0,
.MaximumNumberofThreads = device->info.max_vs_threads - 1,
.StatisticsEnable = true,
.VSFunctionEnable = true);
const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
if (pipeline->gs_kernel == NO_KERNEL || (extra && extra->disable_vs)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .GSEnable = false);
} else {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS),
.KernelStartPointer = pipeline->gs_kernel,
.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base),
.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1,
.OutputTopology = gs_prog_data->output_topology,
.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length,
.IncludeVertexHandles = gs_prog_data->base.include_vue_handles,
.DispatchGRFStartRegisterforURBData =
gs_prog_data->base.base.dispatch_grf_start_reg,
.MaximumNumberofThreads = device->info.max_gs_threads - 1,
/* This is in the next dword on HSW. */
.ControlDataFormat = gs_prog_data->control_data_format,
.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords,
.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1,
.DispatchMode = gs_prog_data->base.dispatch_mode,
.GSStatisticsEnable = true,
.IncludePrimitiveID = gs_prog_data->include_primitive_id,
# if (GEN_IS_HASWELL)
.ReorderMode = REORDER_TRAILING,
# else
.ReorderEnable = true,
# endif
.GSEnable = true);
}
if (pipeline->ps_ksp0 == NO_KERNEL) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE));
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM),
.StatisticsEnable = true,
.ThreadDispatchEnable = false,
.LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */
.LineAntialiasingRegionWidth = 1, /* 1.0 pixels */
.EarlyDepthStencilControl = EDSC_NORMAL,
.PointRasterizationRule = RASTRULE_UPPER_RIGHT);
/* Even if no fragments are ever dispatched, the hardware hangs if we
* don't at least set the maximum number of threads.
*/
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
.MaximumNumberofThreads = device->info.max_wm_threads - 1);
} else {
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 ||
wm_prog_data->urb_setup[VARYING_SLOT_BFC1] != -1)
anv_finishme("two-sided color needs sbe swizzling setup");
if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1)
anv_finishme("primitive_id needs sbe swizzling setup");
emit_3dstate_sbe(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
.KernelStartPointer0 = pipeline->ps_ksp0,
.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
.PerThreadScratchSpace = scratch_space(&wm_prog_data->base),
.MaximumNumberofThreads = device->info.max_wm_threads - 1,
.PushConstantEnable = wm_prog_data->base.nr_params > 0,
.AttributeEnable = wm_prog_data->num_varying_inputs > 0,
.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
.RenderTargetFastClearEnable = false,
.DualSourceBlendEnable = false,
.RenderTargetResolveEnable = false,
.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
POSOFFSET_SAMPLE : POSOFFSET_NONE,
._32PixelDispatchEnable = false,
._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
.DispatchGRFStartRegisterforConstantSetupData0 = pipeline->ps_grf_start0,
.DispatchGRFStartRegisterforConstantSetupData1 = 0,
.DispatchGRFStartRegisterforConstantSetupData2 = pipeline->ps_grf_start2,
#if 0
/* Haswell requires the sample mask to be set in this packet as well as
* in 3DSTATE_SAMPLE_MASK; the values should match. */
/* _NEW_BUFFERS, _NEW_MULTISAMPLE */
#endif
.KernelStartPointer1 = 0,
.KernelStartPointer2 = pipeline->ps_ksp2);
/* FIXME-GEN7: This needs a lot more work, cf gen7 upload_wm_state(). */
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM),
.StatisticsEnable = true,
.ThreadDispatchEnable = true,
.LineEndCapAntialiasingRegionWidth = 0, /* 0.5 pixels */
.LineAntialiasingRegionWidth = 1, /* 1.0 pixels */
.EarlyDepthStencilControl = EDSC_NORMAL,
.PointRasterizationRule = RASTRULE_UPPER_RIGHT,
.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth,
.PixelShaderUsesSourceW = wm_prog_data->uses_src_w,
.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask,
.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes);
}
*pPipeline = anv_pipeline_to_handle(pipeline);
return VK_SUCCESS;
}


@@ -0,0 +1,529 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#if GEN_GEN == 8
void
gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
{
uint32_t count = cmd_buffer->state.dynamic.viewport.count;
const VkViewport *viewports = cmd_buffer->state.dynamic.viewport.viewports;
struct anv_state sf_clip_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
for (uint32_t i = 0; i < count; i++) {
const VkViewport *vp = &viewports[i];
/* The gen7 state struct has just the matrix and guardband fields; the
* gen8 struct adds the min/max viewport fields. */
struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
.ViewportMatrixElementm00 = vp->width / 2,
.ViewportMatrixElementm11 = vp->height / 2,
.ViewportMatrixElementm22 = 1.0,
.ViewportMatrixElementm30 = vp->x + vp->width / 2,
.ViewportMatrixElementm31 = vp->y + vp->height / 2,
.ViewportMatrixElementm32 = 0.0,
.XMinClipGuardband = -1.0f,
.XMaxClipGuardband = 1.0f,
.YMinClipGuardband = -1.0f,
.YMaxClipGuardband = 1.0f,
.XMinViewPort = vp->x,
.XMaxViewPort = vp->x + vp->width - 1,
.YMinViewPort = vp->y,
.YMaxViewPort = vp->y + vp->height - 1,
};
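/* Worked example (illustrative numbers): a VkViewport of x = 0, y = 0,
* width = 1920, height = 1080 packs m00 = 960, m11 = 540, m30 = 960,
* m31 = 540, so screen_x = m00 * ndc_x + m30 maps NDC -1..1 to 0..1920
* and likewise for y; m22 = 1.0 and m32 = 0.0 pass depth through, with
* the min/max depth clamp coming from CC_VIEWPORT below.
*/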
struct GENX(CC_VIEWPORT) cc_viewport = {
.MinimumDepth = vp->minDepth,
.MaximumDepth = vp->maxDepth
};
GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
&sf_clip_viewport);
GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
}
if (!cmd_buffer->device->info.has_llc) {
anv_state_clflush(sf_clip_state);
anv_state_clflush(cc_state);
}
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC),
.CCViewportPointer = cc_state.offset);
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP),
.SFClipViewportPointer = sf_clip_state.offset);
}
#endif
#define emit_lri(batch, reg, imm) \
anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), \
.RegisterOffset = __anv_reg_num(reg), \
.DataDWord = imm)
void
genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, bool enable_slm)
{
/* References for GL state:
*
* - commits e307cfa..228d5a3
* - src/mesa/drivers/dri/i965/gen7_l3_state.c
*/
uint32_t l3cr_slm, l3cr_noslm;
anv_pack_struct(&l3cr_noslm, GENX(L3CNTLREG),
.URBAllocation = 48,
.AllAllocation = 48);
anv_pack_struct(&l3cr_slm, GENX(L3CNTLREG),
.SLMEnable = 1,
.URBAllocation = 16,
.AllAllocation = 48);
const uint32_t l3cr_val = enable_slm ? l3cr_slm : l3cr_noslm;
bool changed = cmd_buffer->state.current_l3_config != l3cr_val;
if (changed) {
/* According to the hardware docs, the L3 partitioning can only be
* changed while the pipeline is completely drained and the caches are
* flushed, which involves a first PIPE_CONTROL flush which stalls the
* pipeline...
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.DCFlushEnable = true,
.PostSyncOperation = NoWrite,
.CommandStreamerStallEnable = true);
/* ...followed by a second pipelined PIPE_CONTROL that initiates
* invalidation of the relevant caches. Note that because RO
* invalidation happens at the top of the pipeline (i.e. right away as
* the PIPE_CONTROL command is processed by the CS) we cannot combine it
* with the previous stalling flush as the hardware documentation
* suggests, because that would cause the CS to stall on previous
* rendering *after* RO invalidation and wouldn't prevent the RO caches
* from being polluted by concurrent rendering before the stall
* completes. This intentionally doesn't implement the SKL+ hardware
* workaround suggesting to enable CS stall on PIPE_CONTROLs with the
* texture cache invalidation bit set for GPGPU workloads because the
* previous and subsequent PIPE_CONTROLs already guarantee that there is
* no concurrent GPGPU kernel execution (see SKL HSD 2132585).
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.TextureCacheInvalidationEnable = true,
.ConstantCacheInvalidationEnable = true,
.InstructionCacheInvalidateEnable = true,
.StateCacheInvalidationEnable = true,
.PostSyncOperation = NoWrite);
/* Now send a third stalling flush to make sure that invalidation is
* complete when the L3 configuration registers are modified.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.DCFlushEnable = true,
.PostSyncOperation = NoWrite,
.CommandStreamerStallEnable = true);
emit_lri(&cmd_buffer->batch, GENX(L3CNTLREG), l3cr_val);
cmd_buffer->state.current_l3_config = l3cr_val;
}
}
static void
__emit_genx_sf_state(struct anv_cmd_buffer *cmd_buffer)
{
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
.LineWidth = cmd_buffer->state.dynamic.line_width,
};
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
/* FIXME: gen9.fs */
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
cmd_buffer->state.pipeline->gen8.sf);
}
#include "genxml/gen9_pack.h"
static void
__emit_gen9_sf_state(struct anv_cmd_buffer *cmd_buffer)
{
uint32_t sf_dw[GENX(3DSTATE_SF_length)];
struct GEN9_3DSTATE_SF sf = {
GEN9_3DSTATE_SF_header,
.LineWidth = cmd_buffer->state.dynamic.line_width,
};
GEN9_3DSTATE_SF_pack(NULL, sf_dw, &sf);
/* FIXME: gen9.fs */
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw,
cmd_buffer->state.pipeline->gen8.sf);
}
static void
__emit_sf_state(struct anv_cmd_buffer *cmd_buffer)
{
if (cmd_buffer->device->info.is_cherryview)
__emit_gen9_sf_state(cmd_buffer);
else
__emit_genx_sf_state(cmd_buffer);
}
void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
__emit_sf_state(cmd_buffer);
}
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){
uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
struct GENX(3DSTATE_RASTER) raster = {
GENX(3DSTATE_RASTER_header),
.GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
.GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
.GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
};
GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
pipeline->gen8.raster);
}
/* Stencil reference values moved from COLOR_CALC_STATE in gen8 to
* 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits get split
* across different state packets for gen8 and gen9. We handle that by
* using a big old #if switch here.
*/
#if GEN_GEN == 8
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GENX(COLOR_CALC_STATE_length) * 4,
64);
struct GENX(COLOR_CALC_STATE) cc = {
.BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
.BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
.BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
.BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
.StencilReferenceValue = d->stencil_reference.front & 0xff,
.BackFaceStencilReferenceValue = d->stencil_reference.back & 0xff,
};
GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
if (!cmd_buffer->device->info.has_llc)
anv_state_clflush(cc_state);
anv_batch_emit(&cmd_buffer->batch,
GENX(3DSTATE_CC_STATE_POINTERS),
.ColorCalcStatePointer = cc_state.offset,
.ColorCalcStatePointerValid = true);
}
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
GENX(3DSTATE_WM_DEPTH_STENCIL_header),
.StencilTestMask = d->stencil_compare_mask.front & 0xff,
.StencilWriteMask = d->stencil_write_mask.front & 0xff,
.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
.BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
};
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,
&wm_depth_stencil);
anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
pipeline->gen8.wm_depth_stencil);
}
#else
if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
struct anv_state cc_state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
GEN9_COLOR_CALC_STATE_length * 4,
64);
struct GEN9_COLOR_CALC_STATE cc = {
.BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
.BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
.BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
.BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
};
GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
if (!cmd_buffer->device->info.has_llc)
anv_state_clflush(cc_state);
anv_batch_emit(&cmd_buffer->batch,
GEN9_3DSTATE_CC_STATE_POINTERS,
.ColorCalcStatePointer = cc_state.offset,
.ColorCalcStatePointerValid = true);
}
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length];
struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
GEN9_3DSTATE_WM_DEPTH_STENCIL_header,
.StencilTestMask = d->stencil_compare_mask.front & 0xff,
.StencilWriteMask = d->stencil_write_mask.front & 0xff,
.BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
.BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
.StencilReferenceValue = d->stencil_reference.front & 0xff,
.BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
};
GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil);
anv_batch_emit_merge(&cmd_buffer->batch, dwords,
pipeline->gen9.wm_depth_stencil);
}
#endif
if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
ANV_CMD_DIRTY_INDEX_BUFFER)) {
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF),
.IndexedDrawCutIndexEnable = pipeline->primitive_restart,
.CutIndex = cmd_buffer->state.restart_index);
}
cmd_buffer->state.dirty = 0;
}
void genX(CmdBindIndexBuffer)(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,
VkDeviceSize offset,
VkIndexType indexType)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
static const uint32_t vk_to_gen_index_type[] = {
[VK_INDEX_TYPE_UINT16] = INDEX_WORD,
[VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
};
static const uint32_t restart_index_for_type[] = {
[VK_INDEX_TYPE_UINT16] = UINT16_MAX,
[VK_INDEX_TYPE_UINT32] = UINT32_MAX,
};
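/* Per the Vulkan spec, primitive restart uses the all-ones index value
* for the bound index type: 0xffff cuts primitives for
* VK_INDEX_TYPE_UINT16 and 0xffffffff for VK_INDEX_TYPE_UINT32. The
* value is latched here and programmed into 3DSTATE_VF when the dirty
* state is flushed.
*/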
cmd_buffer->state.restart_index = restart_index_for_type[indexType];
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER),
.IndexFormat = vk_to_gen_index_type[indexType],
.MemoryObjectControlState = GENX(MOCS),
.BufferStartingAddress = { buffer->bo, buffer->offset + offset },
.BufferSize = buffer->size - offset);
cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
}
static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
struct anv_state surfaces = { 0, }, samplers = { 0, };
VkResult result;
result = anv_cmd_buffer_emit_samplers(cmd_buffer,
MESA_SHADER_COMPUTE, &samplers);
if (result != VK_SUCCESS)
return result;
result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
MESA_SHADER_COMPUTE, &surfaces);
if (result != VK_SUCCESS)
return result;
struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
unsigned push_constant_data_size =
(prog_data->nr_params + local_id_dwords) * 4;
unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
unsigned push_constant_regs = reg_aligned_constant_size / 32;
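/* Worked example (illustrative numbers, not from a real shader): with
* nr_params = 6 and local_invocation_id_regs = 3, local_id_dwords = 24,
* push_constant_data_size = (6 + 24) * 4 = 120 bytes,
* reg_aligned_constant_size = 128 (32-byte register granularity) and
* push_constant_regs = 4 registers of CURBE data.
*/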
if (push_state.alloc_size) {
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
.CURBETotalDataLength = push_state.alloc_size,
.CURBEDataStartAddress = push_state.offset);
}
assert(prog_data->total_shared <= 64 * 1024);
uint32_t slm_size = 0;
if (prog_data->total_shared > 0) {
/* slm_size is in 4k increments, but must be a power of 2. */
slm_size = 4 * 1024;
while (slm_size < prog_data->total_shared)
slm_size <<= 1;
slm_size /= 4 * 1024;
}
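/* Worked example (illustrative): total_shared = 9 KiB rounds up to a
* 16 KiB power-of-two allocation, so slm_size ends up as
* 16384 / 4096 = 4 in the interface descriptor below.
*/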
struct anv_state state =
anv_state_pool_emit(&device->dynamic_state_pool,
GENX(INTERFACE_DESCRIPTOR_DATA), 64,
.KernelStartPointer = pipeline->cs_simd,
.KernelStartPointerHigh = 0,
.BindingTablePointer = surfaces.offset,
.BindingTableEntryCount = 0,
.SamplerStatePointer = samplers.offset,
.SamplerCount = 0,
.ConstantIndirectURBEntryReadLength = push_constant_regs,
.ConstantURBEntryReadOffset = 0,
.BarrierEnable = cs_prog_data->uses_barrier,
.SharedLocalMemorySize = slm_size,
.NumberofThreadsinGPGPUThreadGroup =
pipeline->cs_thread_width_max);
uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
.InterfaceDescriptorTotalLength = size,
.InterfaceDescriptorDataStartAddress = state.offset);
return VK_SUCCESS;
}
void
genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
VkResult result;
assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
bool needs_slm = cs_prog_data->base.total_shared > 0;
genX(cmd_buffer_config_l3)(cmd_buffer, needs_slm);
genX(flush_pipeline_select_gpgpu)(cmd_buffer);
if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
(cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
result = flush_compute_descriptor_set(cmd_buffer);
assert(result == VK_SUCCESS);
cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
cmd_buffer->state.compute_dirty = 0;
}
void genX(CmdSetEvent)(
VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_event, event, _event);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.DestinationAddressType = DAT_PPGTT,
.PostSyncOperation = WriteImmediateData,
.Address = {
&cmd_buffer->device->dynamic_state_block_pool.bo,
event->state.offset
},
.ImmediateData = VK_EVENT_SET);
}
void genX(CmdResetEvent)(
VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_event, event, _event);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
.DestinationAddressType = DAT_PPGTT,
.PostSyncOperation = WriteImmediateData,
.Address = {
&cmd_buffer->device->dynamic_state_block_pool.bo,
event->state.offset
},
.ImmediateData = VK_EVENT_RESET);
}
void genX(CmdWaitEvents)(
VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags destStageMask,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
for (uint32_t i = 0; i < eventCount; i++) {
ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT),
.WaitMode = PollingMode,
.CompareOperation = COMPARE_SAD_EQUAL_SDD,
.SemaphoreDataDword = VK_EVENT_SET,
.SemaphoreAddress = {
&cmd_buffer->device->dynamic_state_block_pool.bo,
event->state.offset
});
}
genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
false, /* byRegion */
memoryBarrierCount, pMemoryBarriers,
bufferMemoryBarrierCount, pBufferMemoryBarriers,
imageMemoryBarrierCount, pImageMemoryBarriers);
}


@@ -0,0 +1,538 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genX_pipeline_util.h"
static void
emit_ia_state(struct anv_pipeline *pipeline,
const VkPipelineInputAssemblyStateCreateInfo *info,
const struct anv_graphics_pipeline_create_info *extra)
{
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY),
.PrimitiveTopologyType = pipeline->topology);
}
static void
emit_rs_state(struct anv_pipeline *pipeline,
const VkPipelineRasterizationStateCreateInfo *info,
const VkPipelineMultisampleStateCreateInfo *ms_info,
const struct anv_graphics_pipeline_create_info *extra)
{
uint32_t samples = 1;
if (ms_info)
samples = ms_info->rasterizationSamples;
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
.ViewportTransformEnable = !(extra && extra->use_rectlist),
.TriangleStripListProvokingVertexSelect = 0,
.LineStripListProvokingVertexSelect = 0,
.TriangleFanProvokingVertexSelect = 1,
.PointWidthSource = Vertex,
.PointWidth = 1.0,
};
/* FINISHME: VkBool32 rasterizerDiscardEnable; */
GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);
struct GENX(3DSTATE_RASTER) raster = {
GENX(3DSTATE_RASTER_header),
/* For details on 3DSTATE_RASTER multisample state, see the BSpec table
* "Multisample Modes State".
*/
.DXMultisampleRasterizationEnable = samples > 1,
.ForcedSampleCount = FSC_NUMRASTSAMPLES_0,
.ForceMultisampling = false,
.FrontWinding = vk_to_gen_front_face[info->frontFace],
.CullMode = vk_to_gen_cullmode[info->cullMode],
.FrontFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
.BackFaceFillMode = vk_to_gen_fillmode[info->polygonMode],
.ScissorRectangleEnable = !(extra && extra->use_rectlist),
#if GEN_GEN == 8
.ViewportZClipTestEnable = true,
#else
/* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
.ViewportZFarClipTestEnable = true,
.ViewportZNearClipTestEnable = true,
#endif
.GlobalDepthOffsetEnableSolid = info->depthBiasEnable,
.GlobalDepthOffsetEnableWireframe = info->depthBiasEnable,
.GlobalDepthOffsetEnablePoint = info->depthBiasEnable,
};
GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
}
static void
emit_cb_state(struct anv_pipeline *pipeline,
const VkPipelineColorBlendStateCreateInfo *info,
const VkPipelineMultisampleStateCreateInfo *ms_info)
{
struct anv_device *device = pipeline->device;
uint32_t num_dwords = GENX(BLEND_STATE_length);
pipeline->blend_state =
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
struct GENX(BLEND_STATE) blend_state = {
.AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
.AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
};
/* Default everything to disabled */
for (uint32_t i = 0; i < 8; i++) {
blend_state.Entry[i].WriteDisableAlpha = true;
blend_state.Entry[i].WriteDisableRed = true;
blend_state.Entry[i].WriteDisableGreen = true;
blend_state.Entry[i].WriteDisableBlue = true;
}
struct anv_pipeline_bind_map *map =
&pipeline->bindings[MESA_SHADER_FRAGMENT];
bool has_writeable_rt = false;
for (unsigned i = 0; i < map->surface_count; i++) {
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
/* All color attachments are at the beginning of the binding table */
if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
break;
/* We can have at most 8 attachments */
assert(i < 8);
if (binding->offset >= info->attachmentCount)
continue;
const VkPipelineColorBlendAttachmentState *a =
&info->pAttachments[binding->offset];
if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
a->colorBlendOp != a->alphaBlendOp) {
blend_state.IndependentAlphaBlendEnable = true;
}
blend_state.Entry[i] = (struct GENX(BLEND_STATE_ENTRY)) {
.LogicOpEnable = info->logicOpEnable,
.LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
.ColorBufferBlendEnable = a->blendEnable,
.PreBlendSourceOnlyClampEnable = false,
.ColorClampRange = COLORCLAMP_RTFORMAT,
.PreBlendColorClampEnable = true,
.PostBlendColorClampEnable = true,
.SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
.DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
.ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
.SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
.DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
.AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
.WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
.WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
.WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
.WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
};
if (a->colorWriteMask != 0)
has_writeable_rt = true;
/* Our hardware applies the blend factor prior to the blend function
* regardless of what function is used. Technically, this means the
* hardware can do MORE than GL or Vulkan specify. However, it also
* means that, for MIN and MAX, we have to stomp the blend factor to
* ONE to make it a no-op.
*/
if (a->colorBlendOp == VK_BLEND_OP_MIN ||
a->colorBlendOp == VK_BLEND_OP_MAX) {
blend_state.Entry[i].SourceBlendFactor = BLENDFACTOR_ONE;
blend_state.Entry[i].DestinationBlendFactor = BLENDFACTOR_ONE;
}
if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
a->alphaBlendOp == VK_BLEND_OP_MAX) {
blend_state.Entry[i].SourceAlphaBlendFactor = BLENDFACTOR_ONE;
blend_state.Entry[i].DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
}
}
struct GENX(BLEND_STATE_ENTRY) *bs0 = &blend_state.Entry[0];
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND),
.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable,
.HasWriteableRT = has_writeable_rt,
.ColorBufferBlendEnable = bs0->ColorBufferBlendEnable,
.SourceAlphaBlendFactor = bs0->SourceAlphaBlendFactor,
.DestinationAlphaBlendFactor =
bs0->DestinationAlphaBlendFactor,
.SourceBlendFactor = bs0->SourceBlendFactor,
.DestinationBlendFactor = bs0->DestinationBlendFactor,
.AlphaTestEnable = false,
.IndependentAlphaBlendEnable =
blend_state.IndependentAlphaBlendEnable);
GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
if (!device->info.has_llc)
anv_state_clflush(pipeline->blend_state);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS),
.BlendStatePointer = pipeline->blend_state.offset,
.BlendStatePointerValid = true);
}
static void
emit_ds_state(struct anv_pipeline *pipeline,
const VkPipelineDepthStencilStateCreateInfo *info)
{
uint32_t *dw = GEN_GEN == 8 ?
pipeline->gen8.wm_depth_stencil : pipeline->gen9.wm_depth_stencil;
if (info == NULL) {
/* We're going to OR this together with the dynamic state. We need
* to make sure it's initialized to something useful.
*/
memset(pipeline->gen8.wm_depth_stencil, 0,
sizeof(pipeline->gen8.wm_depth_stencil));
memset(pipeline->gen9.wm_depth_stencil, 0,
sizeof(pipeline->gen9.wm_depth_stencil));
return;
}
/* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
.DepthTestEnable = info->depthTestEnable,
.DepthBufferWriteEnable = info->depthWriteEnable,
.DepthTestFunction = vk_to_gen_compare_op[info->depthCompareOp],
.DoubleSidedStencilEnable = true,
.StencilTestEnable = info->stencilTestEnable,
.StencilBufferWriteEnable = info->stencilTestEnable,
.StencilFailOp = vk_to_gen_stencil_op[info->front.failOp],
.StencilPassDepthPassOp = vk_to_gen_stencil_op[info->front.passOp],
.StencilPassDepthFailOp = vk_to_gen_stencil_op[info->front.depthFailOp],
.StencilTestFunction = vk_to_gen_compare_op[info->front.compareOp],
.BackfaceStencilFailOp = vk_to_gen_stencil_op[info->back.failOp],
.BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info->back.passOp],
.BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info->back.depthFailOp],
.BackfaceStencilTestFunction = vk_to_gen_compare_op[info->back.compareOp],
};
/* From the Broadwell PRM:
*
* "If Depth_Test_Enable = 1 AND Depth_Test_func = EQUAL, the
* Depth_Write_Enable must be set to 0."
*/
if (info->depthTestEnable && info->depthCompareOp == VK_COMPARE_OP_EQUAL)
wm_depth_stencil.DepthBufferWriteEnable = false;
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dw, &wm_depth_stencil);
}
static void
emit_ms_state(struct anv_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *info)
{
uint32_t samples = 1;
uint32_t log2_samples = 0;
/* From the Vulkan 1.0 spec:
* If pSampleMask is NULL, it is treated as if the mask has all bits
* enabled, i.e. no coverage is removed from fragments.
*
* 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
*/
uint32_t sample_mask = 0xffff;
if (info) {
samples = info->rasterizationSamples;
log2_samples = __builtin_ffs(samples) - 1;
}
if (info && info->pSampleMask)
sample_mask &= info->pSampleMask[0];
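/* Worked example (illustrative): rasterizationSamples = 4 gives
* log2_samples = 2, and pSampleMask[0] = 0x5 narrows sample_mask from
* 0xffff to 0x0005 before it lands in 3DSTATE_SAMPLE_MASK.
*/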
if (info && info->sampleShadingEnable)
anv_finishme("VkPipelineMultisampleStateCreateInfo::sampleShadingEnable");
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE),
/* The PRM says that this bit is valid only for DX9:
*
* SW can choose to set this bit only for DX9 API. DX10/OGL API's
* should not have any effect by setting or not setting this bit.
*/
.PixelPositionOffsetEnable = false,
.PixelLocation = CENTER,
.NumberofMultisamples = log2_samples);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK),
.SampleMask = sample_mask);
}
VkResult
genX(graphics_pipeline_create)(
VkDevice _device,
struct anv_pipeline_cache * cache,
const VkGraphicsPipelineCreateInfo* pCreateInfo,
const struct anv_graphics_pipeline_create_info *extra,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_pipeline *pipeline;
VkResult result;
uint32_t offset, length;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
result = anv_pipeline_init(pipeline, device, cache,
pCreateInfo, extra, pAllocator);
if (result != VK_SUCCESS) {
anv_free2(&device->alloc, pAllocator, pipeline);
return result;
}
assert(pCreateInfo->pVertexInputState);
emit_vertex_input(pipeline, pCreateInfo->pVertexInputState, extra);
assert(pCreateInfo->pInputAssemblyState);
emit_ia_state(pipeline, pCreateInfo->pInputAssemblyState, extra);
assert(pCreateInfo->pRasterizationState);
emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
pCreateInfo->pMultisampleState, extra);
emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
emit_ds_state(pipeline, pCreateInfo->pDepthStencilState);
emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
pCreateInfo->pMultisampleState);
emit_urb_setup(pipeline);
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP),
.ClipEnable = !(extra && extra->use_rectlist),
.EarlyCullEnable = true,
.APIMode = 1, /* D3D */
.ViewportXYClipTestEnable = true,
.ClipMode =
pCreateInfo->pRasterizationState->rasterizerDiscardEnable ?
REJECT_ALL : NORMAL,
.NonPerspectiveBarycentricEnable = wm_prog_data ?
(wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0,
.TriangleStripListProvokingVertexSelect = 0,
.LineStripListProvokingVertexSelect = 0,
.TriangleFanProvokingVertexSelect = 1,
.MinimumPointWidth = 0.125,
.MaximumPointWidth = 255.875,
.MaximumVPIndex = pCreateInfo->pViewportState->viewportCount - 1);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM),
.StatisticsEnable = true,
.LineEndCapAntialiasingRegionWidth = _05pixels,
.LineAntialiasingRegionWidth = _10pixels,
.EarlyDepthStencilControl = NORMAL,
.ForceThreadDispatchEnable = NORMAL,
.PointRasterizationRule = RASTRULE_UPPER_RIGHT,
.BarycentricInterpolationMode =
pipeline->ps_ksp0 == NO_KERNEL ?
0 : wm_prog_data->barycentric_interp_modes);
if (pipeline->gs_kernel == NO_KERNEL) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), .Enable = false);
} else {
const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
offset = 1;
length = (gs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS),
.SingleProgramFlow = false,
.KernelStartPointer = pipeline->gs_kernel,
.VectorMaskEnable = false,
.SamplerCount = 0,
.BindingTableEntryCount = 0,
.ExpectedVertexCount = gs_prog_data->vertices_in,
.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base),
.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1,
.OutputTopology = gs_prog_data->output_topology,
.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length,
.IncludeVertexHandles = gs_prog_data->base.include_vue_handles,
.DispatchGRFStartRegisterForURBData =
gs_prog_data->base.base.dispatch_grf_start_reg,
.MaximumNumberofThreads = device->info.max_gs_threads / 2 - 1,
.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords,
.DispatchMode = gs_prog_data->base.dispatch_mode,
.StatisticsEnable = true,
.IncludePrimitiveID = gs_prog_data->include_primitive_id,
.ReorderMode = TRAILING,
.Enable = true,
.ControlDataFormat = gs_prog_data->control_data_format,
.StaticOutput = gs_prog_data->static_vertex_count >= 0,
.StaticOutputVertexCount =
gs_prog_data->static_vertex_count >= 0 ?
gs_prog_data->static_vertex_count : 0,
/* FIXME: mesa sets this based on ctx->Transform.ClipPlanesEnabled:
* UserClipDistanceClipTestEnableBitmask_3DSTATE_GS(v)
* UserClipDistanceCullTestEnableBitmask(v)
*/
.VertexURBEntryOutputReadOffset = offset,
.VertexURBEntryOutputLength = length);
}
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
/* Skip the VUE header and position slots */
offset = 1;
length = (vs_prog_data->base.vue_map.num_slots + 1) / 2 - offset;
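/* Worked example (illustrative): a VUE map with num_slots = 7 occupies
* (7 + 1) / 2 = 4 read units of 256 bits (two 128-bit slots each);
* skipping the header/position unit (offset = 1) leaves length = 3
* units of per-vertex outputs for SBE to consume.
*/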
uint32_t vs_start = pipeline->vs_simd8 != NO_KERNEL ? pipeline->vs_simd8 :
pipeline->vs_vec4;
if (vs_start == NO_KERNEL || (extra && extra->disable_vs))
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS),
.FunctionEnable = false,
/* Even if VS is disabled, SBE still gets the amount of
* vertex data to read from this field. */
.VertexURBEntryOutputReadOffset = offset,
.VertexURBEntryOutputLength = length);
else
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS),
.KernelStartPointer = vs_start,
.SingleVertexDispatch = false,
.VectorMaskEnable = false,
.SamplerCount = 0,
.BindingTableEntryCount =
vs_prog_data->base.base.binding_table.size_bytes / 4,
.ThreadDispatchPriority = false,
.FloatingPointMode = IEEE754,
.IllegalOpcodeExceptionEnable = false,
.AccessesUAV = false,
.SoftwareExceptionEnable = false,
.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_VERTEX],
.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base),
.DispatchGRFStartRegisterForURBData =
vs_prog_data->base.base.dispatch_grf_start_reg,
.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length,
.VertexURBEntryReadOffset = 0,
.MaximumNumberofThreads = device->info.max_vs_threads - 1,
.StatisticsEnable = false,
.SIMD8DispatchEnable = pipeline->vs_simd8 != NO_KERNEL,
.VertexCacheDisable = false,
.FunctionEnable = true,
.VertexURBEntryOutputReadOffset = offset,
.VertexURBEntryOutputLength = length,
.UserClipDistanceClipTestEnableBitmask = 0,
.UserClipDistanceCullTestEnableBitmask = 0);
const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
if (pipeline->ps_ksp0 == NO_KERNEL) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS));
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
.PixelShaderValid = false);
} else {
emit_3dstate_sbe(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
.KernelStartPointer0 = pipeline->ps_ksp0,
.SingleProgramFlow = false,
.VectorMaskEnable = true,
.SamplerCount = 1,
.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
.PerThreadScratchSpace = scratch_space(&wm_prog_data->base),
.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias,
.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
POSOFFSET_SAMPLE: POSOFFSET_NONE,
.PushConstantEnable = wm_prog_data->base.nr_params > 0,
._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
._32PixelDispatchEnable = false,
.DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0,
.DispatchGRFStartRegisterForConstantSetupData1 = 0,
.DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2,
.KernelStartPointer1 = 0,
.KernelStartPointer2 = pipeline->ps_ksp2);
bool per_sample_ps = pCreateInfo->pMultisampleState &&
pCreateInfo->pMultisampleState->sampleShadingEnable;
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
.PixelShaderValid = true,
.PixelShaderKillsPixel = wm_prog_data->uses_kill,
.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
.AttributeEnable = wm_prog_data->num_varying_inputs > 0,
.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
.PixelShaderIsPerSample = per_sample_ps,
.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth,
.PixelShaderUsesSourceW = wm_prog_data->uses_src_w,
#if GEN_GEN >= 9
.PixelShaderPullsBary = wm_prog_data->pulls_bary,
.InputCoverageMaskState = wm_prog_data->uses_sample_mask ?
ICMS_INNER_CONSERVATIVE : ICMS_NONE,
#else
.PixelShaderUsesInputCoverageMask =
wm_prog_data->uses_sample_mask,
#endif
);
}
*pPipeline = anv_pipeline_to_handle(pipeline);
return VK_SUCCESS;
}

File diff suppressed because it is too large


@@ -0,0 +1,129 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
VkResult
genX(compute_pipeline_create)(
VkDevice _device,
struct anv_pipeline_cache * cache,
const VkComputePipelineCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_pipeline *pipeline;
VkResult result;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
pipeline->device = device;
pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
pipeline->blend_state.map = NULL;
result = anv_reloc_list_init(&pipeline->batch_relocs,
pAllocator ? pAllocator : &device->alloc);
if (result != VK_SUCCESS) {
anv_free2(&device->alloc, pAllocator, pipeline);
return result;
}
pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
pipeline->batch.relocs = &pipeline->batch_relocs;
/* When we free the pipeline, we detect stages based on the NULL status
* of various prog_data pointers. Make them NULL by default.
*/
memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
pipeline->vs_simd8 = NO_KERNEL;
pipeline->vs_vec4 = NO_KERNEL;
pipeline->gs_kernel = NO_KERNEL;
pipeline->active_stages = 0;
pipeline->total_scratch = 0;
assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
pCreateInfo->stage.pName,
pCreateInfo->stage.pSpecializationInfo);
pipeline->use_repclear = false;
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
unsigned push_constant_data_size =
(prog_data->nr_params + local_id_dwords) * 4;
unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
unsigned push_constant_regs = reg_aligned_constant_size / 32;
uint32_t group_size = cs_prog_data->local_size[0] *
cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
pipeline->cs_thread_width_max =
DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
if (remainder > 0)
pipeline->cs_right_mask = ~0u >> (32 - remainder);
else
pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
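/* Worked example (illustrative): a 50-invocation workgroup at
* simd_size = 16 needs cs_thread_width_max = DIV_ROUND_UP(50, 16) = 4
* threads; remainder = 50 & 15 = 2, so cs_right_mask = ~0u >> 30 = 0x3
* enables only the two live channels in the last thread.
*/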
const uint32_t vfe_curbe_allocation =
push_constant_regs * pipeline->cs_thread_width_max;
anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE),
.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE],
.PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048),
#if GEN_GEN > 7
.ScratchSpaceBasePointerHigh = 0,
.StackSize = 0,
#else
.GPGPUMode = true,
#endif
.MaximumNumberofThreads = device->info.max_cs_threads - 1,
.NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2,
.ResetGatewayTimer = true,
#if GEN_GEN <= 8
.BypassGatewayControl = true,
#endif
.URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2,
.CURBEAllocationSize = vfe_curbe_allocation);
*pPipeline = anv_pipeline_to_handle(pipeline);
return VK_SUCCESS;
}


@@ -0,0 +1,433 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
uint8_t bits;
switch (comp) {
case 0: bits = isl_format_layouts[format].channels.r.bits; break;
case 1: bits = isl_format_layouts[format].channels.g.bits; break;
case 2: bits = isl_format_layouts[format].channels.b.bits; break;
case 3: bits = isl_format_layouts[format].channels.a.bits; break;
default: unreachable("Invalid component");
}
if (bits) {
return VFCOMP_STORE_SRC;
} else if (comp < 3) {
return VFCOMP_STORE_0;
} else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
isl_format_layouts[format].channels.r.type == ISL_SINT) {
assert(comp == 3);
return VFCOMP_STORE_1_INT;
} else {
assert(comp == 3);
return VFCOMP_STORE_1_FP;
}
}
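/* Illustrative sketch of the mapping above (assuming the usual ISL
* layouts for these formats):
*
*    vertex_element_comp_control(ISL_FORMAT_R32G32_FLOAT, 1) == VFCOMP_STORE_SRC
*    vertex_element_comp_control(ISL_FORMAT_R32G32_FLOAT, 2) == VFCOMP_STORE_0
*    vertex_element_comp_control(ISL_FORMAT_R32G32_FLOAT, 3) == VFCOMP_STORE_1_FP
*    vertex_element_comp_control(ISL_FORMAT_R32G32_SINT,  3) == VFCOMP_STORE_1_INT
*/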
static void
emit_vertex_input(struct anv_pipeline *pipeline,
const VkPipelineVertexInputStateCreateInfo *info,
const struct anv_graphics_pipeline_create_info *extra)
{
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
uint32_t elements;
if (extra && extra->disable_vs) {
/* If the VS is disabled, just assume the user knows what they're
* doing and apply the layout blindly. This can only come from
* meta, so this *should* be safe.
*/
elements = 0;
for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++)
elements |= (1 << info->pVertexAttributeDescriptions[i].location);
} else {
/* Pull inputs_read out of the VS prog data */
uint64_t inputs_read = vs_prog_data->inputs_read;
assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
elements = inputs_read >> VERT_ATTRIB_GENERIC0;
}
#if GEN_GEN >= 8
/* On BDW+, we only need to allocate space for base ids. Setting up
* the actual vertex and instance id is a separate packet.
*/
const bool needs_svgs_elem = vs_prog_data->uses_basevertex ||
vs_prog_data->uses_baseinstance;
#else
/* On Haswell and prior, vertex and instance id are created by using the
* ComponentControl fields, so we need an element for any of them.
*/
const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
vs_prog_data->uses_instanceid ||
vs_prog_data->uses_basevertex ||
vs_prog_data->uses_baseinstance;
#endif
uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem;
if (elem_count == 0)
return;
uint32_t *p;
const uint32_t num_dwords = 1 + elem_count * 2;
p = anv_batch_emitn(&pipeline->batch, num_dwords,
GENX(3DSTATE_VERTEX_ELEMENTS));
memset(p + 1, 0, (num_dwords - 1) * 4);
for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
const VkVertexInputAttributeDescription *desc =
&info->pVertexAttributeDescriptions[i];
enum isl_format format = anv_get_isl_format(desc->format,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_TILING_LINEAR,
NULL);
assert(desc->binding < 32);
if ((elements & (1 << desc->location)) == 0)
continue; /* Attribute location not consumed by the VS */
uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1));
struct GENX(VERTEX_ELEMENT_STATE) element = {
.VertexBufferIndex = desc->binding,
.Valid = true,
.SourceElementFormat = format,
.EdgeFlagEnable = false,
.SourceElementOffset = desc->offset,
.Component0Control = vertex_element_comp_control(format, 0),
.Component1Control = vertex_element_comp_control(format, 1),
.Component2Control = vertex_element_comp_control(format, 2),
.Component3Control = vertex_element_comp_control(format, 3),
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);
#if GEN_GEN >= 8
/* On Broadwell and later, we have a separate VF_INSTANCING packet
* that controls instancing. On Haswell and prior, that's part of
* VERTEX_BUFFER_STATE which we emit later.
*/
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING),
.InstancingEnable = pipeline->instancing_enable[desc->binding],
.VertexElementIndex = slot,
/* Vulkan so far doesn't have an instance divisor, so
* this is always 1 (ignored if not instancing). */
.InstanceDataStepRate = 1);
#endif
}
const uint32_t id_slot = __builtin_popcount(elements);
if (needs_svgs_elem) {
/* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
* "Within a VERTEX_ELEMENT_STATE structure, if a Component
* Control field is set to something other than VFCOMP_STORE_SRC,
* no higher-numbered Component Control fields may be set to
* VFCOMP_STORE_SRC"
*
* This means that if we have BaseInstance, we need BaseVertex as
* well. Just do all or nothing.
*/
uint32_t base_ctrl = (vs_prog_data->uses_basevertex ||
vs_prog_data->uses_baseinstance) ?
VFCOMP_STORE_SRC : VFCOMP_STORE_0;
struct GENX(VERTEX_ELEMENT_STATE) element = {
.VertexBufferIndex = 32, /* Reserved for this */
.Valid = true,
.SourceElementFormat = ISL_FORMAT_R32G32_UINT,
.Component0Control = base_ctrl,
.Component1Control = base_ctrl,
#if GEN_GEN >= 8
.Component2Control = VFCOMP_STORE_0,
.Component3Control = VFCOMP_STORE_0,
#else
.Component2Control = VFCOMP_STORE_VID,
.Component3Control = VFCOMP_STORE_IID,
#endif
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);
}
#if GEN_GEN >= 8
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS),
.VertexIDEnable = vs_prog_data->uses_vertexid,
.VertexIDComponentNumber = 2,
.VertexIDElementOffset = id_slot,
.InstanceIDEnable = vs_prog_data->uses_instanceid,
.InstanceIDComponentNumber = 3,
.InstanceIDElementOffset = id_slot);
#endif
}
static inline void
emit_urb_setup(struct anv_pipeline *pipeline)
{
#if GEN_GEN == 7 && !GEN_IS_HASWELL
struct anv_device *device = pipeline->device;
/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
*
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
* needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
* needs to be sent before any combination of VS associated 3DSTATE."
*/
anv_batch_emit(&pipeline->batch, GEN7_PIPE_CONTROL,
.DepthStallEnable = true,
.PostSyncOperation = WriteImmediateData,
.Address = { &device->workaround_bo, 0 });
#endif
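/* Allocate push constant space. The _VS packet layout is reused for the
 * other stages by patching _3DCommandSubOpcode (VS, HS, DS, GS and PS are
 * consecutive sub-opcodes); each stage gets a contiguous chunk starting at
 * push_start.
 */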
unsigned push_start = 0;
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
unsigned push_size = pipeline->urb.push_size[i];
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS),
._3DCommandSubOpcode = 18 + i,
.ConstantBufferOffset = (push_size > 0) ? push_start : 0,
.ConstantBufferSize = push_size);
push_start += pipeline->urb.push_size[i];
}
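/* Program the URB allocation for VS/HS/DS/GS, again reusing the _VS packet
 * via the sub-opcode. Note that the entry allocation size field is encoded
 * as (size - 1).
 */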
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_URB_VS),
._3DCommandSubOpcode = 48 + i,
.VSURBStartingAddress = pipeline->urb.start[i],
.VSURBEntryAllocationSize = pipeline->urb.size[i] - 1,
.VSNumberofURBEntries = pipeline->urb.entries[i]);
}
}
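/* Program FS attribute setup (3DSTATE_SBE and, on BDW+, 3DSTATE_SBE_SWIZ):
 * map each FS input to the VUE slot written by the last geometry stage (GS if
 * present, otherwise VS) and override slots nothing wrote with PrimitiveID.
 */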
static void
emit_3dstate_sbe(struct anv_pipeline *pipeline)
{
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
const struct brw_vue_map *fs_input_map;
if (pipeline->gs_kernel == NO_KERNEL)
fs_input_map = &vs_prog_data->base.vue_map;
else
fs_input_map = &gs_prog_data->base.vue_map;
struct GENX(3DSTATE_SBE) sbe = {
GENX(3DSTATE_SBE_header),
.AttributeSwizzleEnable = true,
.PointSpriteTextureCoordinateOrigin = UPPERLEFT,
.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
.ConstantInterpolationEnable = wm_prog_data->flat_inputs,
#if GEN_GEN >= 9
.Attribute0ActiveComponentFormat = ACF_XYZW,
.Attribute1ActiveComponentFormat = ACF_XYZW,
.Attribute2ActiveComponentFormat = ACF_XYZW,
.Attribute3ActiveComponentFormat = ACF_XYZW,
.Attribute4ActiveComponentFormat = ACF_XYZW,
.Attribute5ActiveComponentFormat = ACF_XYZW,
.Attribute6ActiveComponentFormat = ACF_XYZW,
.Attribute7ActiveComponentFormat = ACF_XYZW,
.Attribute8ActiveComponentFormat = ACF_XYZW,
.Attribute9ActiveComponentFormat = ACF_XYZW,
.Attribute10ActiveComponentFormat = ACF_XYZW,
.Attribute11ActiveComponentFormat = ACF_XYZW,
.Attribute12ActiveComponentFormat = ACF_XYZW,
.Attribute13ActiveComponentFormat = ACF_XYZW,
.Attribute14ActiveComponentFormat = ACF_XYZW,
.Attribute15ActiveComponentFormat = ACF_XYZW,
/* Mark all four components (XYZW) active for every attribute. */
.Attribute16ActiveComponentFormat = ACF_XYZW,
.Attribute17ActiveComponentFormat = ACF_XYZW,
.Attribute18ActiveComponentFormat = ACF_XYZW,
.Attribute19ActiveComponentFormat = ACF_XYZW,
.Attribute20ActiveComponentFormat = ACF_XYZW,
.Attribute21ActiveComponentFormat = ACF_XYZW,
.Attribute22ActiveComponentFormat = ACF_XYZW,
.Attribute23ActiveComponentFormat = ACF_XYZW,
.Attribute24ActiveComponentFormat = ACF_XYZW,
.Attribute25ActiveComponentFormat = ACF_XYZW,
.Attribute26ActiveComponentFormat = ACF_XYZW,
.Attribute27ActiveComponentFormat = ACF_XYZW,
.Attribute28ActiveComponentFormat = ACF_XYZW,
.Attribute29ActiveComponentFormat = ACF_XYZW,
.Attribute30ActiveComponentFormat = ACF_XYZW,
#endif
};
#if GEN_GEN >= 8
/* On Broadwell and later, 3DSTATE_SBE is split into two packets: SBE and SBE_SWIZ. */
struct GENX(3DSTATE_SBE_SWIZ) swiz = {
GENX(3DSTATE_SBE_SWIZ_header),
};
#else
# define swiz sbe
#endif
int max_source_attr = 0;
for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
int input_index = wm_prog_data->urb_setup[attr];
if (input_index < 0)
continue;
const int slot = fs_input_map->varying_to_slot[attr];
if (input_index >= 16)
continue;
if (slot == -1) {
/* This attribute does not exist in the VUE--that means that the
* vertex shader did not write to it. It could be that it's a
* regular varying read by the fragment shader but not written by
* the vertex shader or it's gl_PrimitiveID. In the first case the
* value is undefined, in the second it needs to be
* gl_PrimitiveID.
*/
swiz.Attribute[input_index].ConstantSource = PRIM_ID;
swiz.Attribute[input_index].ComponentOverrideX = true;
swiz.Attribute[input_index].ComponentOverrideY = true;
swiz.Attribute[input_index].ComponentOverrideZ = true;
swiz.Attribute[input_index].ComponentOverrideW = true;
} else {
assert(slot >= 2);
const int source_attr = slot - 2;
max_source_attr = MAX2(max_source_attr, source_attr);
/* We have to subtract two slots to account for the URB entry output
* read offset in the VS and GS stages.
*/
swiz.Attribute[input_index].SourceAttribute = source_attr;
}
}
sbe.VertexURBEntryReadOffset = 1; /* Skip the VUE header and position slots */
sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
GENX(3DSTATE_SBE_length));
GENX(3DSTATE_SBE_pack)(&pipeline->batch, dw, &sbe);
#if GEN_GEN >= 8
dw = anv_batch_emit_dwords(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ_length));
GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
#endif
}
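/* Encode total_scratch for the scratch-space fields of the per-stage packets:
 * ffs(total_scratch / 2048) yields 0 when there is no scratch, 1 for 2kB,
 * 2 for 4kB, and so on.
 */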
static inline uint32_t
scratch_space(const struct brw_stage_prog_data *prog_data)
{
return ffs(prog_data->total_scratch / 2048);
}
static const uint32_t vk_to_gen_cullmode[] = {
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
[VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
[VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};
static const uint32_t vk_to_gen_fillmode[] = {
[VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
[VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
[VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};
static const uint32_t vk_to_gen_front_face[] = {
[VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
[VK_FRONT_FACE_CLOCKWISE] = 0
};
static const uint32_t vk_to_gen_logic_op[] = {
[VK_LOGIC_OP_COPY] = LOGICOP_COPY,
[VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
[VK_LOGIC_OP_AND] = LOGICOP_AND,
[VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
[VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
[VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP,
[VK_LOGIC_OP_XOR] = LOGICOP_XOR,
[VK_LOGIC_OP_OR] = LOGICOP_OR,
[VK_LOGIC_OP_NOR] = LOGICOP_NOR,
[VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV,
[VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
[VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
[VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
[VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
[VK_LOGIC_OP_NAND] = LOGICOP_NAND,
[VK_LOGIC_OP_SET] = LOGICOP_SET,
};
static const uint32_t vk_to_gen_blend[] = {
[VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
[VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
[VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
[VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
[VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
[VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
[VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
[VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
[VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
[VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
[VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
};
static const uint32_t vk_to_gen_blend_op[] = {
[VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
[VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
[VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
[VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
[VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
};
static const uint32_t vk_to_gen_compare_op[] = {
[VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
[VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
[VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
[VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL,
[VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
[VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
[VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL,
[VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
};
static const uint32_t vk_to_gen_stencil_op[] = {
[VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
[VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
[VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
[VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
[VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
[VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
[VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
[VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
};

View file

@@ -0,0 +1,270 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
VkResult
genX(init_device_state)(struct anv_device *device)
{
GENX(MEMORY_OBJECT_CONTROL_STATE_pack)(NULL, &device->default_mocs,
&GENX(MOCS));
struct anv_batch batch;
uint32_t cmds[64];
batch.start = batch.next = cmds;
batch.end = (void *) cmds + sizeof(cmds);
anv_batch_emit(&batch, GENX(PIPELINE_SELECT),
#if GEN_GEN >= 9
.MaskBits = 3,
#endif
.PipelineSelection = _3D);
anv_batch_emit(&batch, GENX(3DSTATE_VF_STATISTICS),
.StatisticsEnable = true);
anv_batch_emit(&batch, GENX(3DSTATE_HS));
anv_batch_emit(&batch, GENX(3DSTATE_TE));
anv_batch_emit(&batch, GENX(3DSTATE_DS));
anv_batch_emit(&batch, GENX(3DSTATE_STREAMOUT), .SOFunctionEnable = false);
anv_batch_emit(&batch, GENX(3DSTATE_AA_LINE_PARAMETERS));
#if GEN_GEN >= 8
anv_batch_emit(&batch, GENX(3DSTATE_WM_CHROMAKEY),
.ChromaKeyKillEnable = false);
/* See the Vulkan 1.0 spec Table 24.1 "Standard sample locations" and
* VkPhysicalDeviceFeatures::standardSampleLocations.
*/
anv_batch_emit(&batch, GENX(3DSTATE_SAMPLE_PATTERN),
._1xSample0XOffset = 0.5,
._1xSample0YOffset = 0.5,
._2xSample0XOffset = 0.25,
._2xSample0YOffset = 0.25,
._2xSample1XOffset = 0.75,
._2xSample1YOffset = 0.75,
._4xSample0XOffset = 0.375,
._4xSample0YOffset = 0.125,
._4xSample1XOffset = 0.875,
._4xSample1YOffset = 0.375,
._4xSample2XOffset = 0.125,
._4xSample2YOffset = 0.625,
._4xSample3XOffset = 0.625,
._4xSample3YOffset = 0.875,
._8xSample0XOffset = 0.5625,
._8xSample0YOffset = 0.3125,
._8xSample1XOffset = 0.4375,
._8xSample1YOffset = 0.6875,
._8xSample2XOffset = 0.8125,
._8xSample2YOffset = 0.5625,
._8xSample3XOffset = 0.3125,
._8xSample3YOffset = 0.1875,
._8xSample4XOffset = 0.1875,
._8xSample4YOffset = 0.8125,
._8xSample5XOffset = 0.0625,
._8xSample5YOffset = 0.4375,
._8xSample6XOffset = 0.6875,
._8xSample6YOffset = 0.9375,
._8xSample7XOffset = 0.9375,
._8xSample7YOffset = 0.0625,
#if GEN_GEN >= 9
._16xSample0XOffset = 0.5625,
._16xSample0YOffset = 0.5625,
._16xSample1XOffset = 0.4375,
._16xSample1YOffset = 0.3125,
._16xSample2XOffset = 0.3125,
._16xSample2YOffset = 0.6250,
._16xSample3XOffset = 0.7500,
._16xSample3YOffset = 0.4375,
._16xSample4XOffset = 0.1875,
._16xSample4YOffset = 0.3750,
._16xSample5XOffset = 0.6250,
._16xSample5YOffset = 0.8125,
._16xSample6XOffset = 0.8125,
._16xSample6YOffset = 0.6875,
._16xSample7XOffset = 0.6875,
._16xSample7YOffset = 0.1875,
._16xSample8XOffset = 0.3750,
._16xSample8YOffset = 0.8750,
._16xSample9XOffset = 0.5000,
._16xSample9YOffset = 0.0625,
._16xSample10XOffset = 0.2500,
._16xSample10YOffset = 0.1250,
._16xSample11XOffset = 0.1250,
._16xSample11YOffset = 0.7500,
._16xSample12XOffset = 0.0000,
._16xSample12YOffset = 0.5000,
._16xSample13XOffset = 0.9375,
._16xSample13YOffset = 0.2500,
._16xSample14XOffset = 0.8750,
._16xSample14YOffset = 0.9375,
._16xSample15XOffset = 0.0625,
._16xSample15YOffset = 0.0000,
#endif
);
#endif
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END));
assert(batch.next <= batch.end);
return anv_device_submit_simple_batch(device, &batch);
}
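/* Vulkan has no separate anisotropic filter enum; anisotropy is requested via
 * anisotropyEnable, so a LINEAR filter is promoted to ANISOTROPIC when it is
 * set.
 */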
static inline uint32_t
vk_to_gen_tex_filter(VkFilter filter, bool anisotropyEnable)
{
switch (filter) {
default:
assert(!"Invalid filter");
case VK_FILTER_NEAREST:
return MAPFILTER_NEAREST;
case VK_FILTER_LINEAR:
return anisotropyEnable ? MAPFILTER_ANISOTROPIC : MAPFILTER_LINEAR;
}
}
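/* Map maxAnisotropy onto the 3-bit MaximumAnisotropy field:
 * 2.0 -> 0, 4.0 -> 1, ..., 16.0 -> 7, i.e. the hardware's RATIO 2:1 through
 * 16:1 encodings; values below 2.0 clamp to 0.
 */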
static inline uint32_t
vk_to_gen_max_anisotropy(float ratio)
{
return (anv_clamp_f(ratio, 2, 16) - 2) / 2;
}
static const uint32_t vk_to_gen_mipmap_mode[] = {
[VK_SAMPLER_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
[VK_SAMPLER_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
};
static const uint32_t vk_to_gen_tex_address[] = {
[VK_SAMPLER_ADDRESS_MODE_REPEAT] = TCM_WRAP,
[VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT] = TCM_MIRROR,
[VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE] = TCM_CLAMP,
[VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
[VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
};
/* Vulkan specifies the result of shadow comparisons as:
* 1 if ref <op> texel,
* 0 otherwise.
*
* The hardware does:
* 0 if texel <op> ref,
* 1 otherwise.
*
* So, these look a bit strange because there's both a negation
* and swapping of the arguments involved.
*/
static const uint32_t vk_to_gen_shadow_compare_op[] = {
[VK_COMPARE_OP_NEVER] = PREFILTEROPALWAYS,
[VK_COMPARE_OP_LESS] = PREFILTEROPLEQUAL,
[VK_COMPARE_OP_EQUAL] = PREFILTEROPNOTEQUAL,
[VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLESS,
[VK_COMPARE_OP_GREATER] = PREFILTEROPGEQUAL,
[VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPEQUAL,
[VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGREATER,
[VK_COMPARE_OP_ALWAYS] = PREFILTEROPNEVER,
};
VkResult genX(CreateSampler)(
VkDevice _device,
const VkSamplerCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkSampler* pSampler)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_sampler *sampler;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
sampler = anv_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!sampler)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
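/* Border color entries are 64-byte slots in the device's border color
 * block, indexed by the VkBorderColor enum value.
 */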
uint32_t border_color_offset = device->border_colors.offset +
pCreateInfo->borderColor * 64;
struct GENX(SAMPLER_STATE) sampler_state = {
.SamplerDisable = false,
.TextureBorderColorMode = DX10OGL,
#if GEN_GEN >= 8
.LODPreClampMode = CLAMP_MODE_OGL,
#else
.LODPreClampEnable = CLAMP_ENABLE_OGL,
#endif
#if GEN_GEN == 8
.BaseMipLevel = 0.0,
#endif
.MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipmapMode],
.MagModeFilter = vk_to_gen_tex_filter(pCreateInfo->magFilter,
pCreateInfo->anisotropyEnable),
.MinModeFilter = vk_to_gen_tex_filter(pCreateInfo->minFilter,
pCreateInfo->anisotropyEnable),
.TextureLODBias = anv_clamp_f(pCreateInfo->mipLodBias, -16, 15.996),
.AnisotropicAlgorithm = EWAApproximation,
.MinLOD = anv_clamp_f(pCreateInfo->minLod, 0, 14),
.MaxLOD = anv_clamp_f(pCreateInfo->maxLod, 0, 14),
.ChromaKeyEnable = 0,
.ChromaKeyIndex = 0,
.ChromaKeyMode = 0,
.ShadowFunction = vk_to_gen_shadow_compare_op[pCreateInfo->compareOp],
.CubeSurfaceControlMode = OVERRIDE,
.BorderColorPointer = border_color_offset,
#if GEN_GEN >= 8
.LODClampMagnificationMode = MIPNONE,
#endif
.MaximumAnisotropy = vk_to_gen_max_anisotropy(pCreateInfo->maxAnisotropy),
.RAddressMinFilterRoundingEnable = 0,
.RAddressMagFilterRoundingEnable = 0,
.VAddressMinFilterRoundingEnable = 0,
.VAddressMagFilterRoundingEnable = 0,
.UAddressMinFilterRoundingEnable = 0,
.UAddressMagFilterRoundingEnable = 0,
.TrilinearFilterQuality = 0,
.NonnormalizedCoordinateEnable = pCreateInfo->unnormalizedCoordinates,
.TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeU],
.TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeV],
.TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressModeW],
};
GENX(SAMPLER_STATE_pack)(NULL, sampler->state, &sampler_state);
*pSampler = anv_sampler_to_handle(sampler);
return VK_SUCCESS;
}

View file

@@ -0,0 +1,7 @@
{
"file_format_version": "1.0.0",
"ICD": {
"library_path": "libvulkan_intel.so",
"abi_versions": "1.0.3"
}
}

5
src/intel/vulkan/tests/.gitignore vendored Normal file
View file

@@ -0,0 +1,5 @@
block_pool
block_pool_no_free
state_pool
state_pool_free_list_only
state_pool_no_free

Some files were not shown because too many files have changed in this diff