mesa/src/intel/vulkan/genX_pipeline.c

/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_private.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/gen_l3_config.h"
#include "common/gen_sample_positions.h"
#include "vk_util.h"
#include "vk_format_info.h"
static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
uint8_t bits;
switch (comp) {
case 0: bits = isl_format_layouts[format].channels.r.bits; break;
case 1: bits = isl_format_layouts[format].channels.g.bits; break;
case 2: bits = isl_format_layouts[format].channels.b.bits; break;
case 3: bits = isl_format_layouts[format].channels.a.bits; break;
default: unreachable("Invalid component");
}
/*
* Take into account hardware restrictions when dealing with 64-bit floats.
*
* From Broadwell spec, command reference structures, page 586:
* "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
* 64-bit components are stored in the URB without any conversion. In
* this case, vertex elements must be written as 128 or 256 bits, with
* VFCOMP_STORE_0 being used to pad the output as required. E.g., if
* R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
* Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
* set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
* Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
* a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
* Component 3 to be specified as VFCOMP_STORE_0 in order to output a
* 256-bit vertex element."
*/
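/* For illustration (derived from the rules above and the isl format
* layouts, so take the exact formats as an assumption), the ladder below
* produces:
*
* R64_PASSTHRU (double): STORE_SRC, STORE_0, NOSTORE, NOSTORE (128-bit)
* R64G64_PASSTHRU (dvec2): STORE_SRC, STORE_SRC, NOSTORE, NOSTORE (128-bit)
* R64G64B64_PASSTHRU (dvec3): STORE_SRC, STORE_SRC, STORE_SRC, STORE_0 (256-bit)
* R32G32B32_FLOAT (vec3): STORE_SRC, STORE_SRC, STORE_SRC, STORE_1_FP
*/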
if (bits) {
return VFCOMP_STORE_SRC;
} else if (comp >= 2 &&
!isl_format_layouts[format].channels.b.bits &&
isl_format_layouts[format].channels.r.type == ISL_RAW) {
/* When emitting 64-bit attributes, we need to write either 128 or 256
* bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
* VFCOMP_STORE_0 to pad the written chunk */
return VFCOMP_NOSTORE;
} else if (comp < 3 ||
isl_format_layouts[format].channels.r.type == ISL_RAW) {
/* Note we need to pad with value 0, not 1, due to hardware restrictions
* (see comment above) */
return VFCOMP_STORE_0;
} else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
isl_format_layouts[format].channels.r.type == ISL_SINT) {
assert(comp == 3);
return VFCOMP_STORE_1_INT;
} else {
assert(comp == 3);
return VFCOMP_STORE_1_FP;
}
}
static void
emit_vertex_input(struct anv_pipeline *pipeline,
const VkPipelineVertexInputStateCreateInfo *info)
{
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
/* Pull inputs_read out of the VS prog data */
const uint64_t inputs_read = vs_prog_data->inputs_read;
const uint64_t double_inputs_read = vs_prog_data->double_inputs_read;
assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
vs_prog_data->uses_instanceid ||
vs_prog_data->uses_basevertex ||
vs_prog_data->uses_baseinstance;
uint32_t elem_count = __builtin_popcount(elements) -
__builtin_popcount(elements_double) / 2;
const uint32_t total_elems =
elem_count + needs_svgs_elem + vs_prog_data->uses_drawid;
if (total_elems == 0)
return;
uint32_t *p;
const uint32_t num_dwords = 1 + total_elems * 2;
p = anv_batch_emitn(&pipeline->batch, num_dwords,
GENX(3DSTATE_VERTEX_ELEMENTS));
if (!p)
return;
memset(p + 1, 0, (num_dwords - 1) * 4);
for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
const VkVertexInputAttributeDescription *desc =
&info->pVertexAttributeDescriptions[i];
enum isl_format format = anv_get_isl_format(&pipeline->device->info,
desc->format,
VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_TILING_LINEAR);
assert(desc->binding < MAX_VBS);
if ((elements & (1 << desc->location)) == 0)
continue; /* Location not used by the VS */
uint32_t slot =
__builtin_popcount(elements & ((1 << desc->location) - 1)) -
DIV_ROUND_UP(__builtin_popcount(elements_double &
((1 << desc->location) -1)), 2);
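/* Worked example (a sketch, assuming the usual Vulkan location assignment
* for 64-bit formats): with a vec3 at location 0, a dvec4 at locations 1-2
* and a dvec3 at locations 3-4, "elements" has bits 0-4 set and
* "elements_double" has bits 1-4 set, so the computation above maps the
* three attributes to vertex element slots 0, 1 and 2. Each dvec3/dvec4
* consumes two locations but only one VERTEX_ELEMENT_STATE slot.
*/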
struct GENX(VERTEX_ELEMENT_STATE) element = {
.VertexBufferIndex = desc->binding,
.Valid = true,
.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) format,
.EdgeFlagEnable = false,
.SourceElementOffset = desc->offset,
.Component0Control = vertex_element_comp_control(format, 0),
.Component1Control = vertex_element_comp_control(format, 1),
.Component2Control = vertex_element_comp_control(format, 2),
.Component3Control = vertex_element_comp_control(format, 3),
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);
#if GEN_GEN >= 8
/* On Broadwell and later, we have a separate VF_INSTANCING packet
* that controls instancing. On Haswell and prior, that's part of
* VERTEX_BUFFER_STATE which we emit later.
*/
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.InstancingEnable = pipeline->instancing_enable[desc->binding];
vfi.VertexElementIndex = slot;
/* Our implementation of VK_KHX_multiview uses instancing to draw
* the different views. If the client asks for instancing, we
* need to use the Instance Data Step Rate to ensure that we
* repeat the client's per-instance data once for each view.
*/
vfi.InstanceDataStepRate = anv_subpass_view_count(pipeline->subpass);
}
#endif
}
const uint32_t id_slot = elem_count;
if (needs_svgs_elem) {
/* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
* "Within a VERTEX_ELEMENT_STATE structure, if a Component
* Control field is set to something other than VFCOMP_STORE_SRC,
* no higher-numbered Component Control fields may be set to
* VFCOMP_STORE_SRC"
*
* This means, that if we have BaseInstance, we need BaseVertex as
* well. Just do all or nothing.
*/
uint32_t base_ctrl = (vs_prog_data->uses_basevertex ||
vs_prog_data->uses_baseinstance) ?
VFCOMP_STORE_SRC : VFCOMP_STORE_0;
struct GENX(VERTEX_ELEMENT_STATE) element = {
.VertexBufferIndex = ANV_SVGS_VB_INDEX,
.Valid = true,
.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT,
.Component0Control = base_ctrl,
.Component1Control = base_ctrl,
#if GEN_GEN >= 8
.Component2Control = VFCOMP_STORE_0,
.Component3Control = VFCOMP_STORE_0,
#else
.Component2Control = VFCOMP_STORE_VID,
.Component3Control = VFCOMP_STORE_IID,
#endif
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);
}
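/* On gen8+, 3DSTATE_VF_SGVS injects VertexID and InstanceID into
* components 2 and 3 of the SVGS element above, mirroring the gen7 path
* where the same components are filled via VFCOMP_STORE_VID/IID.
*/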
#if GEN_GEN >= 8
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), sgvs) {
sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
sgvs.VertexIDComponentNumber = 2;
sgvs.VertexIDElementOffset = id_slot;
sgvs.InstanceIDEnable = vs_prog_data->uses_instanceid;
sgvs.InstanceIDComponentNumber = 3;
sgvs.InstanceIDElementOffset = id_slot;
}
#endif
const uint32_t drawid_slot = elem_count + needs_svgs_elem;
if (vs_prog_data->uses_drawid) {
struct GENX(VERTEX_ELEMENT_STATE) element = {
.VertexBufferIndex = ANV_DRAWID_VB_INDEX,
.Valid = true,
.SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32_UINT,
.Component0Control = VFCOMP_STORE_SRC,
.Component1Control = VFCOMP_STORE_0,
.Component2Control = VFCOMP_STORE_0,
.Component3Control = VFCOMP_STORE_0,
};
GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
&p[1 + drawid_slot * 2],
&element);
#if GEN_GEN >= 8
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
vfi.VertexElementIndex = drawid_slot;
}
#endif
}
}
void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
const struct gen_l3_config *l3_config,
VkShaderStageFlags active_stages,
const unsigned entry_size[4])
{
const struct gen_device_info *devinfo = &device->info;
#if GEN_IS_HASWELL
const unsigned push_constant_kb = devinfo->gt == 3 ? 32 : 16;
#else
const unsigned push_constant_kb = GEN_GEN >= 8 ? 32 : 16;
#endif
const unsigned urb_size_kb = gen_get_l3_config_urb_size(devinfo, l3_config);
unsigned entries[4];
unsigned start[4];
gen_get_urb_config(devinfo,
1024 * push_constant_kb, 1024 * urb_size_kb,
active_stages &
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
entry_size, entries, start);
#if GEN_GEN == 7 && !GEN_IS_HASWELL
/* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
*
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
* needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
* needs to be sent before any combination of VS associated 3DSTATE."
*/
anv_batch_emit(batch, GEN7_PIPE_CONTROL, pc) {
pc.DepthStallEnable = true;
pc.PostSyncOperation = WriteImmediateData;
pc.Address = (struct anv_address) { &device->workaround_bo, 0 };
}
#endif
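/* 3DSTATE_URB_VS/HS/DS/GS share a layout and (as the loop below relies on)
* have consecutive command sub-opcodes, so all four are emitted from the
* VS template by patching _3DCommandSubOpcode.
*/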
for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = start[i];
urb.VSURBEntryAllocationSize = entry_size[i] - 1;
urb.VSNumberofURBEntries = entries[i];
}
}
}
static void
emit_urb_setup(struct anv_pipeline *pipeline)
{
unsigned entry_size[4];
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
const struct brw_vue_prog_data *prog_data =
!anv_pipeline_has_stage(pipeline, i) ? NULL :
(const struct brw_vue_prog_data *) pipeline->shaders[i]->prog_data;
entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
}
genX(emit_urb_setup)(pipeline->device, &pipeline->batch,
pipeline->urb.l3_config,
pipeline->active_stages, entry_size);
}
static void
emit_3dstate_sbe(struct anv_pipeline *pipeline)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);
#if GEN_GEN >= 8
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
return;
}
const struct brw_vue_map *fs_input_map =
&anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;
struct GENX(3DSTATE_SBE) sbe = {
GENX(3DSTATE_SBE_header),
.AttributeSwizzleEnable = true,
.PointSpriteTextureCoordinateOrigin = UPPERLEFT,
.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
.ConstantInterpolationEnable = wm_prog_data->flat_inputs,
};
#if GEN_GEN >= 9
for (unsigned i = 0; i < 32; i++)
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
#if GEN_GEN >= 8
/* On Broadwell, they broke 3DSTATE_SBE into two packets */
struct GENX(3DSTATE_SBE_SWIZ) swiz = {
GENX(3DSTATE_SBE_SWIZ_header),
};
#else
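/* On gen7, the attribute swizzle fields live directly in 3DSTATE_SBE, so
* alias "swiz" onto sbe and pack everything into a single packet.
*/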
# define swiz sbe
#endif
/* Skip the VUE header and position slots by default */
unsigned urb_entry_read_offset = 1;
int max_source_attr = 0;
for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
int input_index = wm_prog_data->urb_setup[attr];
if (input_index < 0)
continue;
/* gl_Layer is stored in the VUE header */
if (attr == VARYING_SLOT_LAYER) {
urb_entry_read_offset = 0;
continue;
}
if (attr == VARYING_SLOT_PNTC) {
sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
continue;
}
const int slot = fs_input_map->varying_to_slot[attr];
if (input_index >= 16)
continue;
if (slot == -1) {
/* This attribute does not exist in the VUE--that means that the
* vertex shader did not write to it. It could be that it's a
* regular varying read by the fragment shader but not written by
* the vertex shader or it's gl_PrimitiveID. In the first case the
* value is undefined, in the second it needs to be
* gl_PrimitiveID.
*/
swiz.Attribute[input_index].ConstantSource = PRIM_ID;
swiz.Attribute[input_index].ComponentOverrideX = true;
swiz.Attribute[input_index].ComponentOverrideY = true;
swiz.Attribute[input_index].ComponentOverrideZ = true;
swiz.Attribute[input_index].ComponentOverrideW = true;
} else {
/* We have to subtract two slots to account for the URB entry output
* read offset in the VS and GS stages.
*/
const int source_attr = slot - 2 * urb_entry_read_offset;
assert(source_attr >= 0 && source_attr < 32);
max_source_attr = MAX2(max_source_attr, source_attr);
swiz.Attribute[input_index].SourceAttribute = source_attr;
}
}
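/* Note: the URB entry read offset and read length below are both expressed
* in 256-bit units, i.e. pairs of 128-bit varying slots, which is why the
* source attribute above subtracts 2 * urb_entry_read_offset and the read
* length rounds (max_source_attr + 1) up to an even number of slots.
*/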
sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GEN_GEN >= 8
sbe.ForceVertexURBEntryReadOffset = true;
sbe.ForceVertexURBEntryReadLength = true;
#endif
uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
GENX(3DSTATE_SBE_length));
if (!dw)
return;
GENX(3DSTATE_SBE_pack)(&pipeline->batch, dw, &sbe);
#if GEN_GEN >= 8
dw = anv_batch_emit_dwords(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ_length));
if (!dw)
return;
GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
#endif
}
static const uint32_t vk_to_gen_cullmode[] = {
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
[VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
[VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};
static const uint32_t vk_to_gen_fillmode[] = {
[VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
[VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
[VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};
static const uint32_t vk_to_gen_front_face[] = {
[VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
[VK_FRONT_FACE_CLOCKWISE] = 0
};
static void
emit_rs_state(struct anv_pipeline *pipeline,
const VkPipelineRasterizationStateCreateInfo *rs_info,
const VkPipelineMultisampleStateCreateInfo *ms_info,
const struct anv_render_pass *pass,
const struct anv_subpass *subpass)
{
struct GENX(3DSTATE_SF) sf = {
GENX(3DSTATE_SF_header),
};
sf.ViewportTransformEnable = true;
sf.StatisticsEnable = true;
sf.TriangleStripListProvokingVertexSelect = 0;
sf.LineStripListProvokingVertexSelect = 0;
sf.TriangleFanProvokingVertexSelect = 1;
const struct brw_vue_prog_data *last_vue_prog_data =
anv_pipeline_get_last_vue_prog_data(pipeline);
if (last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
sf.PointWidthSource = Vertex;
} else {
sf.PointWidthSource = State;
sf.PointWidth = 1.0;
}
#if GEN_GEN >= 8
struct GENX(3DSTATE_RASTER) raster = {
GENX(3DSTATE_RASTER_header),
};
#else
# define raster sf
#endif
/* For details on 3DSTATE_RASTER multisample state, see the BSpec table
* "Multisample Modes State".
*/
#if GEN_GEN >= 8
raster.DXMultisampleRasterizationEnable = true;
/* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
* computations. If we ever set this bit to a different value, they will
* need to be updated accordingly.
*/
raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
raster.ForceMultisampling = false;
#else
raster.MultisampleRasterizationMode =
(ms_info && ms_info->rasterizationSamples > 1) ?
MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
#endif
raster.FrontWinding = vk_to_gen_front_face[rs_info->frontFace];
raster.CullMode = vk_to_gen_cullmode[rs_info->cullMode];
raster.FrontFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
raster.BackFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
raster.ScissorRectangleEnable = true;
#if GEN_GEN >= 9
/* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
raster.ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable;
raster.ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable;
#elif GEN_GEN >= 8
raster.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
#endif
raster.GlobalDepthOffsetEnableSolid = rs_info->depthBiasEnable;
raster.GlobalDepthOffsetEnableWireframe = rs_info->depthBiasEnable;
raster.GlobalDepthOffsetEnablePoint = rs_info->depthBiasEnable;
#if GEN_GEN == 7
/* Gen7 requires that we provide the depth format in 3DSTATE_SF so that it
* can get the depth offsets correct.
*/
if (subpass->depth_stencil_attachment.attachment < pass->attachment_count) {
VkFormat vk_format =
pass->attachments[subpass->depth_stencil_attachment.attachment].format;
assert(vk_format_is_depth_or_stencil(vk_format));
if (vk_format_aspects(vk_format) & VK_IMAGE_ASPECT_DEPTH_BIT) {
enum isl_format isl_format =
anv_get_isl_format(&pipeline->device->info, vk_format,
VK_IMAGE_ASPECT_DEPTH_BIT,
VK_IMAGE_TILING_OPTIMAL);
sf.DepthBufferSurfaceFormat =
isl_format_get_depth_format(isl_format, false);
}
}
#endif
#if GEN_GEN >= 8
GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);
GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
#else
# undef raster
GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
#endif
}
static void
emit_ms_state(struct anv_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *info)
{
uint32_t samples = 1;
uint32_t log2_samples = 0;
/* From the Vulkan 1.0 spec:
* If pSampleMask is NULL, it is treated as if the mask has all bits
* enabled, i.e. no coverage is removed from fragments.
*
* 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
*/
#if GEN_GEN >= 8
uint32_t sample_mask = 0xffff;
#else
uint32_t sample_mask = 0xff;
#endif
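/* rasterizationSamples is a single power-of-two VkSampleCountFlagBits bit,
* so __builtin_ffs(samples) - 1 below is simply log2(samples), the encoding
* 3DSTATE_MULTISAMPLE::NumberofMultisamples expects.
*/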
if (info) {
samples = info->rasterizationSamples;
log2_samples = __builtin_ffs(samples) - 1;
}
if (info && info->pSampleMask)
sample_mask &= info->pSampleMask[0];
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
ms.NumberofMultisamples = log2_samples;
ms.PixelLocation = CENTER;
#if GEN_GEN >= 8
/* The PRM says that this bit is valid only for DX9:
*
* SW can choose to set this bit only for DX9 API. DX10/OGL API's
* should not have any effect by setting or not setting this bit.
*/
ms.PixelPositionOffsetEnable = false;
#else
switch (samples) {
case 1:
GEN_SAMPLE_POS_1X(ms.Sample);
break;
case 2:
GEN_SAMPLE_POS_2X(ms.Sample);
break;
case 4:
GEN_SAMPLE_POS_4X(ms.Sample);
break;
case 8:
GEN_SAMPLE_POS_8X(ms.Sample);
break;
default:
break;
}
#endif
}
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
sm.SampleMask = sample_mask;
}
}
static const uint32_t vk_to_gen_logic_op[] = {
[VK_LOGIC_OP_COPY] = LOGICOP_COPY,
[VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
[VK_LOGIC_OP_AND] = LOGICOP_AND,
[VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
[VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
[VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP,
[VK_LOGIC_OP_XOR] = LOGICOP_XOR,
[VK_LOGIC_OP_OR] = LOGICOP_OR,
[VK_LOGIC_OP_NOR] = LOGICOP_NOR,
[VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV,
[VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
[VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
[VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
[VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
[VK_LOGIC_OP_NAND] = LOGICOP_NAND,
[VK_LOGIC_OP_SET] = LOGICOP_SET,
};
static const uint32_t vk_to_gen_blend[] = {
[VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
[VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
[VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
[VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
[VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
[VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
[VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
[VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
[VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
[VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
[VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
[VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
[VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
};
static const uint32_t vk_to_gen_blend_op[] = {
[VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
[VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
[VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
[VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
[VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
};
static const uint32_t vk_to_gen_compare_op[] = {
[VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
[VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
[VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
[VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL,
[VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
[VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
[VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL,
[VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
};
static const uint32_t vk_to_gen_stencil_op[] = {
[VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
[VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
[VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
[VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
[VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
[VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
[VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
[VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
};
/* This function sanitizes the VkStencilOpState by looking at the compare ops
* and trying to determine whether or not a given stencil op can ever actually
* occur. Stencil ops which can never occur are set to VK_STENCIL_OP_KEEP.
* This function returns true if, after sanitization, any of the stencil ops
* set to something other than VK_STENCIL_OP_KEEP.
*/
static bool
sanitize_stencil_face(VkStencilOpState *face,
VkCompareOp depthCompareOp)
{
/* If compareOp is ALWAYS then the stencil test will never fail and failOp
* will never happen. Set failOp to KEEP in this case.
*/
if (face->compareOp == VK_COMPARE_OP_ALWAYS)
face->failOp = VK_STENCIL_OP_KEEP;
/* If compareOp is NEVER or depthCompareOp is NEVER then one of the depth
* or stencil tests will fail and passOp will never happen.
*/
if (face->compareOp == VK_COMPARE_OP_NEVER ||
depthCompareOp == VK_COMPARE_OP_NEVER)
face->passOp = VK_STENCIL_OP_KEEP;
/* If compareOp is NEVER or depthCompareOp is ALWAYS then either the
* stencil test will fail or the depth test will pass. In either case,
* depthFailOp will never happen.
*/
if (face->compareOp == VK_COMPARE_OP_NEVER ||
depthCompareOp == VK_COMPARE_OP_ALWAYS)
face->depthFailOp = VK_STENCIL_OP_KEEP;
return face->failOp != VK_STENCIL_OP_KEEP ||
face->depthFailOp != VK_STENCIL_OP_KEEP ||
face->passOp != VK_STENCIL_OP_KEEP;
}
/* Intel hardware is fairly sensitive to whether or not depth/stencil writes
* are enabled. In the presence of discards, it's fairly easy to get into the
* non-promoted case which means a fairly big performance hit. From the Iron
* Lake PRM, Vol 2, pt. 1, section 8.4.3.2, "Early Depth Test Cases":
*
* "Non-promoted depth (N) is active whenever the depth test can be done
* early but it cannot determine whether or not to write source depth to
* the depth buffer, therefore the depth write must be performed post pixel
* shader. This includes cases where the pixel shader can kill pixels,
* including via sampler chroma key, as well as cases where the alpha test
* function is enabled, which kills pixels based on a programmable alpha
* test. In this case, even if the depth test fails, the pixel cannot be
* killed if a stencil write is indicated. Whether or not the stencil write
* happens depends on whether or not the pixel is killed later. In these
* cases if stencil test fails and stencil writes are off, the pixels can
* also be killed early. If stencil writes are enabled, the pixels must be
* treated as Computed depth (described above)."
*
* The same thing as mentioned in the stencil case can happen in the depth
* case as well if it thinks it writes depth but, thanks to the depth test
* being GL_EQUAL, the write doesn't actually matter. A little extra work
* up-front to try and disable depth and stencil writes can make a big
* difference.
*
* Unfortunately, the way depth and stencil testing is specified, there are
* many cases where, regardless of depth/stencil writes being enabled, nothing
* actually gets written due to some other bit of state being set. This
* function attempts to "sanitize" the depth stencil state and disable writes
* and sometimes even testing whenever possible.
*/
static void
sanitize_ds_state(VkPipelineDepthStencilStateCreateInfo *state,
bool *stencilWriteEnable,
VkImageAspectFlags ds_aspects)
{
*stencilWriteEnable = state->stencilTestEnable;
/* If the depth test is disabled, we won't be writing anything. */
if (!state->depthTestEnable)
state->depthWriteEnable = false;
/* The Vulkan spec requires that if either depth or stencil is not present,
* the pipeline is to act as if the test silently passes.
*/
if (!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
state->depthWriteEnable = false;
state->depthCompareOp = VK_COMPARE_OP_ALWAYS;
}
if (!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) {
*stencilWriteEnable = false;
state->front.compareOp = VK_COMPARE_OP_ALWAYS;
state->back.compareOp = VK_COMPARE_OP_ALWAYS;
}
/* If the stencil test is enabled and always fails, then we will never get
* to the depth test so we can just disable the depth test entirely.
*/
if (state->stencilTestEnable &&
state->front.compareOp == VK_COMPARE_OP_NEVER &&
state->back.compareOp == VK_COMPARE_OP_NEVER) {
state->depthTestEnable = false;
state->depthWriteEnable = false;
}
/* If depthCompareOp is EQUAL then the value we would be writing to the
* depth buffer is the same as the value that's already there so there's no
* point in writing it.
*/
if (state->depthCompareOp == VK_COMPARE_OP_EQUAL)
state->depthWriteEnable = false;
/* If the stencil ops are such that we don't actually ever modify the
* stencil buffer, we should disable writes.
*/
if (!sanitize_stencil_face(&state->front, state->depthCompareOp) &&
!sanitize_stencil_face(&state->back, state->depthCompareOp))
*stencilWriteEnable = false;
/* If the depth test always passes and we never write out depth, that's the
* same as if the depth test is disabled entirely.
*/
if (state->depthCompareOp == VK_COMPARE_OP_ALWAYS &&
!state->depthWriteEnable)
state->depthTestEnable = false;
/* If the stencil test always passes and we never write out stencil, that's
* the same as if the stencil test is disabled entirely.
*/
if (state->front.compareOp == VK_COMPARE_OP_ALWAYS &&
state->back.compareOp == VK_COMPARE_OP_ALWAYS &&
!*stencilWriteEnable)
state->stencilTestEnable = false;
}
static void
emit_ds_state(struct anv_pipeline *pipeline,
const VkPipelineDepthStencilStateCreateInfo *pCreateInfo,
const struct anv_render_pass *pass,
const struct anv_subpass *subpass)
{
#if GEN_GEN == 7
# define depth_stencil_dw pipeline->gen7.depth_stencil_state
#elif GEN_GEN == 8
# define depth_stencil_dw pipeline->gen8.wm_depth_stencil
#else
# define depth_stencil_dw pipeline->gen9.wm_depth_stencil
#endif
if (pCreateInfo == NULL) {
/* We're going to OR this together with the dynamic state. We need
* to make sure it's initialized to something useful.
*/
pipeline->writes_stencil = false;
pipeline->stencil_test_enable = false;
pipeline->writes_depth = false;
pipeline->depth_test_enable = false;
memset(depth_stencil_dw, 0, sizeof(depth_stencil_dw));
return;
}
VkImageAspectFlags ds_aspects = 0;
if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
VkFormat depth_stencil_format =
pass->attachments[subpass->depth_stencil_attachment.attachment].format;
ds_aspects = vk_format_aspects(depth_stencil_format);
}
VkPipelineDepthStencilStateCreateInfo info = *pCreateInfo;
sanitize_ds_state(&info, &pipeline->writes_stencil, ds_aspects);
pipeline->stencil_test_enable = info.stencilTestEnable;
pipeline->writes_depth = info.depthWriteEnable;
pipeline->depth_test_enable = info.depthTestEnable;
/* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */
#if GEN_GEN <= 7
struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
#else
struct GENX(3DSTATE_WM_DEPTH_STENCIL) depth_stencil = {
#endif
.DepthTestEnable = info.depthTestEnable,
.DepthBufferWriteEnable = info.depthWriteEnable,
.DepthTestFunction = vk_to_gen_compare_op[info.depthCompareOp],
.DoubleSidedStencilEnable = true,
.StencilTestEnable = info.stencilTestEnable,
.StencilFailOp = vk_to_gen_stencil_op[info.front.failOp],
.StencilPassDepthPassOp = vk_to_gen_stencil_op[info.front.passOp],
.StencilPassDepthFailOp = vk_to_gen_stencil_op[info.front.depthFailOp],
.StencilTestFunction = vk_to_gen_compare_op[info.front.compareOp],
.BackfaceStencilFailOp = vk_to_gen_stencil_op[info.back.failOp],
.BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info.back.passOp],
.BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info.back.depthFailOp],
.BackfaceStencilTestFunction = vk_to_gen_compare_op[info.back.compareOp],
};
#if GEN_GEN <= 7
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
#else
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil);
#endif
}
static void
emit_cb_state(struct anv_pipeline *pipeline,
const VkPipelineColorBlendStateCreateInfo *info,
const VkPipelineMultisampleStateCreateInfo *ms_info)
{
struct anv_device *device = pipeline->device;
struct GENX(BLEND_STATE) blend_state = {
#if GEN_GEN >= 8
.AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
.AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
#endif
};
uint32_t surface_count = 0;
struct anv_pipeline_bind_map *map;
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
surface_count = map->surface_count;
}
const uint32_t num_dwords = GENX(BLEND_STATE_length) +
GENX(BLEND_STATE_ENTRY_length) * surface_count;
pipeline->blend_state =
anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);
bool has_writeable_rt = false;
uint32_t *state_pos = pipeline->blend_state.map;
state_pos += GENX(BLEND_STATE_length);
#if GEN_GEN >= 8
struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
#endif
for (unsigned i = 0; i < surface_count; i++) {
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];
/* All color attachments are at the beginning of the binding table */
if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
break;
/* We can have at most 8 attachments */
assert(i < 8);
if (info == NULL || binding->index >= info->attachmentCount) {
/* Default everything to disabled */
struct GENX(BLEND_STATE_ENTRY) entry = {
.WriteDisableAlpha = true,
.WriteDisableRed = true,
.WriteDisableGreen = true,
.WriteDisableBlue = true,
};
GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
state_pos += GENX(BLEND_STATE_ENTRY_length);
continue;
}
assert(binding->binding == 0);
const VkPipelineColorBlendAttachmentState *a =
&info->pAttachments[binding->index];
struct GENX(BLEND_STATE_ENTRY) entry = {
#if GEN_GEN < 8
.AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
.AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
#endif
.LogicOpEnable = info->logicOpEnable,
.LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
.ColorBufferBlendEnable = a->blendEnable,
.ColorClampRange = COLORCLAMP_RTFORMAT,
.PreBlendColorClampEnable = true,
.PostBlendColorClampEnable = true,
.SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
.DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
.ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
.SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
.DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
.AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
.WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
.WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
.WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
.WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
};
if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
a->colorBlendOp != a->alphaBlendOp) {
#if GEN_GEN >= 8
blend_state.IndependentAlphaBlendEnable = true;
#else
entry.IndependentAlphaBlendEnable = true;
#endif
}
if (a->colorWriteMask != 0)
has_writeable_rt = true;
/* Our hardware applies the blend factor prior to the blend function
* regardless of what function is used. Technically, this means the
* hardware can do MORE than GL or Vulkan specify. However, it also
* means that, for MIN and MAX, we have to stomp the blend factor to
* ONE to make it a no-op.
*/
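/* For example, VK_BLEND_OP_MIN must produce min(src, dst); with the factors
* left in place the hardware would compute min(src * srcFactor,
* dst * dstFactor) instead, so we force both factors to ONE.
*/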
if (a->colorBlendOp == VK_BLEND_OP_MIN ||
a->colorBlendOp == VK_BLEND_OP_MAX) {
entry.SourceBlendFactor = BLENDFACTOR_ONE;
entry.DestinationBlendFactor = BLENDFACTOR_ONE;
}
if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
a->alphaBlendOp == VK_BLEND_OP_MAX) {
entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
}
GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
state_pos += GENX(BLEND_STATE_ENTRY_length);
#if GEN_GEN >= 8
if (i == 0)
bs0 = entry;
#endif
}
#if GEN_GEN >= 8
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
blend.HasWriteableRT = has_writeable_rt;
blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
blend.SourceBlendFactor = bs0.SourceBlendFactor;
blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
blend.AlphaTestEnable = false;
blend.IndependentAlphaBlendEnable =
blend_state.IndependentAlphaBlendEnable;
}
#else
(void)has_writeable_rt;
#endif
GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
anv_state_flush(device, pipeline->blend_state);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
bsp.BlendStatePointer = pipeline->blend_state.offset;
#if GEN_GEN >= 8
bsp.BlendStatePointerValid = true;
#endif
}
}
static void
emit_3dstate_clip(struct anv_pipeline *pipeline,
const VkPipelineViewportStateCreateInfo *vp_info,
const VkPipelineRasterizationStateCreateInfo *rs_info)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
(void) wm_prog_data;
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
clip.ClipEnable = true;
clip.StatisticsEnable = true;
clip.EarlyCullEnable = true;
clip.APIMode = APIMODE_D3D;
clip.ViewportXYClipTestEnable = true;
clip.ClipMode = CLIPMODE_NORMAL;
clip.TriangleStripListProvokingVertexSelect = 0;
clip.LineStripListProvokingVertexSelect = 0;
clip.TriangleFanProvokingVertexSelect = 1;
clip.MinimumPointWidth = 0.125;
clip.MaximumPointWidth = 255.875;
const struct brw_vue_prog_data *last =
anv_pipeline_get_last_vue_prog_data(pipeline);
/* From the Vulkan 1.0.45 spec:
*
* "If the last active vertex processing stage shader entry point's
* interface does not include a variable decorated with
* ViewportIndex, then the first viewport is used."
*/
if (vp_info && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
clip.MaximumVPIndex = vp_info->viewportCount - 1;
} else {
clip.MaximumVPIndex = 0;
}
/* From the Vulkan 1.0.45 spec:
*
* "If the last active vertex processing stage shader entry point's
* interface does not include a variable decorated with Layer, then
* the first layer is used."
*/
clip.ForceZeroRTAIndexEnable =
!(last->vue_map.slots_valid & VARYING_BIT_LAYER);
#if GEN_GEN == 7
clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace];
clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode];
clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
if (last) {
clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
}
#else
clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
(wm_prog_data->barycentric_interp_modes &
BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) != 0 : 0;
#endif
}
}
static void
emit_3dstate_streamout(struct anv_pipeline *pipeline,
const VkPipelineRasterizationStateCreateInfo *rs_info)
{
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), so) {
so.RenderingDisable = rs_info->rasterizerDiscardEnable;
}
}
static uint32_t
get_sampler_count(const struct anv_shader_bin *bin)
{
uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4);
/* We can potentially have way more than 32 samplers and that's ok.
* However, the 3DSTATE_XS packets only have 3 bits to specify how
* many to pre-fetch and all values above 4 are marked reserved.
*/
return MIN2(count_by_4, 4);
}
static uint32_t
get_binding_table_entry_count(const struct anv_shader_bin *bin)
{
return DIV_ROUND_UP(bin->bind_map.surface_count, 32);
}
static struct anv_address
get_scratch_address(struct anv_pipeline *pipeline,
gl_shader_stage stage,
const struct anv_shader_bin *bin)
{
return (struct anv_address) {
.bo = anv_scratch_pool_alloc(pipeline->device,
&pipeline->device->scratch_pool,
stage, bin->prog_data->total_scratch),
.offset = 0,
};
}
static uint32_t
get_scratch_space(const struct anv_shader_bin *bin)
{
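/* total_scratch is zero or a power of two, so ffs(total_scratch / 2048)
* yields the power-of-two encoding PerThreadScratchSpace expects:
* 0 selects 1KB, 1 selects 2KB, 2 selects 4KB, and so on (assuming the
* usual gen7+ field semantics).
*/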
return ffs(bin->prog_data->total_scratch / 2048);
}
static void
emit_3dstate_vs(struct anv_pipeline *pipeline)
{
const struct gen_device_info *devinfo = &pipeline->device->info;
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
const struct anv_shader_bin *vs_bin =
pipeline->shaders[MESA_SHADER_VERTEX];
assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX));
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
vs.Enable = true;
vs.StatisticsEnable = true;
vs.KernelStartPointer = vs_bin->kernel.offset;
#if GEN_GEN >= 8
vs.SIMD8DispatchEnable =
vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
#endif
assert(!vs_prog_data->base.base.use_alt_mode);
#if GEN_GEN < 11
vs.SingleVertexDispatch = false;
#endif
vs.VectorMaskEnable = false;
vs.SamplerCount = get_sampler_count(vs_bin);
vs.BindingTableEntryCount = get_binding_table_entry_count(vs_bin);
vs.FloatingPointMode = IEEE754;
vs.IllegalOpcodeExceptionEnable = false;
vs.SoftwareExceptionEnable = false;
vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
vs.VertexCacheDisable = false;
vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
vs.VertexURBEntryReadOffset = 0;
vs.DispatchGRFStartRegisterForURBData =
vs_prog_data->base.base.dispatch_grf_start_reg;
#if GEN_GEN >= 8
vs.UserClipDistanceClipTestEnableBitmask =
vs_prog_data->base.clip_distance_mask;
vs.UserClipDistanceCullTestEnableBitmask =
vs_prog_data->base.cull_distance_mask;
#endif
vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
vs.ScratchSpaceBasePointer =
get_scratch_address(pipeline, MESA_SHADER_VERTEX, vs_bin);
}
}
static void
emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline,
const VkPipelineTessellationStateCreateInfo *tess_info)
{
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds);
return;
}
const struct gen_device_info *devinfo = &pipeline->device->info;
const struct anv_shader_bin *tcs_bin =
pipeline->shaders[MESA_SHADER_TESS_CTRL];
const struct anv_shader_bin *tes_bin =
pipeline->shaders[MESA_SHADER_TESS_EVAL];
const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs) {
hs.Enable = true;
hs.StatisticsEnable = true;
hs.KernelStartPointer = tcs_bin->kernel.offset;
hs.SamplerCount = get_sampler_count(tcs_bin);
hs.BindingTableEntryCount = get_binding_table_entry_count(tcs_bin);
hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
hs.IncludeVertexHandles = true;
hs.InstanceCount = tcs_prog_data->instances - 1;
hs.VertexURBEntryReadLength = 0;
hs.VertexURBEntryReadOffset = 0;
hs.DispatchGRFStartRegisterForURBData =
tcs_prog_data->base.base.dispatch_grf_start_reg;
hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
hs.ScratchSpaceBasePointer =
get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin);
}
const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state =
tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR) : NULL;
VkTessellationDomainOriginKHR uv_origin =
domain_origin_state ? domain_origin_state->domainOrigin :
VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR;
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te) {
te.Partitioning = tes_prog_data->partitioning;
if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR) {
te.OutputTopology = tes_prog_data->output_topology;
} else {
/* When the origin is upper-left, we have to flip the winding order */
if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
te.OutputTopology = OUTPUT_TRI_CW;
} else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
te.OutputTopology = OUTPUT_TRI_CCW;
} else {
te.OutputTopology = tes_prog_data->output_topology;
}
}
te.TEDomain = tes_prog_data->domain;
te.TEEnable = true;
te.MaximumTessellationFactorOdd = 63.0;
te.MaximumTessellationFactorNotOdd = 64.0;
}
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds) {
ds.Enable = true;
ds.StatisticsEnable = true;
ds.KernelStartPointer = tes_bin->kernel.offset;
ds.SamplerCount = get_sampler_count(tes_bin);
ds.BindingTableEntryCount = get_binding_table_entry_count(tes_bin);
ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
ds.ComputeWCoordinateEnable =
tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
ds.PatchURBEntryReadOffset = 0;
ds.DispatchGRFStartRegisterForURBData =
tes_prog_data->base.base.dispatch_grf_start_reg;
#if GEN_GEN >= 8
ds.DispatchMode =
tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
DISPATCH_MODE_SIMD8_SINGLE_PATCH :
DISPATCH_MODE_SIMD4X2;
ds.UserClipDistanceClipTestEnableBitmask =
tes_prog_data->base.clip_distance_mask;
ds.UserClipDistanceCullTestEnableBitmask =
tes_prog_data->base.cull_distance_mask;
#endif
ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
ds.ScratchSpaceBasePointer =
get_scratch_address(pipeline, MESA_SHADER_TESS_EVAL, tes_bin);
}
}
static void
emit_3dstate_gs(struct anv_pipeline *pipeline)
{
const struct gen_device_info *devinfo = &pipeline->device->info;
const struct anv_shader_bin *gs_bin =
pipeline->shaders[MESA_SHADER_GEOMETRY];
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
return;
}
const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
gs.Enable = true;
gs.StatisticsEnable = true;
gs.KernelStartPointer = gs_bin->kernel.offset;
gs.DispatchMode = gs_prog_data->base.dispatch_mode;
gs.SingleProgramFlow = false;
gs.VectorMaskEnable = false;
gs.SamplerCount = get_sampler_count(gs_bin);
gs.BindingTableEntryCount = get_binding_table_entry_count(gs_bin);
gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
if (GEN_GEN == 8) {
/* Broadwell is weird. It needs us to divide by 2. */
gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
} else {
gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
}
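      /* OutputVertexSize is programmed in 16B units minus one, while the
       * compiler reports the vertex size in 32B hwords, hence the * 2 - 1.
       */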
gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
gs.OutputTopology = gs_prog_data->output_topology;
gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
gs.ControlDataFormat = gs_prog_data->control_data_format;
gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
gs.ReorderMode = TRAILING;
#if GEN_GEN >= 8
gs.ExpectedVertexCount = gs_prog_data->vertices_in;
gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
gs_prog_data->static_vertex_count : 0;
#endif
gs.VertexURBEntryReadOffset = 0;
gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
gs.DispatchGRFStartRegisterForURBData =
gs_prog_data->base.base.dispatch_grf_start_reg;
#if GEN_GEN >= 8
gs.UserClipDistanceClipTestEnableBitmask =
gs_prog_data->base.clip_distance_mask;
gs.UserClipDistanceCullTestEnableBitmask =
gs_prog_data->base.cull_distance_mask;
#endif
gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
gs.ScratchSpaceBasePointer =
get_scratch_address(pipeline, MESA_SHADER_GEOMETRY, gs_bin);
}
}
static bool
has_color_buffer_write_enabled(const struct anv_pipeline *pipeline,
const VkPipelineColorBlendStateCreateInfo *blend)
{
const struct anv_shader_bin *shader_bin =
pipeline->shaders[MESA_SHADER_FRAGMENT];
if (!shader_bin)
return false;
const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map;
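   /* Walk the fragment shader's binding table looking for a color-attachment
    * surface whose blend state has a non-zero write mask; entries with an
    * index of UINT32_MAX have no corresponding blend attachment and are
    * skipped.
    */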
for (int i = 0; i < bind_map->surface_count; i++) {
struct anv_pipeline_binding *binding = &bind_map->surface_to_descriptor[i];
if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
continue;
if (binding->index == UINT32_MAX)
continue;
if (blend->pAttachments[binding->index].colorWriteMask != 0)
return true;
}
return false;
}
static void
emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass,
const VkPipelineColorBlendStateCreateInfo *blend,
const VkPipelineMultisampleStateCreateInfo *multisample)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
MAYBE_UNUSED uint32_t samples =
multisample ? multisample->rasterizationSamples : 1;
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
wm.StatisticsEnable = true;
wm.LineEndCapAntialiasingRegionWidth = _05pixels;
wm.LineAntialiasingRegionWidth = _10pixels;
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
if (wm_prog_data->early_fragment_tests) {
wm.EarlyDepthStencilControl = EDSC_PREPS;
} else if (wm_prog_data->has_side_effects) {
wm.EarlyDepthStencilControl = EDSC_PSEXEC;
} else {
wm.EarlyDepthStencilControl = EDSC_NORMAL;
}
wm.BarycentricInterpolationMode =
wm_prog_data->barycentric_interp_modes;
#if GEN_GEN < 8
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
/* If the subpass has a depth or stencil self-dependency, then we
* need to force the hardware to do the depth/stencil write *after*
* fragment shader execution. Otherwise, the writes may hit memory
* before we get around to fetching from the input attachment and we
* may get the depth or stencil value from the current draw rather
* than the previous one.
*/
wm.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
wm_prog_data->uses_kill;
if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
wm_prog_data->has_side_effects ||
wm.PixelShaderKillsPixel ||
has_color_buffer_write_enabled(pipeline, blend))
wm.ThreadDispatchEnable = true;
if (samples > 1) {
wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
if (wm_prog_data->persample_dispatch) {
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
} else {
wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
}
} else {
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
}
#endif
}
}
}
UNUSED static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}
static void
emit_3dstate_ps(struct anv_pipeline *pipeline,
const VkPipelineColorBlendStateCreateInfo *blend)
{
MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info;
const struct anv_shader_bin *fs_bin =
pipeline->shaders[MESA_SHADER_FRAGMENT];
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
#if GEN_GEN == 7
/* Even if no fragments are ever dispatched, gen7 hardware hangs if
* we don't at least set the maximum number of threads.
*/
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
}
return;
}
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
#if GEN_GEN < 8
/* The hardware wedges if you have this bit set but don't turn on any dual
* source blend factors.
*/
bool dual_src_blend = false;
if (wm_prog_data->dual_src_blend && blend) {
for (uint32_t i = 0; i < blend->attachmentCount; i++) {
const VkPipelineColorBlendAttachmentState *bstate =
&blend->pAttachments[i];
if (bstate->blendEnable &&
(is_dual_src_blend_factor(bstate->srcColorBlendFactor) ||
is_dual_src_blend_factor(bstate->dstColorBlendFactor) ||
is_dual_src_blend_factor(bstate->srcAlphaBlendFactor) ||
is_dual_src_blend_factor(bstate->dstAlphaBlendFactor))) {
dual_src_blend = true;
break;
}
}
}
#endif
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
ps.KernelStartPointer0 = fs_bin->kernel.offset;
ps.KernelStartPointer1 = 0;
ps.KernelStartPointer2 = fs_bin->kernel.offset +
wm_prog_data->prog_offset_2;
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
ps._32PixelDispatchEnable = false;
ps.SingleProgramFlow = false;
ps.VectorMaskEnable = true;
ps.SamplerCount = get_sampler_count(fs_bin);
ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin);
ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
wm_prog_data->base.ubo_ranges[0].length;
ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
POSOFFSET_SAMPLE: POSOFFSET_NONE;
#if GEN_GEN < 8
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
ps.DualSourceBlendEnable = dual_src_blend;
#endif
#if GEN_IS_HASWELL
/* Haswell requires the sample mask to be set in this packet as well
* as in 3DSTATE_SAMPLE_MASK; the values should match.
*/
ps.SampleMask = 0xff;
#endif
#if GEN_GEN >= 9
ps.MaximumNumberofThreadsPerPSD = 64 - 1;
#elif GEN_GEN >= 8
ps.MaximumNumberofThreadsPerPSD = 64 - 2;
#else
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
ps.DispatchGRFStartRegisterForConstantSetupData0 =
wm_prog_data->base.dispatch_grf_start_reg;
ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
ps.DispatchGRFStartRegisterForConstantSetupData2 =
wm_prog_data->dispatch_grf_start_reg_2;
ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
ps.ScratchSpaceBasePointer =
get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, fs_bin);
}
}
#if GEN_GEN >= 8
static void
emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
struct anv_subpass *subpass,
const VkPipelineColorBlendStateCreateInfo *blend)
{
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps);
return;
}
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
ps.PixelShaderValid = true;
ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
/* If the subpass has a depth or stencil self-dependency, then we need
* to force the hardware to do the depth/stencil write *after* fragment
* shader execution. Otherwise, the writes may hit memory before we get
* around to fetching from the input attachment and we may get the depth
* or stencil value from the current draw rather than the previous one.
*/
ps.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
wm_prog_data->uses_kill;
/* The stricter cross-primitive coherency guarantees that the hardware
* gives us with the "Accesses UAV" bit set for at least one shader stage
* and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
* redundant within the current image, atomic counter and SSBO GL APIs,
* which all have very loose ordering and coherency requirements and
* generally rely on the application to insert explicit barriers when a
* shader invocation is expected to see the memory writes performed by the
* invocations of some previous primitive. Regardless of the value of
* "UAV coherency required", the "Accesses UAV" bits will implicitly cause
* an in most cases useless DC flush when the lowermost stage with the bit
* set finishes execution.
*
* It would be nice to disable it, but in some cases we can't because on
* Gen8+ it also has an influence on rasterization via the PS UAV-only
* signal (which could be set independently from the coherency mechanism
* in the 3DSTATE_WM command on Gen7), and because in some cases it will
* determine whether the hardware skips execution of the fragment shader
       * or not via the ThreadDispatchEnable signal. However, if we know that
* GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
* GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
* difference so we may just disable it here.
*
* Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
* take into account KillPixels when no depth or stencil writes are
* enabled. In order for occlusion queries to work correctly with no
* attachments, we need to force-enable here.
*/
if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
!has_color_buffer_write_enabled(pipeline, blend))
ps.PixelShaderHasUAV = true;
#if GEN_GEN >= 9
ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;
ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ?
ICMS_INNER_CONSERVATIVE : ICMS_NONE;
#else
ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
#endif
}
}
static void
emit_3dstate_vf_topology(struct anv_pipeline *pipeline)
{
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
vft.PrimitiveTopologyType = pipeline->topology;
}
}
#endif
static void
emit_3dstate_vf_statistics(struct anv_pipeline *pipeline)
{
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
vfs.StatisticsEnable = true;
}
}
static void
compute_kill_pixel(struct anv_pipeline *pipeline,
const VkPipelineMultisampleStateCreateInfo *ms_info,
const struct anv_subpass *subpass)
{
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
pipeline->kill_pixel = false;
return;
}
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
/* This computes the KillPixel portion of the computation for whether or
* not we want to enable the PMA fix on gen8 or gen9. It's given by this
* chunk of the giant formula:
*
* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
* 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
* 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
* 3DSTATE_PS_BLEND::AlphaTestEnable ||
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
*
* 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
* 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
* of an alpha test.
*/
pipeline->kill_pixel =
subpass->has_ds_self_dep || wm_prog_data->uses_kill ||
wm_prog_data->uses_omask ||
(ms_info && ms_info->alphaToCoverageEnable);
}
static VkResult
genX(graphics_pipeline_create)(
VkDevice _device,
struct anv_pipeline_cache * cache,
const VkGraphicsPipelineCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
struct anv_pipeline *pipeline;
VkResult result;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
result = anv_pipeline_init(pipeline, device, cache,
pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
vk_free2(&device->alloc, pAllocator, pipeline);
return result;
}
assert(pCreateInfo->pVertexInputState);
emit_vertex_input(pipeline, pCreateInfo->pVertexInputState);
assert(pCreateInfo->pRasterizationState);
emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
pCreateInfo->pMultisampleState, pass, subpass);
emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
pCreateInfo->pMultisampleState);
compute_kill_pixel(pipeline, pCreateInfo->pMultisampleState, subpass);
emit_urb_setup(pipeline);
emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
pCreateInfo->pRasterizationState);
emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState);
#if 0
/* From gen7_vs_state.c */
/**
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
* Geometry > Geometry Shader > State:
*
* "Note: Because of corruption in IVB:GT2, software needs to flush the
* whole fixed function pipeline when the GS enable changes value in
* the 3DSTATE_GS."
*
* The hardware architects have clarified that in this context "flush the
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
* Stall" bit set.
*/
if (!device->info.is_haswell && !device->info.is_baytrail)
gen7_emit_vs_workaround_flush(brw);
#endif
emit_3dstate_vs(pipeline);
emit_3dstate_hs_te_ds(pipeline, pCreateInfo->pTessellationState);
emit_3dstate_gs(pipeline);
emit_3dstate_sbe(pipeline);
emit_3dstate_wm(pipeline, subpass, pCreateInfo->pColorBlendState,
pCreateInfo->pMultisampleState);
emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState);
#if GEN_GEN >= 8
emit_3dstate_ps_extra(pipeline, subpass, pCreateInfo->pColorBlendState);
emit_3dstate_vf_topology(pipeline);
#endif
emit_3dstate_vf_statistics(pipeline);
*pPipeline = anv_pipeline_to_handle(pipeline);
return pipeline->batch.status;
}
static VkResult
compute_pipeline_create(
VkDevice _device,
struct anv_pipeline_cache * cache,
const VkComputePipelineCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipeline)
{
ANV_FROM_HANDLE(anv_device, device, _device);
const struct anv_physical_device *physical_device =
&device->instance->physicalDevice;
const struct gen_device_info *devinfo = &physical_device->info;
struct anv_pipeline *pipeline;
VkResult result;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (pipeline == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
pipeline->device = device;
pipeline->blend_state.map = NULL;
result = anv_reloc_list_init(&pipeline->batch_relocs,
pAllocator ? pAllocator : &device->alloc);
if (result != VK_SUCCESS) {
vk_free2(&device->alloc, pAllocator, pipeline);
return result;
}
pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
pipeline->batch.relocs = &pipeline->batch_relocs;
pipeline->batch.status = VK_SUCCESS;
/* When we free the pipeline, we detect stages based on the NULL status
* of various prog_data pointers. Make them NULL by default.
*/
memset(pipeline->shaders, 0, sizeof(pipeline->shaders));
pipeline->active_stages = 0;
pipeline->needs_data_cache = false;
assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
pCreateInfo->stage.pName,
pCreateInfo->stage.pSpecializationInfo);
if (result != VK_SUCCESS) {
vk_free2(&device->alloc, pAllocator, pipeline);
return result;
}
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
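   /* Ask for an L3 configuration with an SLM partition whenever the shader
    * uses shared local memory.
    */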
anv_pipeline_setup_l3_config(pipeline, cs_prog_data->base.total_shared > 0);
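   /* The last SIMD group of a thread group may be only partially full.
    * Build an execution mask with one bit set per active channel in that
    * final group; it is consumed as the right execution mask when the
    * compute walker is emitted at dispatch time.
    */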
uint32_t group_size = cs_prog_data->local_size[0] *
cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
if (remainder > 0)
pipeline->cs_right_mask = ~0u >> (32 - remainder);
else
pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
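   /* CURBE space: per-thread push constant registers for every thread in the
    * group plus the cross-thread constants, aligned to an even number of
    * registers.
    */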
const uint32_t vfe_curbe_allocation =
ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
cs_prog_data->push.cross_thread.regs, 2);
const uint32_t subslices = MAX2(physical_device->subslice_total, 1);
const struct anv_shader_bin *cs_bin =
pipeline->shaders[MESA_SHADER_COMPUTE];
anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) {
#if GEN_GEN > 7
vfe.StackSize = 0;
#else
vfe.GPGPUMode = true;
#endif
vfe.MaximumNumberofThreads =
devinfo->max_cs_threads * subslices - 1;
vfe.NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2;
#if GEN_GEN < 11
vfe.ResetGatewayTimer = true;
#endif
#if GEN_GEN <= 8
vfe.BypassGatewayControl = true;
#endif
vfe.URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2;
vfe.CURBEAllocationSize = vfe_curbe_allocation;
vfe.PerThreadScratchSpace = get_scratch_space(cs_bin);
vfe.ScratchSpaceBasePointer =
get_scratch_address(pipeline, MESA_SHADER_COMPUTE, cs_bin);
}
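   /* Pack the interface descriptor into the pipeline now so the command
    * buffer can copy it out and load it with MEDIA_INTERFACE_DESCRIPTOR_LOAD
    * at dispatch time.
    */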
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
.KernelStartPointer = cs_bin->kernel.offset,
.SamplerCount = get_sampler_count(cs_bin),
.BindingTableEntryCount = get_binding_table_entry_count(cs_bin),
.BarrierEnable = cs_prog_data->uses_barrier,
.SharedLocalMemorySize =
encode_slm_size(GEN_GEN, cs_prog_data->base.total_shared),
#if !GEN_IS_HASWELL
.ConstantURBEntryReadOffset = 0,
#endif
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
#if GEN_GEN >= 8 || GEN_IS_HASWELL
.CrossThreadConstantDataReadLength =
cs_prog_data->push.cross_thread.regs,
#endif
.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
};
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
pipeline->interface_descriptor_data,
&desc);
*pPipeline = anv_pipeline_to_handle(pipeline);
return pipeline->batch.status;
}
VkResult genX(CreateGraphicsPipelines)(
VkDevice _device,
VkPipelineCache pipelineCache,
uint32_t count,
const VkGraphicsPipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipelines)
{
ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);
VkResult result = VK_SUCCESS;
unsigned i;
for (i = 0; i < count; i++) {
result = genX(graphics_pipeline_create)(_device,
pipeline_cache,
&pCreateInfos[i],
pAllocator, &pPipelines[i]);
      /* Bail out on the first error, as it is not obvious which error should
       * be reported when two different failures occur. */
if (result != VK_SUCCESS)
break;
}
for (; i < count; i++)
pPipelines[i] = VK_NULL_HANDLE;
return result;
}
VkResult genX(CreateComputePipelines)(
VkDevice _device,
VkPipelineCache pipelineCache,
uint32_t count,
const VkComputePipelineCreateInfo* pCreateInfos,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipelines)
{
ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);
VkResult result = VK_SUCCESS;
unsigned i;
for (i = 0; i < count; i++) {
result = compute_pipeline_create(_device, pipeline_cache,
&pCreateInfos[i],
pAllocator, &pPipelines[i]);
      /* Bail out on the first error, as it is not obvious which error should
       * be reported when two different failures occur. */
if (result != VK_SUCCESS)
break;
}
for (; i < count; i++)
pPipelines[i] = VK_NULL_HANDLE;
return result;
}