/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#include "common/gen_l3_config.h"
#include "common/gen_sample_positions.h"
#include "vk_format_info.h"

static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   /*
    * Take into account hardware restrictions when dealing with 64-bit floats.
    *
    * From the Broadwell spec, command reference structures, page 586:
    *  "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    *   64-bit components are stored in the URB without any conversion. In
    *   this case, vertex elements must be written as 128 or 256 bits, with
    *   VFCOMP_STORE_0 being used to pad the output as required. E.g., if
    *   R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
    *   Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
    *   set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
    *   Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
    *   a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
    *   Component 3 to be specified as VFCOMP_STORE_0 in order to output a
    *   256-bit vertex element."
    */
   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp >= 2 &&
              !isl_format_layouts[format].channels.b.bits &&
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* When emitting 64-bit attributes, we need to write either 128 or 256
       * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
       * VFCOMP_STORE_0 to pad the written chunk.
       */
      return VFCOMP_NOSTORE;
   } else if (comp < 3 ||
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* Note we need to pad with value 0, not 1, due to hardware
       * restrictions (see the comment above).
       */
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}
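
/* For example, a double attribute uploaded as R64_PASSTHRU has bits only in
 * the red channel, so the calls above yield VFCOMP_STORE_SRC, VFCOMP_STORE_0,
 * VFCOMP_NOSTORE, VFCOMP_NOSTORE for components 0-3: a 128-bit element
 * padded exactly as the Broadwell text quoted above requires.
 */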

static void
emit_vertex_input(struct anv_pipeline *pipeline,
                  const VkPipelineVertexInputStateCreateInfo *info)
{
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   /* Pull inputs_read out of the VS prog data */
   const uint64_t inputs_read = vs_prog_data->inputs_read;
   const uint64_t double_inputs_read = vs_prog_data->double_inputs_read;
   assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
   const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                vs_prog_data->uses_instanceid ||
                                vs_prog_data->uses_basevertex ||
                                vs_prog_data->uses_baseinstance;
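
   /* A dvec3/dvec4 attribute occupies two consecutive locations in NIR, so
    * it sets two bits in elements (and in elements_double) while needing
    * only one vertex element.  Each pair of double bits therefore cancels
    * one counted element: e.g. a vec3 at location 0 plus a dvec4 at
    * locations 1-2 gives popcount(elements) == 3,
    * popcount(elements_double) == 2, and elem_count == 2.
    */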
   uint32_t elem_count = __builtin_popcount(elements) -
                         __builtin_popcount(elements_double) / 2;

   const uint32_t total_elems =
      elem_count + needs_svgs_elem + vs_prog_data->uses_drawid;
   if (total_elems == 0)
      return;
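
   /* 3DSTATE_VERTEX_ELEMENTS is one header dword followed by two dwords of
    * VERTEX_ELEMENT_STATE per element, hence 1 + total_elems * 2 below.
    */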
   uint32_t *p;
   const uint32_t num_dwords = 1 + total_elems * 2;
   p = anv_batch_emitn(&pipeline->batch, num_dwords,
                       GENX(3DSTATE_VERTEX_ELEMENTS));
   if (!p)
      return;
   memset(p + 1, 0, (num_dwords - 1) * 4);

   for (uint32_t i = 0; i < info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &info->pVertexAttributeDescriptions[i];
      enum isl_format format = anv_get_isl_format(&pipeline->device->info,
                                                  desc->format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR);

      assert(desc->binding < MAX_VBS);

      if ((elements & (1 << desc->location)) == 0)
         continue; /* Binding unused */
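
      /* Invert NIR's slot assignment: count the element bits below this
       * location, then fold each dvec3/dvec4 pair of location bits back
       * into the single vertex element it occupies.
       */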
      uint32_t slot =
         __builtin_popcount(elements & ((1 << desc->location) - 1)) -
         DIV_ROUND_UP(__builtin_popcount(elements_double &
                                         ((1 << desc->location) - 1)), 2);

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = desc->binding,
         .Valid = true,
         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = desc->offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);

#if GEN_GEN >= 8
      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing.  On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.InstancingEnable = pipeline->instancing_enable[desc->binding];
         vfi.VertexElementIndex = slot;
         /* Our implementation of VK_KHX_multiview uses instancing to draw
          * the different views.  If the client asks for instancing, we
          * need to use the Instance Data Step Rate to ensure that we
          * repeat the client's per-instance data once for each view.
          */
         vfi.InstanceDataStepRate = anv_subpass_view_count(pipeline->subpass);
      }
#endif
   }

   const uint32_t id_slot = elem_count;
   if (needs_svgs_elem) {
      /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
       *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
       *    Control field is set to something other than VFCOMP_STORE_SRC,
       *    no higher-numbered Component Control fields may be set to
       *    VFCOMP_STORE_SRC"
       *
       * This means that if we have BaseInstance, we need BaseVertex as
       * well.  Just do all or nothing.
       */
      uint32_t base_ctrl = (vs_prog_data->uses_basevertex ||
                            vs_prog_data->uses_baseinstance) ?
                           VFCOMP_STORE_SRC : VFCOMP_STORE_0;

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_SVGS_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32G32_UINT,
         .Component0Control = base_ctrl,
         .Component1Control = base_ctrl,
#if GEN_GEN >= 8
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#else
         .Component2Control = VFCOMP_STORE_VID,
         .Component3Control = VFCOMP_STORE_IID,
#endif
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);
   }
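
   /* On gen8+, the SGVS packet below has the hardware write VertexID and
    * InstanceID into components 2 and 3 of the SVGS element, mirroring the
    * VFCOMP_STORE_VID/VFCOMP_STORE_IID component controls used on gen7.
    */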
#if GEN_GEN >= 8
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
      sgvs.VertexIDComponentNumber = 2;
      sgvs.VertexIDElementOffset = id_slot;
      sgvs.InstanceIDEnable = vs_prog_data->uses_instanceid;
      sgvs.InstanceIDComponentNumber = 3;
      sgvs.InstanceIDElementOffset = id_slot;
   }
#endif

   const uint32_t drawid_slot = elem_count + needs_svgs_elem;
   if (vs_prog_data->uses_drawid) {
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = (enum GENX(SURFACE_FORMAT)) ISL_FORMAT_R32_UINT,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &p[1 + drawid_slot * 2],
                                      &element);

#if GEN_GEN >= 8
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = drawid_slot;
      }
#endif
   }
}

void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct gen_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const unsigned entry_size[4])
{
   const struct gen_device_info *devinfo = &device->info;
#if GEN_IS_HASWELL
   const unsigned push_constant_kb = devinfo->gt == 3 ? 32 : 16;
#else
   const unsigned push_constant_kb = GEN_GEN >= 8 ? 32 : 16;
#endif

   const unsigned urb_size_kb = gen_get_l3_config_urb_size(devinfo, l3_config);

   unsigned entries[4];
   unsigned start[4];
   gen_get_urb_config(devinfo,
                      1024 * push_constant_kb, 1024 * urb_size_kb,
                      active_stages &
                         VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                      active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
                      entry_size, entries, start);

#if GEN_GEN == 7 && !GEN_IS_HASWELL
   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
    *
    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
    *    needs to be sent before any combination of VS associated 3DSTATE."
    */
   anv_batch_emit(batch, GEN7_PIPE_CONTROL, pc) {
      pc.DepthStallEnable = true;
      pc.PostSyncOperation = WriteImmediateData;
      pc.Address = (struct anv_address) { &device->workaround_bo, 0 };
   }
#endif
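
   /* 3DSTATE_URB_VS, _HS, _DS and _GS differ only in their subopcode, and
    * the subopcodes are consecutive, so a single template packet patched
    * per stage covers all four.
    */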
   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
         urb._3DCommandSubOpcode += i;
         urb.VSURBStartingAddress = start[i];
         urb.VSURBEntryAllocationSize = entry_size[i] - 1;
         urb.VSNumberofURBEntries = entries[i];
      }
   }
}

static void
emit_urb_setup(struct anv_pipeline *pipeline)
{
   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      const struct brw_vue_prog_data *prog_data =
         !anv_pipeline_has_stage(pipeline, i) ? NULL :
         (const struct brw_vue_prog_data *) pipeline->shaders[i]->prog_data;

      entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
   }

   genX(emit_urb_setup)(pipeline->device, &pipeline->batch,
                        pipeline->urb.l3_config,
                        pipeline->active_stages, entry_size);
}

static void
emit_3dstate_sbe(struct anv_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE), sbe);
#if GEN_GEN >= 8
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
      return;
   }

   const struct brw_vue_map *fs_input_map =
      &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      .AttributeSwizzleEnable = true,
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
   };

#if GEN_GEN >= 9
   for (unsigned i = 0; i < 32; i++)
      sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif

#if GEN_GEN >= 8
   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };
#else
#  define swiz sbe
#endif

   /* Skip the VUE header and position slots by default */
   unsigned urb_entry_read_offset = 1;
   int max_source_attr = 0;
   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      int input_index = wm_prog_data->urb_setup[attr];

      if (input_index < 0)
         continue;

      /* gl_Layer is stored in the VUE header */
      if (attr == VARYING_SLOT_LAYER) {
         urb_entry_read_offset = 0;
         continue;
      }

      if (attr == VARYING_SLOT_PNTC) {
         sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
         continue;
      }

      const int slot = fs_input_map->varying_to_slot[attr];

      if (input_index >= 16)
         continue;

      if (slot == -1) {
         /* This attribute does not exist in the VUE--that means that the
          * vertex shader did not write to it.  It could be that it's a
          * regular varying read by the fragment shader but not written by
          * the vertex shader or it's gl_PrimitiveID.  In the first case the
          * value is undefined, in the second it needs to be
          * gl_PrimitiveID.
          */
         swiz.Attribute[input_index].ConstantSource = PRIM_ID;
         swiz.Attribute[input_index].ComponentOverrideX = true;
         swiz.Attribute[input_index].ComponentOverrideY = true;
         swiz.Attribute[input_index].ComponentOverrideZ = true;
         swiz.Attribute[input_index].ComponentOverrideW = true;
      } else {
         /* We have to subtract two slots to account for the URB entry output
          * read offset in the VS and GS stages.
          */
         assert(slot >= 2);
         const int source_attr = slot - 2 * urb_entry_read_offset;
         max_source_attr = MAX2(max_source_attr, source_attr);
         swiz.Attribute[input_index].SourceAttribute = source_attr;
      }
   }
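
   /* The URB entry read offset and read length are expressed in 256-bit
    * units, i.e. pairs of 128-bit varying slots, hence the rounded-up
    * divide by two on max_source_attr below.
    */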
   sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
   sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GEN_GEN >= 8
   sbe.ForceVertexURBEntryReadOffset = true;
   sbe.ForceVertexURBEntryReadLength = true;
#endif

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
                                        GENX(3DSTATE_SBE_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_pack)(&pipeline->batch, dw, &sbe);

#if GEN_GEN >= 8
   dw = anv_batch_emit_dwords(&pipeline->batch, GENX(3DSTATE_SBE_SWIZ_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
#endif
}

static const uint32_t vk_to_gen_cullmode[] = {
   [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
};

static const uint32_t vk_to_gen_fillmode[] = {
   [VK_POLYGON_MODE_FILL]                    = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE]                    = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT]                   = FILL_MODE_POINT,
};

static const uint32_t vk_to_gen_front_face[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
   [VK_FRONT_FACE_CLOCKWISE]                 = 0
};
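
/* The hardware FrontWinding field encodes clockwise as 0 and
 * counter-clockwise as 1, hence the bare integer values above.
 */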

static void
emit_rs_state(struct anv_pipeline *pipeline,
              const VkPipelineRasterizationStateCreateInfo *rs_info,
              const VkPipelineMultisampleStateCreateInfo *ms_info,
              const struct anv_render_pass *pass,
              const struct anv_subpass *subpass)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
   };

   sf.ViewportTransformEnable = true;
   sf.StatisticsEnable = true;
   sf.TriangleStripListProvokingVertexSelect = 0;
   sf.LineStripListProvokingVertexSelect = 0;
   sf.TriangleFanProvokingVertexSelect = 1;

   /* If the last geometry stage wrote PointSize, take the point width from
    * the vertex; otherwise fall back to a fixed width of 1.0, which is what
    * Vulkan requires when PointSize is not written.  Looking at the VUE map
    * is more robust than tracking the shaderTessellationAndGeometryPointSize
    * feature bit.
    */
   const struct brw_vue_prog_data *last_vue_prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);

   if (last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
      sf.PointWidthSource = Vertex;
   } else {
      sf.PointWidthSource = State;
      sf.PointWidth = 1.0;
   }

#if GEN_GEN >= 8
   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),
   };
#else
#  define raster sf
#endif

   /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
    * "Multisample Modes State".
    */
#if GEN_GEN >= 8
   raster.DXMultisampleRasterizationEnable = true;
   /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
    * computations.  If we ever set this bit to a different value, they will
    * need to be updated accordingly.
    */
   raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
   raster.ForceMultisampling = false;
#else
   raster.MultisampleRasterizationMode =
      (ms_info && ms_info->rasterizationSamples > 1) ?
      MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
#endif

   raster.FrontWinding = vk_to_gen_front_face[rs_info->frontFace];
   raster.CullMode = vk_to_gen_cullmode[rs_info->cullMode];
   raster.FrontFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
   raster.BackFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
   raster.ScissorRectangleEnable = true;

#if GEN_GEN >= 9
   /* GEN9+ splits ViewportZClipTestEnable into near and far enable bits */
   raster.ViewportZFarClipTestEnable = !pipeline->depth_clamp_enable;
   raster.ViewportZNearClipTestEnable = !pipeline->depth_clamp_enable;
#elif GEN_GEN >= 8
   raster.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
#endif

   raster.GlobalDepthOffsetEnableSolid = rs_info->depthBiasEnable;
   raster.GlobalDepthOffsetEnableWireframe = rs_info->depthBiasEnable;
   raster.GlobalDepthOffsetEnablePoint = rs_info->depthBiasEnable;

#if GEN_GEN == 7
   /* Gen7 requires that we provide the depth format in 3DSTATE_SF so that it
    * can get the depth offsets correct.
    */
   if (subpass->depth_stencil_attachment.attachment < pass->attachment_count) {
      VkFormat vk_format =
         pass->attachments[subpass->depth_stencil_attachment.attachment].format;
      assert(vk_format_is_depth_or_stencil(vk_format));
      if (vk_format_aspects(vk_format) & VK_IMAGE_ASPECT_DEPTH_BIT) {
         enum isl_format isl_format =
            anv_get_isl_format(&pipeline->device->info, vk_format,
                               VK_IMAGE_ASPECT_DEPTH_BIT,
                               VK_IMAGE_TILING_OPTIMAL);
         sf.DepthBufferSurfaceFormat =
            isl_format_get_depth_format(isl_format, false);
      }
   }
#endif

#if GEN_GEN >= 8
   GENX(3DSTATE_SF_pack)(NULL, pipeline->gen8.sf, &sf);
   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gen8.raster, &raster);
#else
#  undef raster
   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gen7.sf, &sf);
#endif
}

static void
emit_ms_state(struct anv_pipeline *pipeline,
              const VkPipelineMultisampleStateCreateInfo *info)
{
   uint32_t samples = 1;
   uint32_t log2_samples = 0;

   /* From the Vulkan 1.0 spec:
    *    If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments.
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
    */
#if GEN_GEN >= 8
   uint32_t sample_mask = 0xffff;
#else
   uint32_t sample_mask = 0xff;
#endif
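
   /* rasterizationSamples is a single power-of-two VkSampleCountFlagBits
    * value, so __builtin_ffs(samples) - 1 below is exactly log2(samples).
    */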
   if (info) {
      samples = info->rasterizationSamples;
      log2_samples = __builtin_ffs(samples) - 1;
   }

   if (info && info->pSampleMask)
      sample_mask &= info->pSampleMask[0];

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
      ms.NumberofMultisamples = log2_samples;

      ms.PixelLocation = CENTER;
#if GEN_GEN >= 8
      /* The PRM says that this bit is valid only for DX9:
       *
       *    SW can choose to set this bit only for DX9 API. DX10/OGL API's
       *    should not have any effect by setting or not setting this bit.
       */
      ms.PixelPositionOffsetEnable = false;
#else

      switch (samples) {
      case 1:
         GEN_SAMPLE_POS_1X(ms.Sample);
         break;
      case 2:
         GEN_SAMPLE_POS_2X(ms.Sample);
         break;
      case 4:
         GEN_SAMPLE_POS_4X(ms.Sample);
         break;
      case 8:
         GEN_SAMPLE_POS_8X(ms.Sample);
         break;
      default:
         break;
      }
#endif
   }

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = sample_mask;
   }
}

static const uint32_t vk_to_gen_logic_op[] = {
   [VK_LOGIC_OP_COPY]                        = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR]                       = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND]                         = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE]                 = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED]                = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP]                       = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR]                         = LOGICOP_XOR,
   [VK_LOGIC_OP_OR]                          = LOGICOP_OR,
   [VK_LOGIC_OP_NOR]                         = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT]                  = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT]                      = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE]                  = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED]               = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED]                 = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND]                        = LOGICOP_NAND,
   [VK_LOGIC_OP_SET]                         = LOGICOP_SET,
};

static const uint32_t vk_to_gen_blend[] = {
   [VK_BLEND_FACTOR_ZERO]                    = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE]                     = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR]               = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR]     = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR]               = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR]     = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA]               = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA]     = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA]               = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA]     = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR]          = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA]          = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE]      = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR]              = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR]    = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA]              = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA]    = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_gen_blend_op[] = {
   [VK_BLEND_OP_ADD]                         = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT]                    = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT]            = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN]                         = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX]                         = BLENDFUNCTION_MAX,
};

static const uint32_t vk_to_gen_compare_op[] = {
   [VK_COMPARE_OP_NEVER]                     = PREFILTEROPNEVER,
   [VK_COMPARE_OP_LESS]                      = PREFILTEROPLESS,
   [VK_COMPARE_OP_EQUAL]                     = PREFILTEROPEQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]             = PREFILTEROPLEQUAL,
   [VK_COMPARE_OP_GREATER]                   = PREFILTEROPGREATER,
   [VK_COMPARE_OP_NOT_EQUAL]                 = PREFILTEROPNOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL]          = PREFILTEROPGEQUAL,
   [VK_COMPARE_OP_ALWAYS]                    = PREFILTEROPALWAYS,
};

static const uint32_t vk_to_gen_stencil_op[] = {
   [VK_STENCIL_OP_KEEP]                      = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO]                      = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE]                   = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP]       = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP]       = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT]                    = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP]        = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP]        = STENCILOP_DECR,
};

/* This function sanitizes the VkStencilOpState by looking at the compare ops
 * and trying to determine whether or not a given stencil op can ever actually
 * occur.  Stencil ops which can never occur are set to VK_STENCIL_OP_KEEP.
 * This function returns true if, after sanitization, any of the stencil ops
 * are set to something other than VK_STENCIL_OP_KEEP.
 */
static bool
sanitize_stencil_face(VkStencilOpState *face,
                      VkCompareOp depthCompareOp)
{
   /* If compareOp is ALWAYS then the stencil test will never fail and failOp
    * will never happen.  Set failOp to KEEP in this case.
    */
   if (face->compareOp == VK_COMPARE_OP_ALWAYS)
      face->failOp = VK_STENCIL_OP_KEEP;

   /* If compareOp is NEVER or depthCompareOp is NEVER then one of the depth
    * or stencil tests will fail and passOp will never happen.
    */
   if (face->compareOp == VK_COMPARE_OP_NEVER ||
       depthCompareOp == VK_COMPARE_OP_NEVER)
      face->passOp = VK_STENCIL_OP_KEEP;

   /* If compareOp is NEVER or depthCompareOp is ALWAYS then either the
    * stencil test will fail or the depth test will pass.  In either case,
    * depthFailOp will never happen.
    */
   if (face->compareOp == VK_COMPARE_OP_NEVER ||
       depthCompareOp == VK_COMPARE_OP_ALWAYS)
      face->depthFailOp = VK_STENCIL_OP_KEEP;

   return face->failOp != VK_STENCIL_OP_KEEP ||
          face->depthFailOp != VK_STENCIL_OP_KEEP ||
          face->passOp != VK_STENCIL_OP_KEEP;
}
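
/* For example, with both compareOp and depthCompareOp set to ALWAYS, failOp
 * and depthFailOp are forced to KEEP above, and the face only touches the
 * stencil buffer if its passOp does something other than KEEP.
 */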

/* Intel hardware is fairly sensitive to whether or not depth/stencil writes
 * are enabled.  In the presence of discards, it's fairly easy to get into the
 * non-promoted case which means a fairly big performance hit.  From the Iron
 * Lake PRM, Vol 2, pt. 1, section 8.4.3.2, "Early Depth Test Cases":
 *
 *    "Non-promoted depth (N) is active whenever the depth test can be done
 *    early but it cannot determine whether or not to write source depth to
 *    the depth buffer, therefore the depth write must be performed post pixel
 *    shader.  This includes cases where the pixel shader can kill pixels,
 *    including via sampler chroma key, as well as cases where the alpha test
 *    function is enabled, which kills pixels based on a programmable alpha
 *    test.  In this case, even if the depth test fails, the pixel cannot be
 *    killed if a stencil write is indicated.  Whether or not the stencil write
 *    happens depends on whether or not the pixel is killed later.  In these
 *    cases if stencil test fails and stencil writes are off, the pixels can
 *    also be killed early.  If stencil writes are enabled, the pixels must be
 *    treated as Computed depth (described above)."
 *
 * The same thing as mentioned in the stencil case can happen in the depth
 * case as well if it thinks it writes depth but, thanks to the depth test
 * being GL_EQUAL, the write doesn't actually matter.  A little extra work
 * up-front to try and disable depth and stencil writes can make a big
 * difference.
 *
 * Unfortunately, the way depth and stencil testing is specified, there are
 * many cases where, regardless of depth/stencil writes being enabled, nothing
 * actually gets written due to some other bit of state being set.  This
 * function attempts to "sanitize" the depth stencil state and disable writes
 * and sometimes even testing whenever possible.
 */
static void
sanitize_ds_state(VkPipelineDepthStencilStateCreateInfo *state,
                  bool *stencilWriteEnable,
                  VkImageAspectFlags ds_aspects)
{
   *stencilWriteEnable = state->stencilTestEnable;

   /* If the depth test is disabled, we won't be writing anything. */
   if (!state->depthTestEnable)
      state->depthWriteEnable = false;

   /* The Vulkan spec requires that if either depth or stencil is not present,
    * the pipeline is to act as if the test silently passes.
    */
   if (!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
      state->depthWriteEnable = false;
      state->depthCompareOp = VK_COMPARE_OP_ALWAYS;
   }

   if (!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)) {
      *stencilWriteEnable = false;
      state->front.compareOp = VK_COMPARE_OP_ALWAYS;
      state->back.compareOp = VK_COMPARE_OP_ALWAYS;
   }

   /* If the stencil test is enabled and always fails, then we will never get
    * to the depth test so we can just disable the depth test entirely.
    */
   if (state->stencilTestEnable &&
       state->front.compareOp == VK_COMPARE_OP_NEVER &&
       state->back.compareOp == VK_COMPARE_OP_NEVER) {
      state->depthTestEnable = false;
      state->depthWriteEnable = false;
   }

   /* If depthCompareOp is EQUAL then the value we would be writing to the
    * depth buffer is the same as the value that's already there so there's no
    * point in writing it.
    */
   if (state->depthCompareOp == VK_COMPARE_OP_EQUAL)
      state->depthWriteEnable = false;

   /* If the stencil ops are such that we don't actually ever modify the
    * stencil buffer, we should disable writes.
    */
   if (!sanitize_stencil_face(&state->front, state->depthCompareOp) &&
       !sanitize_stencil_face(&state->back, state->depthCompareOp))
      *stencilWriteEnable = false;

   /* If the depth test always passes and we never write out depth, that's the
    * same as if the depth test is disabled entirely.
    */
   if (state->depthCompareOp == VK_COMPARE_OP_ALWAYS &&
       !state->depthWriteEnable)
      state->depthTestEnable = false;

   /* If the stencil test always passes and we never write out stencil, that's
    * the same as if the stencil test is disabled entirely.
    */
   if (state->front.compareOp == VK_COMPARE_OP_ALWAYS &&
       state->back.compareOp == VK_COMPARE_OP_ALWAYS &&
       !*stencilWriteEnable)
      state->stencilTestEnable = false;
}

static void
emit_ds_state(struct anv_pipeline *pipeline,
              const VkPipelineDepthStencilStateCreateInfo *pCreateInfo,
              const struct anv_render_pass *pass,
              const struct anv_subpass *subpass)
{
#if GEN_GEN == 7
#  define depth_stencil_dw pipeline->gen7.depth_stencil_state
#elif GEN_GEN == 8
#  define depth_stencil_dw pipeline->gen8.wm_depth_stencil
#else
#  define depth_stencil_dw pipeline->gen9.wm_depth_stencil
#endif

   if (pCreateInfo == NULL) {
      /* We're going to OR this together with the dynamic state.  We need
       * to make sure it's initialized to something useful.
       */
      pipeline->writes_stencil = false;
      pipeline->stencil_test_enable = false;
      pipeline->writes_depth = false;
      pipeline->depth_test_enable = false;
      memset(depth_stencil_dw, 0, sizeof(depth_stencil_dw));
      return;
   }

   VkImageAspectFlags ds_aspects = 0;
   if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
      VkFormat depth_stencil_format =
         pass->attachments[subpass->depth_stencil_attachment.attachment].format;
      ds_aspects = vk_format_aspects(depth_stencil_format);
   }

   VkPipelineDepthStencilStateCreateInfo info = *pCreateInfo;
   sanitize_ds_state(&info, &pipeline->writes_stencil, ds_aspects);
   pipeline->stencil_test_enable = info.stencilTestEnable;
   pipeline->writes_depth = info.depthWriteEnable;
   pipeline->depth_test_enable = info.depthTestEnable;

   /* VkBool32 depthBoundsTestEnable; // optional (depth_bounds_test) */

#if GEN_GEN <= 7
   struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
#else
   struct GENX(3DSTATE_WM_DEPTH_STENCIL) depth_stencil = {
#endif
      .DepthTestEnable = info.depthTestEnable,
      .DepthBufferWriteEnable = info.depthWriteEnable,
      .DepthTestFunction = vk_to_gen_compare_op[info.depthCompareOp],
      .DoubleSidedStencilEnable = true,

      .StencilTestEnable = info.stencilTestEnable,
      .StencilFailOp = vk_to_gen_stencil_op[info.front.failOp],
      .StencilPassDepthPassOp = vk_to_gen_stencil_op[info.front.passOp],
      .StencilPassDepthFailOp = vk_to_gen_stencil_op[info.front.depthFailOp],
      .StencilTestFunction = vk_to_gen_compare_op[info.front.compareOp],
      .BackfaceStencilFailOp = vk_to_gen_stencil_op[info.back.failOp],
      .BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info.back.passOp],
      .BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[info.back.depthFailOp],
      .BackfaceStencilTestFunction = vk_to_gen_compare_op[info.back.compareOp],
   };

#if GEN_GEN <= 7
   GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
#else
   GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, depth_stencil_dw, &depth_stencil);
#endif
}

static void
emit_cb_state(struct anv_pipeline *pipeline,
              const VkPipelineColorBlendStateCreateInfo *info,
              const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   struct anv_device *device = pipeline->device;

   struct GENX(BLEND_STATE) blend_state = {
#if GEN_GEN >= 8
      .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
      .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
#endif
   };

   uint32_t surface_count = 0;
   struct anv_pipeline_bind_map *map;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
      surface_count = map->surface_count;
   }
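
   /* BLEND_STATE is a (possibly zero-length) header followed by one
    * BLEND_STATE_ENTRY per render target, so allocate room for the header
    * plus an entry for each surface in the binding table.
    */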
   const uint32_t num_dwords = GENX(BLEND_STATE_length) +
      GENX(BLEND_STATE_ENTRY_length) * surface_count;
   pipeline->blend_state =
      anv_state_pool_alloc(&device->dynamic_state_pool, num_dwords * 4, 64);

   bool has_writeable_rt = false;
   uint32_t *state_pos = pipeline->blend_state.map;
   state_pos += GENX(BLEND_STATE_length);
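
   /* On gen8+, 3DSTATE_PS_BLEND carries a copy of render target 0's blend
    * factors, so keep a copy of entry 0 as it is packed (used below).
    */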
#if GEN_GEN >= 8
   struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
#endif

   for (unsigned i = 0; i < surface_count; i++) {
      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];

      /* All color attachments are at the beginning of the binding table */
      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         break;

      /* We can have at most 8 attachments */
      assert(i < 8);

      if (info == NULL || binding->index >= info->attachmentCount) {
         /* Default everything to disabled */
         struct GENX(BLEND_STATE_ENTRY) entry = {
            .WriteDisableAlpha = true,
            .WriteDisableRed = true,
            .WriteDisableGreen = true,
            .WriteDisableBlue = true,
         };
         GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
         state_pos += GENX(BLEND_STATE_ENTRY_length);
         continue;
      }

      assert(binding->binding == 0);
      const VkPipelineColorBlendAttachmentState *a =
         &info->pAttachments[binding->index];

      struct GENX(BLEND_STATE_ENTRY) entry = {
#if GEN_GEN < 8
         .AlphaToCoverageEnable = ms_info && ms_info->alphaToCoverageEnable,
         .AlphaToOneEnable = ms_info && ms_info->alphaToOneEnable,
#endif
         .LogicOpEnable = info->logicOpEnable,
         .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
         .ColorBufferBlendEnable = a->blendEnable,
         .ColorClampRange = COLORCLAMP_RTFORMAT,
         .PreBlendColorClampEnable = true,
         .PostBlendColorClampEnable = true,
         .SourceBlendFactor = vk_to_gen_blend[a->srcColorBlendFactor],
         .DestinationBlendFactor = vk_to_gen_blend[a->dstColorBlendFactor],
         .ColorBlendFunction = vk_to_gen_blend_op[a->colorBlendOp],
         .SourceAlphaBlendFactor = vk_to_gen_blend[a->srcAlphaBlendFactor],
         .DestinationAlphaBlendFactor = vk_to_gen_blend[a->dstAlphaBlendFactor],
         .AlphaBlendFunction = vk_to_gen_blend_op[a->alphaBlendOp],
         .WriteDisableAlpha = !(a->colorWriteMask & VK_COLOR_COMPONENT_A_BIT),
         .WriteDisableRed = !(a->colorWriteMask & VK_COLOR_COMPONENT_R_BIT),
         .WriteDisableGreen = !(a->colorWriteMask & VK_COLOR_COMPONENT_G_BIT),
         .WriteDisableBlue = !(a->colorWriteMask & VK_COLOR_COMPONENT_B_BIT),
      };

      if (a->srcColorBlendFactor != a->srcAlphaBlendFactor ||
          a->dstColorBlendFactor != a->dstAlphaBlendFactor ||
          a->colorBlendOp != a->alphaBlendOp) {
#if GEN_GEN >= 8
         blend_state.IndependentAlphaBlendEnable = true;
#else
         entry.IndependentAlphaBlendEnable = true;
#endif
      }

      if (a->colorWriteMask != 0)
         has_writeable_rt = true;

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used.  Technically, this means the
       * hardware can do MORE than GL or Vulkan specify.  However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op.
       */
      if (a->colorBlendOp == VK_BLEND_OP_MIN ||
          a->colorBlendOp == VK_BLEND_OP_MAX) {
         entry.SourceBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationBlendFactor = BLENDFACTOR_ONE;
      }
      if (a->alphaBlendOp == VK_BLEND_OP_MIN ||
          a->alphaBlendOp == VK_BLEND_OP_MAX) {
         entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      }
      GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
      state_pos += GENX(BLEND_STATE_ENTRY_length);
#if GEN_GEN >= 8
      if (i == 0)
         bs0 = entry;
#endif
   }
#if GEN_GEN >= 8
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_BLEND), blend) {
      blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
      blend.HasWriteableRT = has_writeable_rt;
      blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
      blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
      blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
      blend.SourceBlendFactor = bs0.SourceBlendFactor;
      blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
      blend.AlphaTestEnable = false;
      blend.IndependentAlphaBlendEnable =
         blend_state.IndependentAlphaBlendEnable;
   }
#else
   (void)has_writeable_rt;
#endif

   GENX(BLEND_STATE_pack)(NULL, pipeline->blend_state.map, &blend_state);
   anv_state_flush(device, pipeline->blend_state);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
      bsp.BlendStatePointer = pipeline->blend_state.offset;
#if GEN_GEN >= 8
      bsp.BlendStatePointerValid = true;
#endif
   }
}

static void
emit_3dstate_clip(struct anv_pipeline *pipeline,
                  const VkPipelineViewportStateCreateInfo *vp_info,
                  const VkPipelineRasterizationStateCreateInfo *rs_info)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   (void) wm_prog_data;
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_CLIP), clip) {
      clip.ClipEnable = true;
      clip.StatisticsEnable = true;
      clip.EarlyCullEnable = true;
      clip.APIMode = APIMODE_D3D;
      clip.ViewportXYClipTestEnable = true;

      clip.ClipMode = CLIPMODE_NORMAL;

      clip.TriangleStripListProvokingVertexSelect = 0;
      clip.LineStripListProvokingVertexSelect = 0;
      clip.TriangleFanProvokingVertexSelect = 1;

      clip.MinimumPointWidth = 0.125;
      clip.MaximumPointWidth = 255.875;

      const struct brw_vue_prog_data *last =
         anv_pipeline_get_last_vue_prog_data(pipeline);

      /* From the Vulkan 1.0.45 spec:
       *
       *    "If the last active vertex processing stage shader entry point's
       *    interface does not include a variable decorated with
       *    ViewportIndex, then the first viewport is used."
       */
      if (vp_info && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
         clip.MaximumVPIndex = vp_info->viewportCount - 1;
      } else {
         clip.MaximumVPIndex = 0;
      }

      /* From the Vulkan 1.0.45 spec:
       *
       *    "If the last active vertex processing stage shader entry point's
       *    interface does not include a variable decorated with Layer, then
       *    the first layer is used."
       */
      clip.ForceZeroRTAIndexEnable =
         !(last->vue_map.slots_valid & VARYING_BIT_LAYER);

#if GEN_GEN == 7
      clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace];
      clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode];
      clip.ViewportZClipTestEnable = !pipeline->depth_clamp_enable;
      if (last) {
         clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
         clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
      }
#else
      clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
         (wm_prog_data->barycentric_interp_modes &
          BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) != 0 : 0;
#endif
   }
}

static void
emit_3dstate_streamout(struct anv_pipeline *pipeline,
                       const VkPipelineRasterizationStateCreateInfo *rs_info)
{
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_STREAMOUT), so) {
      so.RenderingDisable = rs_info->rasterizerDiscardEnable;
   }
}

static uint32_t
get_sampler_count(const struct anv_shader_bin *bin)
{
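   /* The SamplerCount fields in the 3DSTATE_* packets appear to be expressed
    * in multiples of four samplers, hence the round-up.
    */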
   return DIV_ROUND_UP(bin->bind_map.sampler_count, 4);
}

static uint32_t
get_binding_table_entry_count(const struct anv_shader_bin *bin)
{
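   /* This is presumably only a prefetch hint for the hardware, not a hard
    * limit, so a coarse rounded-up value is sufficient.
    */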
   return DIV_ROUND_UP(bin->bind_map.surface_count, 32);
}

static struct anv_address
get_scratch_address(struct anv_pipeline *pipeline,
                    gl_shader_stage stage,
                    const struct anv_shader_bin *bin)
{
   return (struct anv_address) {
      .bo = anv_scratch_pool_alloc(pipeline->device,
                                   &pipeline->device->scratch_pool,
                                   stage, bin->prog_data->total_scratch),
      .offset = 0,
   };
}

static uint32_t
get_scratch_space(const struct anv_shader_bin *bin)
{
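   /* PerThreadScratchSpace is a power-of-two encoding; ffs() of the size in
    * 2kB units yields the exponent the hardware expects, with zero meaning
    * no scratch.
    */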
   return ffs(bin->prog_data->total_scratch / 2048);
}

static uint32_t
get_urb_output_offset(void)
{
   /* Skip the VUE header and position slots */
   return 1;
}

UNUSED static uint32_t
get_urb_output_length(const struct anv_shader_bin *bin)
{
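   /* URB entry output offsets and lengths are counted in 256-bit units, i.e.
    * pairs of VUE slots, hence the divide-by-two with round-up before
    * subtracting the header offset.
    */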
   const struct brw_vue_prog_data *prog_data =
      (const struct brw_vue_prog_data *)bin->prog_data;

   return (prog_data->vue_map.num_slots + 1) / 2 - get_urb_output_offset();
}

static void
emit_3dstate_vs(struct anv_pipeline *pipeline)
{
   const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const struct anv_shader_bin *vs_bin =
      pipeline->shaders[MESA_SHADER_VERTEX];

   assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX));

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) {
      vs.Enable = true;
      vs.StatisticsEnable = true;
      vs.KernelStartPointer = vs_bin->kernel.offset;
#if GEN_GEN >= 8
      vs.SIMD8DispatchEnable =
         vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
#endif

      assert(!vs_prog_data->base.base.use_alt_mode);
      vs.SingleVertexDispatch = false;
      vs.VectorMaskEnable = false;
      vs.SamplerCount = get_sampler_count(vs_bin);
      vs.BindingTableEntryCount = get_binding_table_entry_count(vs_bin);
      vs.FloatingPointMode = IEEE754;
      vs.IllegalOpcodeExceptionEnable = false;
      vs.SoftwareExceptionEnable = false;
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
      vs.VertexCacheDisable = false;

      vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
      vs.VertexURBEntryReadOffset = 0;
      vs.DispatchGRFStartRegisterForURBData =
         vs_prog_data->base.base.dispatch_grf_start_reg;

#if GEN_GEN >= 8
      vs.VertexURBEntryOutputReadOffset = get_urb_output_offset();
      vs.VertexURBEntryOutputLength = get_urb_output_length(vs_bin);

      vs.UserClipDistanceClipTestEnableBitmask =
         vs_prog_data->base.clip_distance_mask;
      vs.UserClipDistanceCullTestEnableBitmask =
         vs_prog_data->base.cull_distance_mask;
#endif

      vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
      vs.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_VERTEX, vs_bin);
   }
}

static void
emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs);
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te);
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds);
      return;
   }

   const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct anv_shader_bin *tcs_bin =
      pipeline->shaders[MESA_SHADER_TESS_CTRL];
   const struct anv_shader_bin *tes_bin =
      pipeline->shaders[MESA_SHADER_TESS_EVAL];

   const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
   const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs) {
      hs.Enable = true;
      hs.StatisticsEnable = true;
      hs.KernelStartPointer = tcs_bin->kernel.offset;

      hs.SamplerCount = get_sampler_count(tcs_bin);
      hs.BindingTableEntryCount = get_binding_table_entry_count(tcs_bin);
      hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
      hs.IncludeVertexHandles = true;
      hs.InstanceCount = tcs_prog_data->instances - 1;

      hs.VertexURBEntryReadLength = 0;
      hs.VertexURBEntryReadOffset = 0;
      hs.DispatchGRFStartRegisterForURBData =
         tcs_prog_data->base.base.dispatch_grf_start_reg;

      hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
      hs.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin);
   }

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te) {
      te.Partitioning = tes_prog_data->partitioning;
      te.OutputTopology = tes_prog_data->output_topology;
      te.TEDomain = tes_prog_data->domain;
      te.TEEnable = true;
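      /* 63.0 and 64.0 are the largest tessellation factors the hardware
       * supports for odd and even fractional partitioning, respectively.
       */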
      te.MaximumTessellationFactorOdd = 63.0;
      te.MaximumTessellationFactorNotOdd = 64.0;
   }

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds) {
      ds.Enable = true;
      ds.StatisticsEnable = true;
      ds.KernelStartPointer = tes_bin->kernel.offset;

      ds.SamplerCount = get_sampler_count(tes_bin);
      ds.BindingTableEntryCount = get_binding_table_entry_count(tes_bin);
      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;

      ds.ComputeWCoordinateEnable =
         tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;

      ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
      ds.PatchURBEntryReadOffset = 0;
      ds.DispatchGRFStartRegisterForURBData =
         tes_prog_data->base.base.dispatch_grf_start_reg;

#if GEN_GEN >= 8
      ds.VertexURBEntryOutputReadOffset = 1;
      ds.VertexURBEntryOutputLength =
         (tes_prog_data->base.vue_map.num_slots + 1) / 2 - 1;

      ds.DispatchMode =
         tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
            DISPATCH_MODE_SIMD8_SINGLE_PATCH :
            DISPATCH_MODE_SIMD4X2;

      ds.UserClipDistanceClipTestEnableBitmask =
         tes_prog_data->base.clip_distance_mask;
      ds.UserClipDistanceCullTestEnableBitmask =
         tes_prog_data->base.cull_distance_mask;
#endif

      ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
      ds.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_TESS_EVAL, tes_bin);
   }
}

static void
emit_3dstate_gs(struct anv_pipeline *pipeline)
{
   const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct anv_shader_bin *gs_bin =
      pipeline->shaders[MESA_SHADER_GEOMETRY];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs);
      return;
   }

   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) {
      gs.Enable = true;
      gs.StatisticsEnable = true;
      gs.KernelStartPointer = gs_bin->kernel.offset;
      gs.DispatchMode = gs_prog_data->base.dispatch_mode;

      gs.SingleProgramFlow = false;
      gs.VectorMaskEnable = false;
      gs.SamplerCount = get_sampler_count(gs_bin);
      gs.BindingTableEntryCount = get_binding_table_entry_count(gs_bin);
      gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
      gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;

      if (GEN_GEN == 8) {
         /* Broadwell is weird.  It needs us to divide by 2. */
         gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
      } else {
         gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
      }

      gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
      gs.OutputTopology = gs_prog_data->output_topology;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.ControlDataFormat = gs_prog_data->control_data_format;
      gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
      gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
      gs.ReorderMode = TRAILING;

#if GEN_GEN >= 8
      gs.ExpectedVertexCount = gs_prog_data->vertices_in;
      gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
      gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
         gs_prog_data->static_vertex_count : 0;
#endif

      gs.VertexURBEntryReadOffset = 0;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.DispatchGRFStartRegisterForURBData =
         gs_prog_data->base.base.dispatch_grf_start_reg;

#if GEN_GEN >= 8
      gs.VertexURBEntryOutputReadOffset = get_urb_output_offset();
      gs.VertexURBEntryOutputLength = get_urb_output_length(gs_bin);

      gs.UserClipDistanceClipTestEnableBitmask =
         gs_prog_data->base.clip_distance_mask;
      gs.UserClipDistanceCullTestEnableBitmask =
         gs_prog_data->base.cull_distance_mask;
#endif

      gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
      gs.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_GEOMETRY, gs_bin);
   }
}

static bool
has_color_buffer_write_enabled(const struct anv_pipeline *pipeline)
{
   const struct anv_shader_bin *shader_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];
   if (!shader_bin)
      return false;

   const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map;
   for (int i = 0; i < bind_map->surface_count; i++) {
      if (bind_map->surface_to_descriptor[i].set !=
          ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         continue;

      if (bind_map->surface_to_descriptor[i].index != UINT8_MAX)
         return true;
   }

   return false;
}

static void
emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass,
                const VkPipelineMultisampleStateCreateInfo *multisample)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   MAYBE_UNUSED uint32_t samples =
      multisample ? multisample->rasterizationSamples : 1;

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
      wm.StatisticsEnable = true;
      wm.LineEndCapAntialiasingRegionWidth = _05pixels;
      wm.LineAntialiasingRegionWidth = _10pixels;
      wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;

      if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
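         /* If the shader asked for early fragment tests, force them on.
          * A shader with side effects must not be skipped by early
          * depth/stencil, so select PSEXEC for it; otherwise let the
          * hardware choose (NORMAL).
          */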
         if (wm_prog_data->early_fragment_tests) {
            wm.EarlyDepthStencilControl = EDSC_PREPS;
         } else if (wm_prog_data->has_side_effects) {
            wm.EarlyDepthStencilControl = EDSC_PSEXEC;
         } else {
            wm.EarlyDepthStencilControl = EDSC_NORMAL;
         }

         wm.BarycentricInterpolationMode =
            wm_prog_data->barycentric_interp_modes;

#if GEN_GEN < 8
         wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
         wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
         wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
         wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;

         /* If the subpass has a depth or stencil self-dependency, then we
          * need to force the hardware to do the depth/stencil write *after*
          * fragment shader execution.  Otherwise, the writes may hit memory
          * before we get around to fetching from the input attachment and we
          * may get the depth or stencil value from the current draw rather
          * than the previous one.
          */
         wm.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
                                    wm_prog_data->uses_kill;

         if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
             wm_prog_data->has_side_effects ||
             wm.PixelShaderKillsPixel ||
             has_color_buffer_write_enabled(pipeline))
            wm.ThreadDispatchEnable = true;

         if (samples > 1) {
            wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
            if (wm_prog_data->persample_dispatch) {
               wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
            } else {
               wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
            }
         } else {
            wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
         }
#endif
      }
   }
}

UNUSED static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}

static void
emit_3dstate_ps(struct anv_pipeline *pipeline,
                const VkPipelineColorBlendStateCreateInfo *blend)
{
   MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info;
   const struct anv_shader_bin *fs_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
#if GEN_GEN == 7
         /* Even if no fragments are ever dispatched, gen7 hardware hangs if
          * we don't at least set the maximum number of threads.
          */
         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
      }
      return;
   }

   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

#if GEN_GEN < 8
   /* The hardware wedges if you have this bit set but don't turn on any dual
    * source blend factors.
    */
   bool dual_src_blend = false;
   if (wm_prog_data->dual_src_blend && blend) {
      for (uint32_t i = 0; i < blend->attachmentCount; i++) {
         const VkPipelineColorBlendAttachmentState *bstate =
            &blend->pAttachments[i];

         if (bstate->blendEnable &&
             (is_dual_src_blend_factor(bstate->srcColorBlendFactor) ||
              is_dual_src_blend_factor(bstate->dstColorBlendFactor) ||
              is_dual_src_blend_factor(bstate->srcAlphaBlendFactor) ||
              is_dual_src_blend_factor(bstate->dstAlphaBlendFactor))) {
            dual_src_blend = true;
            break;
         }
      }
   }
#endif

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
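      /* KSP0 points at the SIMD8 variant of the shader; the SIMD16 variant,
       * when present, lives at prog_offset_2 within the kernel and is loaded
       * through KSP2.
       */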
      ps.KernelStartPointer0 = fs_bin->kernel.offset;
      ps.KernelStartPointer1 = 0;
      ps.KernelStartPointer2 = fs_bin->kernel.offset +
                               wm_prog_data->prog_offset_2;
      ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
      ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
      ps._32PixelDispatchEnable = false;

      ps.SingleProgramFlow = false;
      ps.VectorMaskEnable = true;
      ps.SamplerCount = get_sampler_count(fs_bin);
      ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin);
      ps.PushConstantEnable = wm_prog_data->base.nr_params > 0;
      ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
         POSOFFSET_SAMPLE : POSOFFSET_NONE;
#if GEN_GEN < 8
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.DualSourceBlendEnable = dual_src_blend;
#endif

#if GEN_IS_HASWELL
      /* Haswell requires the sample mask to be set in this packet as well
       * as in 3DSTATE_SAMPLE_MASK; the values should match.
       */
      ps.SampleMask = 0xff;
#endif

#if GEN_GEN >= 9
      ps.MaximumNumberofThreadsPerPSD = 64 - 1;
#elif GEN_GEN >= 8
      ps.MaximumNumberofThreadsPerPSD = 64 - 2;
#else
      ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         wm_prog_data->base.dispatch_grf_start_reg;
      ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         wm_prog_data->dispatch_grf_start_reg_2;

      ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
      ps.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, fs_bin);
   }
}

#if GEN_GEN >= 8
static void
emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
                      struct anv_subpass *subpass)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps);
      return;
   }

   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
      ps.PixelShaderValid = true;
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
      ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;

      /* If the subpass has a depth or stencil self-dependency, then we need
       * to force the hardware to do the depth/stencil write *after* fragment
       * shader execution.  Otherwise, the writes may hit memory before we get
       * around to fetching from the input attachment and we may get the depth
       * or stencil value from the current draw rather than the previous one.
       */
      ps.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
                                 wm_prog_data->uses_kill;

      /* The stricter cross-primitive coherency guarantees that the hardware
       * gives us with the "Accesses UAV" bit set for at least one shader stage
       * and the "UAV coherency required" bit set on the 3DPRIMITIVE command
       * are redundant within the current image, atomic counter and SSBO GL
       * APIs, which all have very loose ordering and coherency requirements
       * and generally rely on the application to insert explicit barriers when
       * a shader invocation is expected to see the memory writes performed by
       * the invocations of some previous primitive.  Regardless of the value
       * of "UAV coherency required", the "Accesses UAV" bits will implicitly
       * cause a DC flush, useless in most cases, when the lowermost stage with
       * the bit set finishes execution.
       *
       * It would be nice to disable it, but in some cases we can't because on
       * Gen8+ it also has an influence on rasterization via the PS UAV-only
       * signal (which could be set independently from the coherency mechanism
       * in the 3DSTATE_WM command on Gen7), and because in some cases it will
       * determine whether the hardware skips execution of the fragment shader
       * or not via the ThreadDispatchEnable signal.  However if we know that
       * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
       * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
       * difference so we may just disable it here.
       *
       * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
       * take into account KillPixels when no depth or stencil writes are
       * enabled.  In order for occlusion queries to work correctly with no
       * attachments, we need to force-enable it here.
       */
      if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
          !has_color_buffer_write_enabled(pipeline))
         ps.PixelShaderHasUAV = true;

#if GEN_GEN >= 9
      ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;
      ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ?
         ICMS_INNER_CONSERVATIVE : ICMS_NONE;
#else
      ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
#endif
   }
}

static void
emit_3dstate_vf_topology(struct anv_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
      vft.PrimitiveTopologyType = pipeline->topology;
   }
}
#endif

static void
emit_3dstate_vf_statistics(struct anv_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
      vfs.StatisticsEnable = true;
   }
}

static void
compute_kill_pixel(struct anv_pipeline *pipeline,
                   const VkPipelineMultisampleStateCreateInfo *ms_info,
                   const struct anv_subpass *subpass)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      pipeline->kill_pixel = false;
      return;
   }

   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   /* This computes the KillPixel portion of the computation for whether or
    * not we want to enable the PMA fix on gen8 or gen9.  It's given by this
    * chunk of the giant formula:
    *
    *    (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *     3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *     3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *     3DSTATE_PS_BLEND::AlphaTestEnable ||
    *     3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
    * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
    * of an alpha test.
    */
   pipeline->kill_pixel =
      subpass->has_ds_self_dep || wm_prog_data->uses_kill ||
      wm_prog_data->uses_omask ||
      (ms_info && ms_info->alphaToCoverageEnable);
}

static VkResult
genX(graphics_pipeline_create)(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkGraphicsPipelineCreateInfo*         pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
   struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_pipeline_init(pipeline, device, cache,
                              pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   assert(pCreateInfo->pVertexInputState);
   emit_vertex_input(pipeline, pCreateInfo->pVertexInputState);
   assert(pCreateInfo->pRasterizationState);
   emit_rs_state(pipeline, pCreateInfo->pRasterizationState,
                 pCreateInfo->pMultisampleState, pass, subpass);
   emit_ms_state(pipeline, pCreateInfo->pMultisampleState);
   emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass, subpass);
   emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
                 pCreateInfo->pMultisampleState);
   compute_kill_pixel(pipeline, pCreateInfo->pMultisampleState, subpass);

   emit_urb_setup(pipeline);

   emit_3dstate_clip(pipeline, pCreateInfo->pViewportState,
                     pCreateInfo->pRasterizationState);
   emit_3dstate_streamout(pipeline, pCreateInfo->pRasterizationState);

#if 0
   /* From gen7_vs_state.c */

   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    *    "Note: Because of corruption in IVB:GT2, software needs to flush the
    *    whole fixed function pipeline when the GS enable changes value in
    *    the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
   if (!brw->is_haswell && !brw->is_baytrail)
      gen7_emit_vs_workaround_flush(brw);
#endif

   emit_3dstate_vs(pipeline);
   emit_3dstate_hs_te_ds(pipeline);
   emit_3dstate_gs(pipeline);
   emit_3dstate_sbe(pipeline);
   emit_3dstate_wm(pipeline, subpass, pCreateInfo->pMultisampleState);
   emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState);
#if GEN_GEN >= 8
   emit_3dstate_ps_extra(pipeline, subpass);
   emit_3dstate_vf_topology(pipeline);
#endif
   emit_3dstate_vf_statistics(pipeline);

   *pPipeline = anv_pipeline_to_handle(pipeline);

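   /* Any error hit while emitting the pipeline batch (e.g. a failed
    * relocation-list allocation) was latched into batch.status, so return
    * that rather than assuming success.
    */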
   return pipeline->batch.status;
}

static VkResult
compute_pipeline_create(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkComputePipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const struct anv_physical_device *physical_device =
      &device->instance->physicalDevice;
   const struct gen_device_info *devinfo = &physical_device->info;
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));

   pipeline->active_stages = 0;

   pipeline->needs_data_cache = false;

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
   ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->stage.module);
   result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
                                    pCreateInfo->stage.pName,
                                    pCreateInfo->stage.pSpecializationInfo);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);

   anv_pipeline_setup_l3_config(pipeline, cs_prog_data->base.total_shared > 0);

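   /* The "right mask" is the execution mask for the last, possibly partial,
    * SIMD group of the workgroup: when the group size is not a multiple of
    * the SIMD size, only the low 'remainder' channels are enabled there.
    */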
   uint32_t group_size = cs_prog_data->local_size[0] *
      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);

   if (remainder > 0)
      pipeline->cs_right_mask = ~0u >> (32 - remainder);
   else
      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);

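   /* CURBE space must cover the per-thread push constants (one copy per
    * thread) plus the cross-thread constants, in 32-byte register units,
    * aligned to an even number of registers.
    */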
   const uint32_t vfe_curbe_allocation =
      ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
            cs_prog_data->push.cross_thread.regs, 2);

   const uint32_t subslices = MAX2(physical_device->subslice_total, 1);

   const struct anv_shader_bin *cs_bin =
      pipeline->shaders[MESA_SHADER_COMPUTE];

   anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) {
#if GEN_GEN > 7
      vfe.StackSize = 0;
#else
      vfe.GPGPUMode = true;
#endif
      vfe.MaximumNumberofThreads =
         devinfo->max_cs_threads * subslices - 1;
      vfe.NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2;
      vfe.ResetGatewayTimer = true;
#if GEN_GEN <= 8
      vfe.BypassGatewayControl = true;
#endif
      vfe.URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2;
      vfe.CURBEAllocationSize = vfe_curbe_allocation;

      vfe.PerThreadScratchSpace = get_scratch_space(cs_bin);
      vfe.ScratchSpaceBasePointer =
         get_scratch_address(pipeline, MESA_SHADER_COMPUTE, cs_bin);
   }

   struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
      .KernelStartPointer = cs_bin->kernel.offset,

      .SamplerCount = get_sampler_count(cs_bin),
      .BindingTableEntryCount = get_binding_table_entry_count(cs_bin),
      .BarrierEnable = cs_prog_data->uses_barrier,
      .SharedLocalMemorySize =
         encode_slm_size(GEN_GEN, cs_prog_data->base.total_shared),

#if !GEN_IS_HASWELL
      .ConstantURBEntryReadOffset = 0,
#endif
      .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
#if GEN_GEN >= 8 || GEN_IS_HASWELL
      .CrossThreadConstantDataReadLength =
         cs_prog_data->push.cross_thread.regs,
#endif

      .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
   };
   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                        pipeline->interface_descriptor_data,
                                        &desc);

   *pPipeline = anv_pipeline_to_handle(pipeline);

   return pipeline->batch.status;
}

VkResult genX(CreateGraphicsPipelines)(
    VkDevice                                    _device,
    VkPipelineCache                             pipelineCache,
    uint32_t                                    count,
    const VkGraphicsPipelineCreateInfo*         pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipelines)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);

   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = genX(graphics_pipeline_create)(_device,
                                              pipeline_cache,
                                              &pCreateInfos[i],
                                              pAllocator, &pPipelines[i]);

      /* Bail out on the first error as it is not obvious which error should
       * be reported when two different creations fail.
       */
      if (result != VK_SUCCESS)
         break;
   }

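   /* Make sure the application sees VK_NULL_HANDLE for every pipeline we
    * never attempted to create.
    */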
   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

VkResult genX(CreateComputePipelines)(
    VkDevice                                    _device,
    VkPipelineCache                             pipelineCache,
    uint32_t                                    count,
    const VkComputePipelineCreateInfo*          pCreateInfos,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipelines)
{
   ANV_FROM_HANDLE(anv_pipeline_cache, pipeline_cache, pipelineCache);

   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = compute_pipeline_create(_device, pipeline_cache,
                                       &pCreateInfos[i],
                                       pAllocator, &pPipelines[i]);

      /* Bail out on the first error as it is not obvious which error should
       * be reported when two different creations fail.
       */
      if (result != VK_SUCCESS)
         break;
   }

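   /* Make sure the application sees VK_NULL_HANDLE for every pipeline we
    * never attempted to create.
    */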
   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}