/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "common/intel_genX_state.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   /*
    * Take into account hardware restrictions when dealing with 64-bit floats.
    *
    * From the Broadwell spec, command reference structures, page 586:
    *  "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    *   64-bit components are stored in the URB without any conversion. In
    *   this case, vertex elements must be written as 128 or 256 bits, with
    *   VFCOMP_STORE_0 being used to pad the output as required. E.g., if
    *   R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
    *   Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
    *   set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
    *   Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
    *   a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
    *   Component 3 to be specified as VFCOMP_STORE_0 in order to output a
    *   256-bit vertex element."
    */
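
   /* Concretely, the rules below map the 64-bit passthru formats as follows
    * (components 0-3):
    *   R64_PASSTHRU:          SRC, STORE_0, NOSTORE, NOSTORE (128 bits)
    *   R64G64_PASSTHRU:       SRC, SRC,     NOSTORE, NOSTORE (128 bits)
    *   R64G64B64_PASSTHRU:    SRC, SRC,     SRC,     STORE_0 (256 bits)
    *   R64G64B64A64_PASSTHRU: SRC, SRC,     SRC,     SRC     (256 bits)
    */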
   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp >= 2 &&
              !isl_format_layouts[format].channels.b.bits &&
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* When emitting 64-bit attributes, we need to write either 128 or 256
       * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
       * VFCOMP_STORE_0 to pad the written chunk.
       */
      return VFCOMP_NOSTORE;
   } else if (comp < 3 ||
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* Note we need to pad with value 0, not 1, due to hardware
       * restrictions (see comment above).
       */
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}

void
genX(emit_vertex_input)(struct anv_batch *batch,
                        uint32_t *vertex_element_dws,
                        const struct anv_graphics_pipeline *pipeline,
                        const struct vk_vertex_input_state *vi)
{
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const uint64_t inputs_read = vs_prog_data->inputs_read;
   const uint64_t double_inputs_read =
      vs_prog_data->double_inputs_read & inputs_read;
   assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
   const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;

   for (uint32_t i = 0; i < pipeline->vs_input_elements; i++) {
      /* The SKL docs for VERTEX_ELEMENT_STATE say:
       *
       *    "All elements must be valid from Element[0] to the last valid
       *    element. (I.e. if Element[2] is valid then Element[1] and
       *    Element[0] must also be valid)."
       *
       * The SKL docs for 3D_Vertex_Component_Control say:
       *
       *    "Don't store this component. (Not valid for Component 0, but can
       *    be used for Component 1-3)."
       *
       * So we can't just leave a vertex element blank and hope for the best.
       * We have to tell the VF hardware to put something in it; so we just
       * store a bunch of zeros.
       *
       * TODO: Compact vertex elements so we never end up with holes.
       */
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .Valid = true,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &vertex_element_dws[i * 2],
                                      &element);
   }

   u_foreach_bit(a, vi->attributes_valid) {
      enum isl_format format = anv_get_isl_format(pipeline->base.device->info,
                                                  vi->attributes[a].format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR);

      uint32_t binding = vi->attributes[a].binding;
      assert(binding < MAX_VBS);

      if ((elements & (1 << a)) == 0)
         continue; /* Binding unused */

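      /* In NIR a dvec3/dvec4 attribute takes two input locations, so it sets
       * two bits in `elements` while occupying only one vertex element.  The
       * element slot for attribute `a` is therefore the number of lower
       * location bits minus half the lower double-sized location bits.
       */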
      uint32_t slot =
         __builtin_popcount(elements & ((1 << a) - 1)) -
         DIV_ROUND_UP(__builtin_popcount(elements_double &
                                         ((1 << a) - 1)), 2);

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = vi->attributes[a].binding,
         .Valid = true,
         .SourceElementFormat = format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = vi->attributes[a].offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &vertex_element_dws[slot * 2],
                                      &element);

      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing.  On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         bool per_instance = vi->bindings[binding].input_rate ==
                             VK_VERTEX_INPUT_RATE_INSTANCE;
         uint32_t divisor = vi->bindings[binding].divisor *
                            pipeline->instance_multiplier;

         vfi.InstancingEnable = per_instance;
         vfi.VertexElementIndex = slot;
         vfi.InstanceDataStepRate = per_instance ? divisor : 1;
      }
   }
}

static void
emit_vertex_input(struct anv_graphics_pipeline *pipeline,
                  const struct vk_graphics_pipeline_state *state,
                  const struct vk_vertex_input_state *vi)
{
   /* Only pack the VERTEX_ELEMENT_STATE if not dynamic so we can just memcpy
    * everything in gfx8_cmd_buffer.c.
    */
   if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI)) {
      genX(emit_vertex_input)(&pipeline->base.batch,
                              pipeline->vertex_input_data,
                              pipeline, vi);
   }
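
   /* An extra system-value vertex element carrying base vertex/base instance
    * is needed unless gl_DrawID is the only system value in use, in which
    * case only the draw-id element below gets emitted.
    */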
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const bool needs_svgs_elem = pipeline->svgs_count > 1 ||
                                !vs_prog_data->uses_drawid;
   const uint32_t id_slot = pipeline->vs_input_elements;
   const uint32_t drawid_slot = id_slot + needs_svgs_elem;
   if (pipeline->svgs_count > 0) {
      assert(pipeline->vertex_input_elems >= pipeline->svgs_count);
      uint32_t slot_offset =
         pipeline->vertex_input_elems - pipeline->svgs_count;
      if (needs_svgs_elem) {
#if GFX_VER < 11
         /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
          *
          *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
          *    Control field is set to something other than VFCOMP_STORE_SRC,
          *    no higher-numbered Component Control fields may be set to
          *    VFCOMP_STORE_SRC"
          *
          * This means that if we have BaseInstance, we need BaseVertex as
          * well.  Just do all or nothing.
          */
         uint32_t base_ctrl = (vs_prog_data->uses_firstvertex ||
                               vs_prog_data->uses_baseinstance) ?
                              VFCOMP_STORE_SRC : VFCOMP_STORE_0;
#endif

         struct GENX(VERTEX_ELEMENT_STATE) element = {
            .VertexBufferIndex = ANV_SVGS_VB_INDEX,
            .Valid = true,
            .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
#if GFX_VER >= 11
            /* On gen11, these are taken care of by extra parameter slots */
            .Component0Control = VFCOMP_STORE_0,
            .Component1Control = VFCOMP_STORE_0,
#else
            .Component0Control = base_ctrl,
            .Component1Control = base_ctrl,
#endif
            .Component2Control = VFCOMP_STORE_0,
            .Component3Control = VFCOMP_STORE_0,
         };
         GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                         &pipeline->vertex_input_data[slot_offset * 2],
                                         &element);
         slot_offset++;

         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
            vfi.VertexElementIndex = id_slot;
         }
      }

      if (vs_prog_data->uses_drawid) {
         struct GENX(VERTEX_ELEMENT_STATE) element = {
            .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
            .Valid = true,
            .SourceElementFormat = ISL_FORMAT_R32_UINT,
#if GFX_VER >= 11
            /* On gen11, this is taken care of by extra parameter slots */
            .Component0Control = VFCOMP_STORE_0,
#else
            .Component0Control = VFCOMP_STORE_SRC,
#endif
            .Component1Control = VFCOMP_STORE_0,
            .Component2Control = VFCOMP_STORE_0,
            .Component3Control = VFCOMP_STORE_0,
         };
         GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                         &pipeline->vertex_input_data[slot_offset * 2],
                                         &element);
         slot_offset++;

         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
            vfi.VertexElementIndex = drawid_slot;
         }
      }
   }
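
   /* 3DSTATE_VF_SGVS stores gl_VertexIndex and gl_InstanceIndex into
    * components 2 and 3 of the element at id_slot; on pre-gen11, components
    * 0 and 1 of that same element hold base vertex and base instance (see
    * above).
    */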
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.VertexIDEnable            = vs_prog_data->uses_vertexid;
      sgvs.VertexIDComponentNumber   = 2;
      sgvs.VertexIDElementOffset     = id_slot;
      sgvs.InstanceIDEnable          = vs_prog_data->uses_instanceid;
      sgvs.InstanceIDComponentNumber = 3;
      sgvs.InstanceIDElementOffset   = id_slot;
   }

#if GFX_VER >= 11
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS_2), sgvs) {
      /* gl_BaseVertex */
      sgvs.XP0Enable = vs_prog_data->uses_firstvertex;
      sgvs.XP0SourceSelect = XP0_PARAMETER;
      sgvs.XP0ComponentNumber = 0;
      sgvs.XP0ElementOffset = id_slot;

      /* gl_BaseInstance */
      sgvs.XP1Enable = vs_prog_data->uses_baseinstance;
      sgvs.XP1SourceSelect = StartingInstanceLocation;
      sgvs.XP1ComponentNumber = 1;
      sgvs.XP1ElementOffset = id_slot;

      /* gl_DrawID */
      sgvs.XP2Enable = vs_prog_data->uses_drawid;
      sgvs.XP2ComponentNumber = 0;
      sgvs.XP2ElementOffset = drawid_slot;
   }
#endif
}

void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct intel_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const unsigned entry_size[4],
                     enum intel_urb_deref_block_size *deref_block_size)
{
   const struct intel_device_info *devinfo = device->info;

   unsigned entries[4];
   unsigned start[4];
   bool constrained;
   intel_get_urb_config(devinfo, l3_config,
                        active_stages &
                           VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                        active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
                        entry_size, entries, start, deref_block_size,
                        &constrained);
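
   /* 3DSTATE_URB_VS, _HS, _DS and _GS share one layout and use consecutive
    * command sub-opcodes, so we can emit all four stages by bumping the
    * sub-opcode on the VS packet.
    */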
   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
         urb._3DCommandSubOpcode      += i;
         urb.VSURBStartingAddress      = start[i];
         urb.VSURBEntryAllocationSize  = entry_size[i] - 1;
         urb.VSNumberofURBEntries      = entries[i];
      }
   }
#if GFX_VERx10 >= 125
   if (device->physical->vk.supported_extensions.NV_mesh_shader ||
       device->physical->vk.supported_extensions.EXT_mesh_shader) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_MESH), zero);
      anv_batch_emit(batch, GENX(3DSTATE_URB_ALLOC_TASK), zero);
   }
#endif
}

#if GFX_VERx10 >= 125
static void
emit_urb_setup_mesh(struct anv_graphics_pipeline *pipeline,
                    enum intel_urb_deref_block_size *deref_block_size)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;

   const struct brw_task_prog_data *task_prog_data =
      anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK) ?
      get_task_prog_data(pipeline) : NULL;
   const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);

   const struct intel_mesh_urb_allocation alloc =
      intel_get_mesh_urb_config(devinfo, pipeline->base.l3_config,
                                task_prog_data ? task_prog_data->map.size_dw : 0,
                                mesh_prog_data->map.size_dw);

   /* Zero out the primitive pipeline URB allocations. */
   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_VS), urb) {
         urb._3DCommandSubOpcode += i;
      }
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_TASK), urb) {
      if (task_prog_data) {
         urb.TASKURBEntryAllocationSize   = alloc.task_entry_size_64b - 1;
         urb.TASKNumberofURBEntriesSlice0 = alloc.task_entries;
         urb.TASKNumberofURBEntriesSliceN = alloc.task_entries;
         urb.TASKURBStartingAddressSlice0 = alloc.task_starting_address_8kb;
         urb.TASKURBStartingAddressSliceN = alloc.task_starting_address_8kb;
      }
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_URB_ALLOC_MESH), urb) {
      urb.MESHURBEntryAllocationSize   = alloc.mesh_entry_size_64b - 1;
      urb.MESHNumberofURBEntriesSlice0 = alloc.mesh_entries;
      urb.MESHNumberofURBEntriesSliceN = alloc.mesh_entries;
      urb.MESHURBStartingAddressSlice0 = alloc.mesh_starting_address_8kb;
      urb.MESHURBStartingAddressSliceN = alloc.mesh_starting_address_8kb;
   }

   *deref_block_size = alloc.deref_block_size;
}
#endif

static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
               enum intel_urb_deref_block_size *deref_block_size)
{
#if GFX_VERx10 >= 125
   if (anv_pipeline_is_mesh(pipeline)) {
      emit_urb_setup_mesh(pipeline, deref_block_size);
      return;
   }
#endif

   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      const struct brw_vue_prog_data *prog_data =
         !anv_pipeline_has_stage(pipeline, i) ? NULL :
         (const struct brw_vue_prog_data *) pipeline->shaders[i]->prog_data;

      entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
   }

   genX(emit_urb_setup)(pipeline->base.device, &pipeline->base.batch,
                        pipeline->base.l3_config,
                        pipeline->active_stages, entry_size,
                        deref_block_size);
}

static void
emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#if GFX_VERx10 >= 125
      if (anv_pipeline_is_mesh(pipeline))
         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh);
#endif
      return;
   }

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      /* TODO(mesh): Figure out cases where we need attribute swizzling.
       * See also calculate_urb_setup() and related functions.
       */
      .AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline),
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
   };

   for (unsigned i = 0; i < 32; i++)
      sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;

   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };

   if (anv_pipeline_is_primitive(pipeline)) {
      const struct brw_vue_map *fs_input_map =
         &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;

      int first_slot = brw_compute_first_urb_slot_required(wm_prog_data->inputs,
                                                           fs_input_map);
      assert(first_slot % 2 == 0);
      unsigned urb_entry_read_offset = first_slot / 2;
      int max_source_attr = 0;
      for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
         uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
         int input_index = wm_prog_data->urb_setup[attr];

         assert(0 <= input_index);

         /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in
          * the VUE header.
          */
         if (attr == VARYING_SLOT_VIEWPORT ||
             attr == VARYING_SLOT_LAYER ||
             attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
            continue;
         }

         if (attr == VARYING_SLOT_PNTC) {
            sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
            continue;
         }

         const int slot = fs_input_map->varying_to_slot[attr];

         if (slot == -1) {
            /* This attribute does not exist in the VUE, which means that
             * the vertex shader did not write to it.  It could be that it's
             * a regular varying read by the fragment shader but not written
             * by the vertex shader, or it's gl_PrimitiveID.  In the first
             * case the value is undefined, in the second it needs to be
             * gl_PrimitiveID.
             */
            swiz.Attribute[input_index].ConstantSource = PRIM_ID;
            swiz.Attribute[input_index].ComponentOverrideX = true;
            swiz.Attribute[input_index].ComponentOverrideY = true;
            swiz.Attribute[input_index].ComponentOverrideZ = true;
            swiz.Attribute[input_index].ComponentOverrideW = true;
            continue;
         }

         /* We have to subtract two slots to account for the URB entry
          * output read offset in the VS and GS stages.
          */
         const int source_attr = slot - 2 * urb_entry_read_offset;
         assert(source_attr >= 0 && source_attr < 32);
         max_source_attr = MAX2(max_source_attr, source_attr);
         /* The hardware can only apply overrides to the first 16 attributes;
          * the remaining up to 16 have to be lined up so that the input
          * index equals the output index.  We'll need to do some tweaking to
          * make sure that's the case.
          */
         if (input_index < 16)
            swiz.Attribute[input_index].SourceAttribute = source_attr;
         else
            assert(source_attr == input_index);
      }
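
      /* VertexURBEntryReadLength is expressed in 256-bit (two varying slot)
       * units, so round the highest source attribute up to a pair.
       */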
      sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
      sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
      sbe.ForceVertexURBEntryReadOffset = true;
      sbe.ForceVertexURBEntryReadLength = true;
   } else {
      assert(anv_pipeline_is_mesh(pipeline));
#if GFX_VERx10 >= 125
      const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_MESH), sbe_mesh) {
         const struct brw_mue_map *mue = &mesh_prog_data->map;

         assert(mue->per_vertex_header_size_dw % 8 == 0);
         sbe_mesh.PerVertexURBEntryOutputReadOffset = mue->per_vertex_header_size_dw / 8;
         sbe_mesh.PerVertexURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_vertex_data_size_dw, 8);

         /* The clip distance array is passed in the per-vertex header so
          * that it can be consumed by the HW.  If the user wants to read it
          * in the FS, adjust the offset and length to cover it.  Conveniently
          * it is at the end of the per-vertex header, right before the
          * per-vertex attributes.
          *
          * Note that FS attribute reading must be aware that the clip
          * distances have a fixed position.
          */
         if (mue->per_vertex_header_size_dw > 8 &&
             (wm_prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] >= 0 ||
              wm_prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] >= 0)) {
            sbe_mesh.PerVertexURBEntryOutputReadOffset -= 1;
            sbe_mesh.PerVertexURBEntryOutputReadLength += 1;
         }

         assert(mue->per_primitive_header_size_dw % 8 == 0);
         sbe_mesh.PerPrimitiveURBEntryOutputReadOffset = mue->per_primitive_header_size_dw / 8;
         sbe_mesh.PerPrimitiveURBEntryOutputReadLength = DIV_ROUND_UP(mue->per_primitive_data_size_dw, 8);

         /* Just like with clip distances, if Primitive Shading Rate,
          * Viewport Index or Layer is read back in the FS, adjust the
          * offset and length to cover the Primitive Header, where PSR,
          * Viewport Index & Layer are stored.
          */
         if (wm_prog_data->urb_setup[VARYING_SLOT_VIEWPORT] >= 0 ||
             wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] >= 0 ||
             wm_prog_data->urb_setup[VARYING_SLOT_LAYER] >= 0) {
            assert(sbe_mesh.PerPrimitiveURBEntryOutputReadOffset > 0);
            sbe_mesh.PerPrimitiveURBEntryOutputReadOffset -= 1;
            sbe_mesh.PerPrimitiveURBEntryOutputReadLength += 1;
         }
      }
#endif
   }

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch,
                                        GENX(3DSTATE_SBE_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_pack)(&pipeline->base.batch, dw, &sbe);

   dw = anv_batch_emit_dwords(&pipeline->base.batch,
                              GENX(3DSTATE_SBE_SWIZ_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->base.batch, dw, &swiz);
}

/** Returns the final polygon mode for rasterization
 *
 * This function takes into account polygon mode, primitive topology and the
 * different shader stages which might generate their own type of primitives.
 */
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
                          VkPolygonMode polygon_mode,
                          VkPrimitiveTopology primitive_topology)
{
   if (anv_pipeline_is_mesh(pipeline)) {
      switch (get_mesh_prog_data(pipeline)->primitive_type) {
      case SHADER_PRIM_POINTS:
         return VK_POLYGON_MODE_POINT;
      case SHADER_PRIM_LINES:
         return VK_POLYGON_MODE_LINE;
      case SHADER_PRIM_TRIANGLES:
         return polygon_mode;
      default:
         unreachable("invalid primitive type for mesh");
      }
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      switch (get_gs_prog_data(pipeline)->output_topology) {
      case _3DPRIM_POINTLIST:
         return VK_POLYGON_MODE_POINT;

      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         return VK_POLYGON_MODE_LINE;

      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         return polygon_mode;
      }
      unreachable("Unsupported GS output topology");
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      switch (get_tes_prog_data(pipeline)->output_topology) {
      case BRW_TESS_OUTPUT_TOPOLOGY_POINT:
         return VK_POLYGON_MODE_POINT;

      case BRW_TESS_OUTPUT_TOPOLOGY_LINE:
         return VK_POLYGON_MODE_LINE;

      case BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW:
      case BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
         return polygon_mode;
      }
      unreachable("Unsupported TCS output topology");
   } else {
      switch (primitive_topology) {
      case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
         return VK_POLYGON_MODE_POINT;

      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
         return VK_POLYGON_MODE_LINE;

      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
         return polygon_mode;

      default:
         unreachable("Unsupported primitive topology");
      }
   }
}

const uint32_t genX(vk_to_intel_cullmode)[] = {
   [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
};

const uint32_t genX(vk_to_intel_fillmode)[] = {
   [VK_POLYGON_MODE_FILL]                    = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE]                    = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT]                   = FILL_MODE_POINT,
};

const uint32_t genX(vk_to_intel_front_face)[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
   [VK_FRONT_FACE_CLOCKWISE]                 = 0
};

void
genX(rasterization_mode)(VkPolygonMode raster_mode,
                         VkLineRasterizationModeEXT line_mode,
                         float line_width,
                         uint32_t *api_mode,
                         bool *msaa_rasterization_enable)
{
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      /* Unfortunately, configuring our line rasterization hardware on gfx8
       * and later is rather painful.  Instead of giving us bits to tell the
       * hardware what line mode to use like we had on gfx7, we now have an
       * arcane combination of API Mode and MSAA enable bits which do things
       * in a table which are expected to magically put the hardware into the
       * right mode for your API.  Sadly, Vulkan isn't any of the APIs the
       * hardware people thought of so nothing works the way you want it to.
       *
       * Look at the table titled "Multisample Rasterization Modes" in Vol 7
       * of the Skylake PRM for more details.
       */
      switch (line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         *api_mode = DX101;
#if GFX_VER <= 9
         /* Prior to ICL, the algorithm the HW uses to draw wide lines
          * doesn't quite match what the CTS expects, at least for
          * rectangular lines, so we set this to false here, making it draw
          * parallelograms instead, which work well enough.
          */
         *msaa_rasterization_enable = line_width < 1.0078125;
#else
         *msaa_rasterization_enable = true;
#endif
         break;

      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
         *api_mode = DX9OGL;
         *msaa_rasterization_enable = false;
         break;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      *api_mode = DX101;
      *msaa_rasterization_enable = true;
   }
}

static void
emit_rs_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_input_assembly_state *ia,
              const struct vk_rasterization_state *rs,
              const struct vk_multisample_state *ms,
              const struct vk_render_pass_state *rp,
              enum intel_urb_deref_block_size urb_deref_block_size)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
   };

   sf.ViewportTransformEnable = true;
   sf.StatisticsEnable = true;
   sf.VertexSubPixelPrecisionSelect = _8Bit;
   sf.AALineDistanceMode = true;

#if GFX_VER >= 12
   sf.DerefBlockSize = urb_deref_block_size;
#endif

   bool point_from_shader;
   if (anv_pipeline_is_primitive(pipeline)) {
      const struct brw_vue_prog_data *last_vue_prog_data =
         anv_pipeline_get_last_vue_prog_data(pipeline);
      point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;
   } else {
      assert(anv_pipeline_is_mesh(pipeline));
      const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
      point_from_shader = mesh_prog_data->map.start_dw[VARYING_SLOT_PSIZ] >= 0;
   }
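
   /* Vulkan requires a point size of 1.0 when the last vertex-processing
    * stage does not write PointSize, so fall back to a fixed state value of
    * 1.0 in that case.
    */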
   if (point_from_shader) {
      sf.PointWidthSource = Vertex;
   } else {
      sf.PointWidthSource = State;
      sf.PointWidth = 1.0;
   }

   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),
   };

   /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
    * "Multisample Modes State".
    */
   /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
    * computations.  If we ever set this bit to a different value, they will
    * need to be updated accordingly.
    */
   raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
   raster.ForceMultisampling = false;

   raster.ScissorRectangleEnable = true;

   raster.ConservativeRasterizationEnable =
      rs->conservative_mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
   raster.APIMode = DX101;

   GENX(3DSTATE_SF_pack)(NULL, pipeline->gfx8.sf, &sf);
   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gfx8.raster, &raster);
}

static void
emit_ms_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_multisample_state *ms)
{
   /* On Gfx8+ 3DSTATE_MULTISAMPLE only holds the number of samples. */
   genX(emit_multisample)(&pipeline->base.batch,
                          pipeline->rasterization_samples);
}

const uint32_t genX(vk_to_intel_logic_op)[] = {
   [VK_LOGIC_OP_COPY]                        = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR]                       = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND]                         = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE]                 = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED]                = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP]                       = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR]                         = LOGICOP_XOR,
   [VK_LOGIC_OP_OR]                          = LOGICOP_OR,
   [VK_LOGIC_OP_NOR]                         = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT]                  = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT]                      = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE]                  = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED]               = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED]                 = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND]                        = LOGICOP_NAND,
   [VK_LOGIC_OP_SET]                         = LOGICOP_SET,
};

const uint32_t genX(vk_to_intel_compare_op)[] = {
   [VK_COMPARE_OP_NEVER]                     = PREFILTEROP_NEVER,
   [VK_COMPARE_OP_LESS]                      = PREFILTEROP_LESS,
   [VK_COMPARE_OP_EQUAL]                     = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]             = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_GREATER]                   = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_NOT_EQUAL]                 = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL]          = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_ALWAYS]                    = PREFILTEROP_ALWAYS,
};

const uint32_t genX(vk_to_intel_stencil_op)[] = {
   [VK_STENCIL_OP_KEEP]                      = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO]                      = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE]                   = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP]       = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP]       = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT]                    = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP]        = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP]        = STENCILOP_DECR,
};

const uint32_t genX(vk_to_intel_primitive_type)[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

static void
emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
                  const struct vk_input_assembly_state *ia,
                  const struct vk_viewport_state *vp,
                  const struct vk_rasterization_state *rs)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   (void) wm_prog_data;

   struct GENX(3DSTATE_CLIP) clip = {
      GENX(3DSTATE_CLIP_header),
   };

   clip.ClipEnable = true;
   clip.StatisticsEnable = true;
   clip.EarlyCullEnable = true;
   clip.GuardbandClipTestEnable = true;

   /* ANV advertises 8-bit subpixel precision, matching the reference
    * rasterizer used by the Vulkan CTS, so set it explicitly.  This field
    * was added on gen8 even though there's an identically defined one in
    * 3DSTATE_SF.
    */
   clip.VertexSubPixelPrecisionSelect = _8Bit;
   clip.ClipMode = CLIPMODE_NORMAL;

   clip.MinimumPointWidth = 0.125;
   clip.MaximumPointWidth = 255.875;

   /* TODO(mesh): Multiview. */
   if (anv_pipeline_is_primitive(pipeline)) {
      const struct brw_vue_prog_data *last =
         anv_pipeline_get_last_vue_prog_data(pipeline);

      /* From the Vulkan 1.0.45 spec:
       *
       *    "If the last active vertex processing stage shader entry point's
       *    interface does not include a variable decorated with
       *    ViewportIndex, then the first viewport is used."
       */
      if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
         clip.MaximumVPIndex = vp->viewport_count > 0 ?
                               vp->viewport_count - 1 : 0;
      } else {
         clip.MaximumVPIndex = 0;
      }

      /* From the Vulkan 1.0.45 spec:
       *
       *    "If the last active vertex processing stage shader entry point's
       *    interface does not include a variable decorated with Layer, then
       *    the first layer is used."
       */
      clip.ForceZeroRTAIndexEnable =
         !(last->vue_map.slots_valid & VARYING_BIT_LAYER);

   } else if (anv_pipeline_is_mesh(pipeline)) {
      const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
      if (vp && vp->viewport_count > 0 &&
          mesh_prog_data->map.start_dw[VARYING_SLOT_VIEWPORT] >= 0) {
         clip.MaximumVPIndex = vp->viewport_count - 1;
      } else {
         clip.MaximumVPIndex = 0;
      }

      clip.ForceZeroRTAIndexEnable =
         mesh_prog_data->map.start_dw[VARYING_SLOT_LAYER] < 0;
   }

   clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
      wm_prog_data->uses_nonperspective_interp_modes : 0;

   GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx8.clip, &clip);

#if GFX_VERx10 >= 125
   if (anv_pipeline_is_mesh(pipeline)) {
      const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_CLIP_MESH), clip_mesh) {
         clip_mesh.PrimitiveHeaderEnable = mesh_prog_data->map.per_primitive_header_size_dw > 0;
         clip_mesh.UserClipDistanceClipTestEnableBitmask = mesh_prog_data->clip_distance_mask;
         clip_mesh.UserClipDistanceCullTestEnableBitmask = mesh_prog_data->cull_distance_mask;
      }
   }
#endif
}

static void
emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
                       const struct vk_rasterization_state *rs)
{
   const struct brw_vue_prog_data *prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   const struct brw_vue_map *vue_map = &prog_data->vue_map;

   nir_xfb_info *xfb_info;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
      xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info;
   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info;
   else
      xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info;

   if (xfb_info) {
      struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128];
      int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0};
      int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0};

      memset(so_decl, 0, sizeof(so_decl));

      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         const nir_xfb_output_info *output = &xfb_info->outputs[i];
         unsigned buffer = output->buffer;
         unsigned stream = xfb_info->buffer_to_stream[buffer];

         /* Our hardware is unusual in that it requires us to program SO_DECLs
          * for fake "hole" components, rather than simply taking the offset
          * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
          * program as many size = 4 holes as we can, then a final hole to
          * accommodate the final 1, 2, or 3 remaining.
          */
         int hole_dwords = (output->offset - next_offset[buffer]) / 4;
         while (hole_dwords > 0) {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = 1,
               .OutputBufferSlot = buffer,
               .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1,
            };
            hole_dwords -= 4;
         }
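
         /* e.g. a 24-byte gap (6 dwords) is emitted as one 4-dword hole
          * followed by a 2-dword hole.
          */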

         int varying = output->location;
         uint8_t component_mask = output->component_mask;
         /* VARYING_SLOT_PSIZ contains four scalar fields packed together:
          * - VARYING_SLOT_PRIMITIVE_SHADING_RATE in VARYING_SLOT_PSIZ.x
          * - VARYING_SLOT_LAYER                  in VARYING_SLOT_PSIZ.y
          * - VARYING_SLOT_VIEWPORT               in VARYING_SLOT_PSIZ.z
          * - VARYING_SLOT_PSIZ                   in VARYING_SLOT_PSIZ.w
          */
         if (varying == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 0; // SO_DECL_COMPMASK_X
         } else if (varying == VARYING_SLOT_LAYER) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 1; // SO_DECL_COMPMASK_Y
         } else if (varying == VARYING_SLOT_VIEWPORT) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 2; // SO_DECL_COMPMASK_Z
         } else if (varying == VARYING_SLOT_PSIZ) {
            component_mask = 1 << 3; // SO_DECL_COMPMASK_W
         }

         next_offset[buffer] = output->offset +
                               __builtin_popcount(component_mask) * 4;

         const int slot = vue_map->varying_to_slot[varying];
         if (slot < 0) {
            /* This can happen if the shader never writes to the varying.
             * Insert a hole instead of actual varying data.
             */
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = true,
               .OutputBufferSlot = buffer,
               .ComponentMask = component_mask,
            };
         } else {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .OutputBufferSlot = buffer,
               .RegisterIndex = slot,
               .ComponentMask = component_mask,
            };
         }
      }

      int max_decls = 0;
      for (unsigned s = 0; s < MAX_XFB_STREAMS; s++)
         max_decls = MAX2(max_decls, decls[s]);

      uint8_t sbs[MAX_XFB_STREAMS] = { };
      for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) {
         if (xfb_info->buffers_written & (1 << b))
            sbs[xfb_info->buffer_to_stream[b]] |= 1 << b;
      }

      /* Wa_16011773973:
       * If SOL is enabled and SO_DECL state has to be programmed,
       *    1. Send 3D State SOL state with SOL disabled
       *    2. Send SO_DECL NP state
       *    3. Send 3D State SOL with SOL Enabled
       */
      if (intel_device_info_is_dg2(pipeline->base.device->info))
         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so);

      uint32_t *dw = anv_batch_emitn(&pipeline->base.batch, 3 + 2 * max_decls,
                                     GENX(3DSTATE_SO_DECL_LIST),
                                     .StreamtoBufferSelects0 = sbs[0],
                                     .StreamtoBufferSelects1 = sbs[1],
                                     .StreamtoBufferSelects2 = sbs[2],
                                     .StreamtoBufferSelects3 = sbs[3],
                                     .NumEntries0 = decls[0],
                                     .NumEntries1 = decls[1],
                                     .NumEntries2 = decls[2],
                                     .NumEntries3 = decls[3]);
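
      /* The packet is three header dwords plus two dwords per entry; each
       * SO_DECL_ENTRY packs one SO_DECL for each of the four streams, which
       * is where 3 + 2 * max_decls and the dw + 3 + i * 2 stride come from.
       */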

      for (int i = 0; i < max_decls; i++) {
         GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2,
            &(struct GENX(SO_DECL_ENTRY)) {
               .Stream0Decl = so_decl[0][i],
               .Stream1Decl = so_decl[1][i],
               .Stream2Decl = so_decl[2][i],
               .Stream3Decl = so_decl[3][i],
            });
      }

#if GFX_VERx10 == 125
      /* Wa_14015946265: Send PC with CS stall after SO_DECL. */
      anv_batch_emit(&pipeline->base.batch, GENX(PIPE_CONTROL), pc) {
         pc.CommandStreamerStallEnable = true;
      }
#endif
   }

   struct GENX(3DSTATE_STREAMOUT) so = {
      GENX(3DSTATE_STREAMOUT_header),
   };

   if (xfb_info) {
      pipeline->uses_xfb = true;

      so.SOFunctionEnable = true;
      so.SOStatisticsEnable = true;

      so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
      so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
      so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
      so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;

      int urb_entry_read_offset = 0;
      int urb_entry_read_length =
         (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset;
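
      /* The read length is in 256-bit units (pairs of 16B VUE slots), hence
       * the round-up divide by two.
       */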

      /* We always read the whole vertex.  This could be reduced at some
       * point by reading less and offsetting the register index in the
       * SO_DECLs.
       */
      so.Stream0VertexReadOffset = urb_entry_read_offset;
      so.Stream0VertexReadLength = urb_entry_read_length - 1;
      so.Stream1VertexReadOffset = urb_entry_read_offset;
      so.Stream1VertexReadLength = urb_entry_read_length - 1;
      so.Stream2VertexReadOffset = urb_entry_read_offset;
      so.Stream2VertexReadLength = urb_entry_read_length - 1;
      so.Stream3VertexReadOffset = urb_entry_read_offset;
      so.Stream3VertexReadLength = urb_entry_read_length - 1;

#if INTEL_NEEDS_WA_14017076903
      /* Wa_14017076903: SOL should be programmed to force rendering on.
       *
       * This fixes a rare case where SOL must render to get correct
       * occlusion query results even when no PS and depth buffers are
       * bound.
       */
      so.ForceRendering = Force_on;
#endif
   }

   GENX(3DSTATE_STREAMOUT_pack)(NULL, pipeline->gfx8.streamout_state, &so);
}

static uint32_t
get_sampler_count(const struct anv_shader_bin *bin)
{
   uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4);

   /* We can potentially have way more than 32 samplers and that's ok.
    * However, the 3DSTATE_XS packets only have 3 bits to specify how
    * many to pre-fetch and all values above 4 are marked reserved.
    */
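   /* e.g. bind_map.sampler_count == 20 gives count_by_4 == 5, which the
    * MIN2 below clamps to a prefetch count of 4.
    */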
   return MIN2(count_by_4, 4);
}

static UNUSED struct anv_address
get_scratch_address(struct anv_pipeline *pipeline,
                    gl_shader_stage stage,
                    const struct anv_shader_bin *bin)
{
   return (struct anv_address) {
      .bo = anv_scratch_pool_alloc(pipeline->device,
                                   &pipeline->device->scratch_pool,
                                   stage, bin->prog_data->total_scratch),
      .offset = 0,
   };
}

static UNUSED uint32_t
get_scratch_space(const struct anv_shader_bin *bin)
{
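   /* Power-of-two encoding: total_scratch == 2048 -> 1, 4096 -> 2,
    * 8192 -> 3; each step doubles the per-thread allocation.
    */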
   return ffs(bin->prog_data->total_scratch / 2048);
}

static UNUSED uint32_t
get_scratch_surf(struct anv_pipeline *pipeline,
                 gl_shader_stage stage,
                 const struct anv_shader_bin *bin)
{
   if (bin->prog_data->total_scratch == 0)
      return 0;

   struct anv_bo *bo =
      anv_scratch_pool_alloc(pipeline->device,
                             &pipeline->device->scratch_pool,
                             stage, bin->prog_data->total_scratch);
   anv_reloc_list_add_bo(pipeline->batch.relocs,
                         pipeline->batch.alloc, bo);
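
   /* The ScratchSpaceBuffer fields programmed from this helper take the
    * surface offset in 16-byte units, hence the >> 4 below.
    */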
   return anv_scratch_pool_get_surf(pipeline->device,
                                    &pipeline->device->scratch_pool,
                                    bin->prog_data->total_scratch) >> 4;
}

static void
emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const struct anv_shader_bin *vs_bin =
      pipeline->shaders[MESA_SHADER_VERTEX];

   assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX));

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VS), vs) {
      vs.Enable = true;
      vs.StatisticsEnable = true;
      vs.KernelStartPointer = vs_bin->kernel.offset;
      vs.SIMD8DispatchEnable =
         vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;

      assert(!vs_prog_data->base.base.use_alt_mode);
#if GFX_VER < 11
      vs.SingleVertexDispatch = false;
#endif
      vs.VectorMaskEnable = false;
      /* Wa_1606682166:
       * Incorrect TDL's SSP address shift in SARB for 16:6 & 18:8 modes.
       * Disable the Sampler state prefetch functionality in the SARB by
       * programming 0xB000[30] to '1'.
       */
      vs.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(vs_bin);
      vs.BindingTableEntryCount = vs_bin->bind_map.surface_count;
      vs.FloatingPointMode = IEEE754;
      vs.IllegalOpcodeExceptionEnable = false;
      vs.SoftwareExceptionEnable = false;
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;

      if (GFX_VER == 9 && devinfo->gt == 4 &&
          anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
         /* On Sky Lake GT4, we have experienced some hangs related to the VS
          * cache and tessellation.  It is unknown exactly what is happening
          * but the Haswell docs for the "VS Reference Count Full Force Miss
          * Enable" field of the "Thread Mode" register refer to a HSW bug in
          * which the VUE handle reference count would overflow resulting in
          * internal reference counting bugs.  My (Jason's) best guess is that
          * this bug cropped back up on SKL GT4 when we suddenly had more
          * threads in play than any previous gfx9 hardware.
          *
          * What we do know for sure is that setting this bit when
          * tessellation shaders are in use fixes a GPU hang in Batman: Arkham
          * City when playing with DXVK (https://bugs.freedesktop.org/107280).
          * Disabling the vertex cache with tessellation shaders should only
          * have a minor performance impact as the tessellation shaders are
          * likely generating and processing far more geometry than the vertex
          * stage.
          */
         vs.VertexCacheDisable = true;
      }

      vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
      vs.VertexURBEntryReadOffset = 0;
      vs.DispatchGRFStartRegisterForURBData =
         vs_prog_data->base.base.dispatch_grf_start_reg;

      vs.UserClipDistanceClipTestEnableBitmask =
         vs_prog_data->base.clip_distance_mask;
      vs.UserClipDistanceCullTestEnableBitmask =
         vs_prog_data->base.cull_distance_mask;

#if GFX_VERx10 >= 125
      vs.ScratchSpaceBuffer =
         get_scratch_surf(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
#else
      vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
      vs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
#endif
   }
}

static void
emit_3dstate_hs_ds(struct anv_graphics_pipeline *pipeline,
                   const struct vk_tessellation_state *ts)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds);
      return;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *tcs_bin =
      pipeline->shaders[MESA_SHADER_TESS_CTRL];
   const struct anv_shader_bin *tes_bin =
      pipeline->shaders[MESA_SHADER_TESS_EVAL];

   const struct brw_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
   const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   struct GENX(3DSTATE_HS) hs = {
      GENX(3DSTATE_HS_header),
   };

   hs.Enable = true;
   hs.StatisticsEnable = true;
   hs.KernelStartPointer = tcs_bin->kernel.offset;
   /* Wa_1606682166 */
   hs.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(tcs_bin);
   hs.BindingTableEntryCount = tcs_bin->bind_map.surface_count;

#if GFX_VER >= 12
   /* Wa_1604578095:
    *
    * A hang occurs when the maximum number of threads is less than twice
    * the instance count; it must be more than twice the instance count.
    */
   assert((devinfo->max_tcs_threads / 2) > tcs_prog_data->instances);
#endif

   hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
   hs.IncludeVertexHandles = true;
   hs.InstanceCount = tcs_prog_data->instances - 1;

   hs.VertexURBEntryReadLength = 0;
   hs.VertexURBEntryReadOffset = 0;
   hs.DispatchGRFStartRegisterForURBData =
      tcs_prog_data->base.base.dispatch_grf_start_reg & 0x1f;
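   /* On Gfx12+ the dispatch GRF start register can exceed 5 bits, so its
    * high bits go in the separate field below (hence the & 0x1f above).
    */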
#if GFX_VER >= 12
   hs.DispatchGRFStartRegisterForURBData5 =
      tcs_prog_data->base.base.dispatch_grf_start_reg >> 5;
#endif

#if GFX_VERx10 >= 125
   hs.ScratchSpaceBuffer =
      get_scratch_surf(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
#else
   hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
   hs.ScratchSpaceBasePointer =
      get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
#endif

#if GFX_VER == 12
   /* Patch Count threshold specifies the maximum number of patches that
    * will be accumulated before a thread dispatch is forced.
    */
   hs.PatchCountThreshold = tcs_prog_data->patch_count_threshold;
#endif

   /* The compiler chooses between the SINGLE_PATCH and 8_PATCH TCS dispatch
    * modes; we simply program whichever mode it picked.
    */
   hs.DispatchMode = tcs_prog_data->base.dispatch_mode;
   hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;

   GENX(3DSTATE_HS_pack)(&pipeline->base.batch, pipeline->gfx8.hs, &hs);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds) {
      ds.Enable = true;
      ds.StatisticsEnable = true;
      ds.KernelStartPointer = tes_bin->kernel.offset;
      /* Wa_1606682166 */
      ds.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(tes_bin);
      ds.BindingTableEntryCount = tes_bin->bind_map.surface_count;
      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;

      ds.ComputeWCoordinateEnable =
         tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;

      ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
      ds.PatchURBEntryReadOffset = 0;
      ds.DispatchGRFStartRegisterForURBData =
         tes_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER < 11
      ds.DispatchMode =
         tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
         DISPATCH_MODE_SIMD8_SINGLE_PATCH :
         DISPATCH_MODE_SIMD4X2;
#else
      assert(tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8);
      ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
#endif

      ds.UserClipDistanceClipTestEnableBitmask =
         tes_prog_data->base.clip_distance_mask;
      ds.UserClipDistanceCullTestEnableBitmask =
         tes_prog_data->base.cull_distance_mask;

#if GFX_VER >= 12
      ds.PrimitiveIDNotRequired = !tes_prog_data->include_primitive_id;
#endif
#if GFX_VERx10 >= 125
      ds.ScratchSpaceBuffer =
         get_scratch_surf(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
#else
      ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
      ds.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
#endif
   }
}

static void
emit_3dstate_gs(struct anv_graphics_pipeline *pipeline)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *gs_bin =
      pipeline->shaders[MESA_SHADER_GEOMETRY];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs);
      return;
   }

   const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs) {
      gs.Enable = true;
      gs.StatisticsEnable = true;
      gs.KernelStartPointer = gs_bin->kernel.offset;
      gs.DispatchMode = gs_prog_data->base.dispatch_mode;

      gs.SingleProgramFlow = false;
      gs.VectorMaskEnable = false;
      /* Wa_1606682166 */
      gs.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(gs_bin);
      gs.BindingTableEntryCount = gs_bin->bind_map.surface_count;
      gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
      gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;

      gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;

      gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
      gs.OutputTopology = gs_prog_data->output_topology;
      gs.ControlDataFormat = gs_prog_data->control_data_format;
      gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
      gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
      gs.ReorderMode = TRAILING;

      gs.ExpectedVertexCount = gs_prog_data->vertices_in;
      gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
      gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
         gs_prog_data->static_vertex_count : 0;

      gs.VertexURBEntryReadOffset = 0;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.DispatchGRFStartRegisterForURBData =
         gs_prog_data->base.base.dispatch_grf_start_reg;

      gs.UserClipDistanceClipTestEnableBitmask =
         gs_prog_data->base.clip_distance_mask;
      gs.UserClipDistanceCullTestEnableBitmask =
         gs_prog_data->base.cull_distance_mask;

#if GFX_VERx10 >= 125
      gs.ScratchSpaceBuffer =
         get_scratch_surf(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
#else
      gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
      gs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
#endif
   }
}

static void
emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
                const struct vk_input_assembly_state *ia,
                const struct vk_rasterization_state *rs,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb,
                const struct vk_render_pass_state *rp)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(3DSTATE_WM) wm = {
      GENX(3DSTATE_WM_header),
   };
   wm.StatisticsEnable = true;
   wm.LineEndCapAntialiasingRegionWidth = _05pixels;
   wm.LineAntialiasingRegionWidth = _10pixels;
   wm.PointRasterizationRule = RASTRULE_UPPER_LEFT;

   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      if (wm_prog_data->early_fragment_tests) {
         wm.EarlyDepthStencilControl = EDSC_PREPS;
      } else if (wm_prog_data->has_side_effects) {
         wm.EarlyDepthStencilControl = EDSC_PSEXEC;
      } else {
         wm.EarlyDepthStencilControl = EDSC_NORMAL;
      }

      /* Gen8 hardware tries to compute ThreadDispatchEnable for us but
       * doesn't take into account KillPixels when no depth or stencil
       * writes are enabled.  In order for occlusion queries to work
       * correctly with no attachments, we need to force-enable PS thread
       * dispatch.
       *
       * The BDW docs are pretty clear that this bit isn't validated and
       * probably shouldn't be used in production:
       *
       *    "This must always be set to Normal. This field should not be
       *    tested for functional validation."
       *
       * Unfortunately, however, the other mechanism we have for doing this
       * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
       * Given two bad options, we choose the one which works.
       */
      pipeline->force_fragment_thread_dispatch =
         wm_prog_data->has_side_effects ||
         wm_prog_data->uses_kill;

      wm.BarycentricInterpolationMode =
         wm_prog_data_barycentric_modes(wm_prog_data, 0);
   }

   GENX(3DSTATE_WM_pack)(NULL, pipeline->gfx8.wm, &wm);
}

static void
emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb)
{
   UNUSED const struct intel_device_info *devinfo =
      pipeline->base.device->info;
   const struct anv_shader_bin *fs_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
      }
      return;
   }

   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  ms != NULL ? ms->rasterization_samples : 1,
                                  0 /* msaa_flags */);
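
      /* Up to three PS dispatch modes (e.g. SIMD8/SIMD16/SIMD32) can be
       * enabled at once; each gets its own kernel entry point and
       * constant-setup GRF start register below.
       */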
      ps.KernelStartPointer0 = fs_bin->kernel.offset +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = fs_bin->kernel.offset +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = fs_bin->kernel.offset +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);

      ps.SingleProgramFlow = false;
      ps.VectorMaskEnable = wm_prog_data->uses_vmask;
      /* Wa_1606682166 */
      ps.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(fs_bin);
      ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
      ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                              wm_prog_data->base.ubo_ranges[0].length;
      ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
                                  POSOFFSET_SAMPLE : POSOFFSET_NONE;

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

#if GFX_VERx10 >= 125
      ps.ScratchSpaceBuffer =
         get_scratch_surf(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
#else
      ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
      ps.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
#endif
   }
}

static void
emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
                      const struct vk_rasterization_state *rs,
                      const struct vk_render_pass_state *rp)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps);
      return;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps) {
      ps.PixelShaderValid = true;
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.PixelShaderIsPerSample =
         brw_wm_prog_data_is_persample(wm_prog_data, 0);
      ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;

      /* If the subpass has a depth or stencil self-dependency, then we need
       * to force the hardware to do the depth/stencil write *after* fragment
       * shader execution.  Otherwise, the writes may hit memory before we get
       * around to fetching from the input attachment and we may get the depth
       * or stencil value from the current draw rather than the previous one.
       */
      ps.PixelShaderKillsPixel = rp->depth_self_dependency ||
                                 rp->stencil_self_dependency ||
                                 wm_prog_data->uses_kill;

      ps.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
      ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;

      ps.InputCoverageMaskState = ICMS_NONE;
      assert(!wm_prog_data->inner_coverage); /* Not available in SPIR-V */
      if (!wm_prog_data->uses_sample_mask)
         ps.InputCoverageMaskState = ICMS_NONE;
      else if (brw_wm_prog_data_is_coarse(wm_prog_data, 0))
         ps.InputCoverageMaskState = ICMS_NORMAL;
      else if (wm_prog_data->post_depth_coverage)
         ps.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
      else
         ps.InputCoverageMaskState = ICMS_NORMAL;

#if GFX_VER >= 11
      ps.PixelShaderRequiresSourceDepthandorWPlaneCoefficients =
         wm_prog_data->uses_depth_w_coefficients;
      ps.PixelShaderIsPerCoarsePixel =
         brw_wm_prog_data_is_coarse(wm_prog_data, 0);
#endif
#if GFX_VERx10 >= 125
      /* TODO: We should only require this when the last geometry shader uses
       * a fragment shading rate that is not constant.
       */
      ps.EnablePSDependencyOnCPsizeChange =
         brw_wm_prog_data_is_coarse(wm_prog_data, 0);
#endif
   }
}

static void
emit_3dstate_vf_statistics(struct anv_graphics_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
      vfs.StatisticsEnable = true;
   }
}

static void
compute_kill_pixel(struct anv_graphics_pipeline *pipeline,
                   const struct vk_multisample_state *ms,
                   const struct vk_render_pass_state *rp)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      pipeline->kill_pixel = false;
      return;
   }

   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   /* This computes the KillPixel portion of the computation for whether or
    * not we want to enable the PMA fix on gfx8 or gfx9.  It's given by this
    * chunk of the giant formula:
    *
    *    (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *     3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *     3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *     3DSTATE_PS_BLEND::AlphaTestEnable ||
    *     3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
    * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
    * of an alpha test.
    */
   pipeline->kill_pixel =
      rp->depth_self_dependency ||
      rp->stencil_self_dependency ||
      wm_prog_data->uses_kill ||
      wm_prog_data->uses_omask ||
      (ms && ms->alpha_to_coverage_enable);
}
|
|
|
|
|
|
2021-03-16 10:14:30 -07:00
|
|
|
#if GFX_VER == 12
|
anv/gen12: Lower VK_KHR_multiview using Primitive Replication
Identify if view_index is used only for position calculation, and use
Primitive Replication to implement Multiview in Gen12. This feature
allows storing per-view position information in a single execution of
the shader, treating position as an array.
The shader is transformed by adding a for-loop around it, that have an
iteration per active view (in the view_mask). Stores to the position
now store into the position array for the current index in the loop,
and load_view_index() will return the view index corresponding to the
current index in the loop.
The feature is controlled by setting the environment variable
ANV_PRIMITIVE_REPLICATION_MAX_VIEWS, which defaults to 2 if unset.
For pipelines with view counts larger than that, the regular
instancing will be used instead of Primitive Replication. To disable
it completely set the variable to 0.
v2: Don't assume position is set in vertex shader; remove only stores
for position; don't apply optimizations since other passes will
do; clone shader body without extract/reinsert; don't use
last_block (potentially stale). (Jason)
Fix view_index immediate to contain the view index, not its order.
Check for maximum number of views supported.
Add guard for gen12.
v3: Clone the entire shader function and change it before reinsert;
disable optimization when shader has memory writes. (Jason)
Use a single environment variable with _DEBUG on the name.
v4: Change to use new nir_deref_instr.
When removing stores, look for mode nir_var_shader_out instead
of the walking the list of outputs.
Ensure unused derefs are removed in the non-position part of the
shader.
Remove dead control flow when identifying if can use or not
primitive replication.
v5: Consider all the active shaders (including fragment) when deciding
that Primitive Replication can be used.
Change environment variable to ANV_PRIMITIVE_REPLICATION.
Squash the emission of 3DSTATE_PRIMITIVE_REPLICATION into this patch.
Disable Prim Rep in blorp_exec_3d.
v6: Use a loop around the shader, instead of manually unrolling, since
the regular unroll pass will kick in.
Document that we don't expect to see copy_deref or load_deref
involving the position variable.
Recover use_primitive_replication value when loading pipeline from
the cache.
Set VARYING_SLOT_LAYER to 0 in the shader. Earlier versions were
relying on ForceZeroRTAIndexEnable but that might not be
sufficient.
Disable Prim Rep in cmd_buffer_so_memcpy.
v7: Don't use Primitive Replication if position is not set, fallback
to instancing; change environment variable to be
ANV_PRIMITVE_REPLICATION_MAX_VIEWS and default it to 2 based on
experiments.
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
2018-03-27 10:10:34 -07:00
|
|
|
static void
|
2022-02-11 11:40:34 -06:00
|
|
|
emit_3dstate_primitive_replication(struct anv_graphics_pipeline *pipeline,
|
2022-07-12 16:16:55 -05:00
|
|
|
const struct vk_render_pass_state *rp)
|
anv/gen12: Lower VK_KHR_multiview using Primitive Replication
Identify if view_index is used only for position calculation, and use
Primitive Replication to implement Multiview in Gen12. This feature
allows storing per-view position information in a single execution of
the shader, treating position as an array.
The shader is transformed by adding a for-loop around it, that have an
iteration per active view (in the view_mask). Stores to the position
now store into the position array for the current index in the loop,
and load_view_index() will return the view index corresponding to the
current index in the loop.
The feature is controlled by setting the environment variable
ANV_PRIMITIVE_REPLICATION_MAX_VIEWS, which defaults to 2 if unset.
For pipelines with view counts larger than that, the regular
instancing will be used instead of Primitive Replication. To disable
it completely set the variable to 0.
v2: Don't assume position is set in vertex shader; remove only stores
for position; don't apply optimizations since other passes will
do; clone shader body without extract/reinsert; don't use
last_block (potentially stale). (Jason)
Fix view_index immediate to contain the view index, not its order.
Check for maximum number of views supported.
Add guard for gen12.
v3: Clone the entire shader function and change it before reinsert;
disable optimization when shader has memory writes. (Jason)
Use a single environment variable with _DEBUG on the name.
v4: Change to use new nir_deref_instr.
When removing stores, look for mode nir_var_shader_out instead
of the walking the list of outputs.
Ensure unused derefs are removed in the non-position part of the
shader.
Remove dead control flow when identifying if can use or not
primitive replication.
v5: Consider all the active shaders (including fragment) when deciding
that Primitive Replication can be used.
Change environment variable to ANV_PRIMITIVE_REPLICATION.
Squash the emission of 3DSTATE_PRIMITIVE_REPLICATION into this patch.
Disable Prim Rep in blorp_exec_3d.
v6: Use a loop around the shader, instead of manually unrolling, since
the regular unroll pass will kick in.
Document that we don't expect to see copy_deref or load_deref
involving the position variable.
Recover use_primitive_replication value when loading pipeline from
the cache.
Set VARYING_SLOT_LAYER to 0 in the shader. Earlier versions were
relying on ForceZeroRTAIndexEnable but that might not be
sufficient.
Disable Prim Rep in cmd_buffer_so_memcpy.
v7: Don't use Primitive Replication if position is not set, fallback
to instancing; change environment variable to be
ANV_PRIMITVE_REPLICATION_MAX_VIEWS and default it to 2 based on
experiments.
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
2018-03-27 10:10:34 -07:00
|
|
|
{
|
2022-09-08 16:18:43 +02:00
|
|
|
if (anv_pipeline_is_mesh(pipeline)) {
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-18 11:34:59 -05:00
|
|
|
const int replication_count =
|
|
|
|
|
anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots;
|
|
|
|
|
|
|
|
|
|
assert(replication_count >= 1);
|
|
|
|
|
if (replication_count == 1) {
|
anv/gen12: Lower VK_KHR_multiview using Primitive Replication
Identify if view_index is used only for position calculation, and use
Primitive Replication to implement Multiview in Gen12. This feature
allows storing per-view position information in a single execution of
the shader, treating position as an array.
The shader is transformed by adding a for-loop around it, that have an
iteration per active view (in the view_mask). Stores to the position
now store into the position array for the current index in the loop,
and load_view_index() will return the view index corresponding to the
current index in the loop.
The feature is controlled by setting the environment variable
ANV_PRIMITIVE_REPLICATION_MAX_VIEWS, which defaults to 2 if unset.
For pipelines with view counts larger than that, the regular
instancing will be used instead of Primitive Replication. To disable
it completely set the variable to 0.
v2: Don't assume position is set in vertex shader; remove only stores
for position; don't apply optimizations since other passes will
do; clone shader body without extract/reinsert; don't use
last_block (potentially stale). (Jason)
Fix view_index immediate to contain the view index, not its order.
Check for maximum number of views supported.
Add guard for gen12.
v3: Clone the entire shader function and change it before reinsert;
disable optimization when shader has memory writes. (Jason)
Use a single environment variable with _DEBUG on the name.
v4: Change to use new nir_deref_instr.
When removing stores, look for mode nir_var_shader_out instead
of the walking the list of outputs.
Ensure unused derefs are removed in the non-position part of the
shader.
Remove dead control flow when identifying if can use or not
primitive replication.
v5: Consider all the active shaders (including fragment) when deciding
that Primitive Replication can be used.
Change environment variable to ANV_PRIMITIVE_REPLICATION.
Squash the emission of 3DSTATE_PRIMITIVE_REPLICATION into this patch.
Disable Prim Rep in blorp_exec_3d.
v6: Use a loop around the shader, instead of manually unrolling, since
the regular unroll pass will kick in.
Document that we don't expect to see copy_deref or load_deref
involving the position variable.
Recover use_primitive_replication value when loading pipeline from
the cache.
Set VARYING_SLOT_LAYER to 0 in the shader. Earlier versions were
relying on ForceZeroRTAIndexEnable but that might not be
sufficient.
Disable Prim Rep in cmd_buffer_so_memcpy.
v7: Don't use Primitive Replication if position is not set, fallback
to instancing; change environment variable to be
ANV_PRIMITVE_REPLICATION_MAX_VIEWS and default it to 2 based on
experiments.
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
2018-03-27 10:10:34 -07:00
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-10 13:17:37 +03:00
|
|
|
assert(replication_count == util_bitcount(rp->view_mask));
|
2022-07-18 11:34:59 -05:00
|
|
|
assert(replication_count <= MAX_VIEWS_FOR_PRIMITIVE_REPLICATION);
|
anv/gen12: Lower VK_KHR_multiview using Primitive Replication
Identify if view_index is used only for position calculation, and use
Primitive Replication to implement Multiview in Gen12. This feature
allows storing per-view position information in a single execution of
the shader, treating position as an array.
The shader is transformed by adding a for-loop around it, that have an
iteration per active view (in the view_mask). Stores to the position
now store into the position array for the current index in the loop,
and load_view_index() will return the view index corresponding to the
current index in the loop.
The feature is controlled by setting the environment variable
ANV_PRIMITIVE_REPLICATION_MAX_VIEWS, which defaults to 2 if unset.
For pipelines with view counts larger than that, the regular
instancing will be used instead of Primitive Replication. To disable
it completely set the variable to 0.
v2: Don't assume position is set in vertex shader; remove only stores
for position; don't apply optimizations since other passes will
do; clone shader body without extract/reinsert; don't use
last_block (potentially stale). (Jason)
Fix view_index immediate to contain the view index, not its order.
Check for maximum number of views supported.
Add guard for gen12.
v3: Clone the entire shader function and change it before reinsert;
disable optimization when shader has memory writes. (Jason)
Use a single environment variable with _DEBUG on the name.
v4: Change to use new nir_deref_instr.
When removing stores, look for mode nir_var_shader_out instead
of the walking the list of outputs.
Ensure unused derefs are removed in the non-position part of the
shader.
Remove dead control flow when identifying if can use or not
primitive replication.
v5: Consider all the active shaders (including fragment) when deciding
that Primitive Replication can be used.
Change environment variable to ANV_PRIMITIVE_REPLICATION.
Squash the emission of 3DSTATE_PRIMITIVE_REPLICATION into this patch.
Disable Prim Rep in blorp_exec_3d.
v6: Use a loop around the shader, instead of manually unrolling, since
the regular unroll pass will kick in.
Document that we don't expect to see copy_deref or load_deref
involving the position variable.
Recover use_primitive_replication value when loading pipeline from
the cache.
Set VARYING_SLOT_LAYER to 0 in the shader. Earlier versions were
relying on ForceZeroRTAIndexEnable but that might not be
sufficient.
Disable Prim Rep in cmd_buffer_so_memcpy.
v7: Don't use Primitive Replication if position is not set; fall back
to instancing. Change the environment variable to
ANV_PRIMITIVE_REPLICATION_MAX_VIEWS and default it to 2 based on
experiments.
Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
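For illustration, a minimal standalone sketch of the environment-variable
cap described above. The variable name and the default of 2 come from this
message; the helper names and the parsing are assumptions, not the driver's
actual code:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

/* Hypothetical helpers; the real driver parses the variable elsewhere. */
static int
primitive_replication_max_views(void)
{
   const char *s = getenv("ANV_PRIMITIVE_REPLICATION_MAX_VIEWS");
   return s != NULL ? atoi(s) : 2; /* defaults to 2 if unset */
}

static bool
can_use_primitive_replication(uint32_t view_mask)
{
   /* Setting the variable to 0 disables the feature; view counts above
    * the cap fall back to regular instancing. */
   return view_mask != 0 &&
          __builtin_popcount(view_mask) <= primitive_replication_max_views();
}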
2018-03-27 10:10:34 -07:00
|
|
|
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) {
|
2022-07-18 11:34:59 -05:00
|
|
|
pr.ReplicaMask = (1 << replication_count) - 1;
|
|
|
|
|
pr.ReplicationCount = replication_count - 1;
|
2018-03-27 10:10:34 -07:00
|
|
|
|
2021-02-22 10:14:03 -05:00
|
|
|
int i = 0;
|
2022-07-12 16:16:55 -05:00
|
|
|
u_foreach_bit(view_index, rp->view_mask) {
|
2018-03-27 10:10:34 -07:00
|
|
|
pr.RTAIOffset[i] = view_index;
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
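As a worked reference for the packing above, a standalone sketch with
compiler builtins standing in for util_bitcount()/u_foreach_bit(); the
16-entry array length mirrors the RTAIOffset field as used here. With
view_mask = 0xb (views 0, 1 and 3) it produces replication_count = 3,
ReplicaMask = 0x7, ReplicationCount = 2 and RTAIOffset = {0, 1, 3}:

#include <stdint.h>

static void
pack_primitive_replication(uint32_t view_mask,
                           uint32_t *replica_mask,
                           uint32_t *replication_count,
                           uint32_t rtai_offset[16])
{
   const uint32_t count = __builtin_popcount(view_mask);

   *replica_mask = (1u << count) - 1;  /* one bit per emitted replica */
   *replication_count = count - 1;     /* the field holds count minus one */

   /* Equivalent of the u_foreach_bit() loop: each set bit of the view
    * mask becomes the render-target-array index of one replica. */
   int i = 0;
   while (view_mask) {
      rtai_offset[i++] = __builtin_ctz(view_mask);
      view_mask &= view_mask - 1; /* clear the lowest set bit */
   }
}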
|
|
|
|
|
|
2021-05-20 12:07:34 -07:00
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
|
static void
|
|
|
|
|
emit_task_state(struct anv_graphics_pipeline *pipeline)
|
|
|
|
|
{
|
|
|
|
|
assert(anv_pipeline_is_mesh(pipeline));
|
|
|
|
|
|
|
|
|
|
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const struct anv_shader_bin *task_bin = pipeline->shaders[MESA_SHADER_TASK];
|
|
|
|
|
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), tc) {
|
|
|
|
|
tc.TaskShaderEnable = true;
|
|
|
|
|
tc.ScratchSpaceBuffer =
|
|
|
|
|
get_scratch_surf(&pipeline->base, MESA_SHADER_TASK, task_bin);
|
2022-10-27 09:50:51 +02:00
|
|
|
tc.MaximumNumberofThreadGroups = 511;
|
2021-05-20 12:07:34 -07:00
|
|
|
}
|
|
|
|
|
|
2022-08-04 12:56:17 -07:00
|
|
|
const struct intel_device_info *devinfo = pipeline->base.device->info;
|
2021-05-20 12:07:34 -07:00
|
|
|
const struct brw_task_prog_data *task_prog_data = get_task_prog_data(pipeline);
|
|
|
|
|
const struct brw_cs_dispatch_info task_dispatch =
|
|
|
|
|
brw_cs_get_dispatch_info(devinfo, &task_prog_data->base, NULL);
|
|
|
|
|
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_SHADER), task) {
|
|
|
|
|
task.KernelStartPointer = task_bin->kernel.offset;
|
|
|
|
|
task.SIMDSize = task_dispatch.simd_size / 16;
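/* dispatch.simd_size is 8, 16 or 32, so the integer division by 16
 * above yields 0, 1 or 2 — presumably the hardware's SIMD8/SIMD16/
 * SIMD32 encoding (inferred from the arithmetic, not BSpec text). */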
|
|
|
|
|
task.MessageSIMD = task.SIMDSize;
|
|
|
|
|
task.NumberofThreadsinGPGPUThreadGroup = task_dispatch.threads;
|
|
|
|
|
task.ExecutionMask = task_dispatch.right_mask;
|
|
|
|
|
task.LocalXMaximum = task_dispatch.group_size - 1;
|
|
|
|
|
task.EmitLocalIDX = true;
|
|
|
|
|
|
|
|
|
|
task.NumberofBarriers = task_prog_data->base.uses_barrier;
|
|
|
|
|
task.SharedLocalMemorySize =
|
|
|
|
|
encode_slm_size(GFX_VER, task_prog_data->base.base.total_shared);
|
|
|
|
|
|
2021-07-12 13:46:31 +02:00
|
|
|
/*
|
|
|
|
|
* 3DSTATE_TASK_SHADER_DATA.InlineData[0:1] will be used for the address
|
2021-12-13 14:14:04 +01:00
|
|
|
* of a buffer holding the push constants and the descriptor set table,
|
|
|
|
|
* and InlineData[2:7] will be used for the first few push constants.
|
2021-07-12 13:46:31 +02:00
|
|
|
*/
|
|
|
|
|
task.EmitInlineParameter = true;
|
2021-07-16 15:06:44 +02:00
|
|
|
|
|
|
|
|
task.XP0Required = task_prog_data->uses_drawid;
|
2021-07-12 13:46:31 +02:00
|
|
|
}
|
2021-05-20 12:07:34 -07:00
|
|
|
|
|
|
|
|
/* Recommended values from "Task and Mesh Distribution Programming". */
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_REDISTRIB), redistrib) {
|
|
|
|
|
redistrib.LocalBOTAccumulatorThreshold = MULTIPLIER_1;
|
2022-04-21 17:26:52 +02:00
|
|
|
redistrib.SmallTaskThreshold = 1; /* 2^N */
|
|
|
|
|
redistrib.TargetMeshBatchSize = devinfo->num_slices > 2 ? 3 : 5; /* 2^N */
|
2021-05-20 12:07:34 -07:00
|
|
|
redistrib.TaskRedistributionLevel = TASKREDISTRIB_BOM;
|
2023-01-30 17:18:04 +01:00
|
|
|
redistrib.TaskRedistributionMode = TASKREDISTRIB_RR_STRICT;
|
2021-05-20 12:07:34 -07:00
|
|
|
}
|
|
|
|
|
}
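The InlineData comment above amounts to the following layout — a sketch
with illustrative names, not the driver's types (eight dwords total: a
64-bit address followed by six push-constant dwords):

#include <stdint.h>

struct inline_data_layout {
   uint64_t push_and_desc_buffer_addr; /* InlineData[0:1] */
   uint32_t first_push_constants[6];   /* InlineData[2:7] */
};

_Static_assert(sizeof(struct inline_data_layout) == 8 * sizeof(uint32_t),
               "inline data is eight dwords");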
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
emit_mesh_state(struct anv_graphics_pipeline *pipeline)
|
|
|
|
|
{
|
|
|
|
|
assert(anv_pipeline_is_mesh(pipeline));
|
|
|
|
|
|
|
|
|
|
const struct anv_shader_bin *mesh_bin = pipeline->shaders[MESA_SHADER_MESH];
|
|
|
|
|
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), mc) {
|
|
|
|
|
mc.MeshShaderEnable = true;
|
|
|
|
|
mc.ScratchSpaceBuffer =
|
|
|
|
|
get_scratch_surf(&pipeline->base, MESA_SHADER_MESH, mesh_bin);
|
2022-10-27 09:50:51 +02:00
|
|
|
mc.MaximumNumberofThreadGroups = 511;
|
2021-05-20 12:07:34 -07:00
|
|
|
}
|
|
|
|
|
|
2022-08-04 12:56:17 -07:00
|
|
|
const struct intel_device_info *devinfo = pipeline->base.device->info;
|
2021-05-20 12:07:34 -07:00
|
|
|
const struct brw_mesh_prog_data *mesh_prog_data = get_mesh_prog_data(pipeline);
|
|
|
|
|
const struct brw_cs_dispatch_info mesh_dispatch =
|
|
|
|
|
brw_cs_get_dispatch_info(devinfo, &mesh_prog_data->base, NULL);
|
|
|
|
|
|
|
|
|
|
const unsigned output_topology =
|
|
|
|
|
mesh_prog_data->primitive_type == SHADER_PRIM_POINTS ? OUTPUT_POINT :
|
|
|
|
|
mesh_prog_data->primitive_type == SHADER_PRIM_LINES ? OUTPUT_LINE :
|
|
|
|
|
OUTPUT_TRI;
|
|
|
|
|
|
|
|
|
|
uint32_t index_format;
|
|
|
|
|
switch (mesh_prog_data->index_format) {
|
|
|
|
|
case BRW_INDEX_FORMAT_U32:
|
|
|
|
|
index_format = INDEX_U32;
|
|
|
|
|
break;
|
2023-01-25 15:06:23 +01:00
|
|
|
case BRW_INDEX_FORMAT_U888X:
|
|
|
|
|
index_format = INDEX_U888X;
|
|
|
|
|
break;
|
2021-05-20 12:07:34 -07:00
|
|
|
default:
|
|
|
|
|
unreachable("invalid index format");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_SHADER), mesh) {
|
|
|
|
|
mesh.KernelStartPointer = mesh_bin->kernel.offset;
|
|
|
|
|
mesh.SIMDSize = mesh_dispatch.simd_size / 16;
|
|
|
|
|
mesh.MessageSIMD = mesh.SIMDSize;
|
|
|
|
|
mesh.NumberofThreadsinGPGPUThreadGroup = mesh_dispatch.threads;
|
|
|
|
|
mesh.ExecutionMask = mesh_dispatch.right_mask;
|
|
|
|
|
mesh.LocalXMaximum = mesh_dispatch.group_size - 1;
|
|
|
|
|
mesh.EmitLocalIDX = true;
|
|
|
|
|
|
2022-12-12 14:28:05 +01:00
|
|
|
mesh.MaximumPrimitiveCount = MAX2(mesh_prog_data->map.max_primitives, 1) - 1;
|
2021-05-20 12:07:34 -07:00
|
|
|
mesh.OutputTopology = output_topology;
|
|
|
|
|
mesh.PerVertexDataPitch = mesh_prog_data->map.per_vertex_pitch_dw / 8;
|
|
|
|
|
mesh.PerPrimitiveDataPresent = mesh_prog_data->map.per_primitive_pitch_dw > 0;
|
|
|
|
|
mesh.PerPrimitiveDataPitch = mesh_prog_data->map.per_primitive_pitch_dw / 8;
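/* The map's per_*_pitch_dw values are in dwords; the division by 8
 * above suggests both pitch fields are programmed in 8-dword (32-byte)
 * units (an assumption based on the arithmetic). */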
|
|
|
|
|
mesh.IndexFormat = index_format;
|
|
|
|
|
|
|
|
|
|
mesh.NumberofBarriers = mesh_prog_data->base.uses_barrier;
|
|
|
|
|
mesh.SharedLocalMemorySize =
|
|
|
|
|
encode_slm_size(GFX_VER, mesh_prog_data->base.base.total_shared);
|
|
|
|
|
|
2021-07-12 13:46:31 +02:00
|
|
|
/*
|
|
|
|
|
* 3DSTATE_MESH_SHADER_DATA.InlineData[0:1] will be used for the address
|
2021-12-13 14:14:04 +01:00
|
|
|
* of a buffer holding the push constants and the descriptor set table,
|
|
|
|
|
* and InlineData[2:7] will be used for the first few push constants.
|
2021-07-12 13:46:31 +02:00
|
|
|
*/
|
|
|
|
|
mesh.EmitInlineParameter = true;
|
2021-07-16 15:06:44 +02:00
|
|
|
|
|
|
|
|
mesh.XP0Required = mesh_prog_data->uses_drawid;
|
2021-07-12 13:46:31 +02:00
|
|
|
}
|
2021-05-20 12:07:34 -07:00
|
|
|
|
|
|
|
|
/* Recommended values from "Task and Mesh Distribution Programming". */
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_DISTRIB), distrib) {
|
|
|
|
|
distrib.DistributionMode = MESH_RR_FREE;
|
2022-10-28 17:03:30 +02:00
|
|
|
distrib.TaskDistributionBatchSize = devinfo->num_slices > 2 ? 4 : 9; /* 2^N thread groups */
|
|
|
|
|
distrib.MeshDistributionBatchSize = devinfo->num_slices > 2 ? 3 : 3; /* 2^N thread groups */
|
2021-05-20 12:07:34 -07:00
|
|
|
}
|
|
|
|
|
}
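A hedged sketch of the index formats chosen above, assuming INDEX_U32
stores one 32-bit vertex index per dword while INDEX_U888X packs three
8-bit indices into the low 24 bits with the top byte unused (the byte
order here is illustrative):

#include <stdint.h>

static inline uint32_t
pack_index_u888x(uint8_t i0, uint8_t i1, uint8_t i2)
{
   /* Three 8-bit indices per dword; the fourth byte is the unused "X". */
   return (uint32_t)i0 | ((uint32_t)i1 << 8) | ((uint32_t)i2 << 16);
}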
|
|
|
|
|
#endif
|
|
|
|
|
|
2022-01-22 22:26:46 +02:00
|
|
|
void
|
|
|
|
|
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
|
2022-07-12 16:16:55 -05:00
|
|
|
const struct vk_graphics_pipeline_state *state)
|
2016-11-12 11:39:07 -08:00
|
|
|
{
|
2021-03-03 13:49:18 -08:00
|
|
|
enum intel_urb_deref_block_size urb_deref_block_size;
|
2020-01-17 14:14:03 -06:00
|
|
|
emit_urb_setup(pipeline, &urb_deref_block_size);
|
2020-01-16 17:05:10 -06:00
|
|
|
|
2022-07-12 16:16:55 -05:00
|
|
|
assert(state->rs != NULL);
|
|
|
|
|
emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp,
|
2022-01-21 14:35:04 +02:00
|
|
|
urb_deref_block_size);
|
2022-07-12 16:16:55 -05:00
|
|
|
emit_ms_state(pipeline, state->ms);
|
|
|
|
|
compute_kill_pixel(pipeline, state->ms, state->rp);
|
2016-11-12 11:39:07 -08:00
|
|
|
|
2022-07-12 16:16:55 -05:00
|
|
|
emit_3dstate_clip(pipeline, state->ia, state->vp, state->rs);
|
2016-11-12 11:39:07 -08:00
|
|
|
|
2021-03-16 10:14:30 -07:00
|
|
|
#if GFX_VER == 12
|
2022-07-12 16:16:55 -05:00
|
|
|
emit_3dstate_primitive_replication(pipeline, state->rp);
|
2018-03-27 10:10:34 -07:00
|
|
|
#endif
|
|
|
|
|
|
2021-07-08 14:47:08 -07:00
|
|
|
if (anv_pipeline_is_primitive(pipeline)) {
|
2023-01-31 22:15:11 +01:00
|
|
|
emit_vertex_input(pipeline, state, state->vi);
|
2021-07-08 14:44:17 -07:00
|
|
|
|
2021-07-08 14:47:08 -07:00
|
|
|
emit_3dstate_vs(pipeline);
|
2022-09-27 19:20:25 +03:00
|
|
|
emit_3dstate_hs_ds(pipeline, state->ts);
|
2021-07-08 14:47:08 -07:00
|
|
|
emit_3dstate_gs(pipeline);
|
2021-07-08 14:44:17 -07:00
|
|
|
|
2021-07-08 14:47:08 -07:00
|
|
|
emit_3dstate_vf_statistics(pipeline);
|
2021-07-08 14:44:17 -07:00
|
|
|
|
2022-07-12 16:16:55 -05:00
|
|
|
emit_3dstate_streamout(pipeline, state->rs);
|
2022-01-22 22:26:46 +02:00
|
|
|
|
2021-05-20 12:07:34 -07:00
|
|
|
#if GFX_VERx10 >= 125
|
2022-01-22 22:26:46 +02:00
|
|
|
const struct anv_device *device = pipeline->base.device;
|
2021-05-20 12:07:34 -07:00
|
|
|
/* Disable Mesh. */
|
2022-05-08 02:19:35 +02:00
|
|
|
if (device->physical->vk.supported_extensions.NV_mesh_shader ||
|
|
|
|
|
device->physical->vk.supported_extensions.EXT_mesh_shader) {
|
2021-05-20 12:07:34 -07:00
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_MESH_CONTROL), zero);
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TASK_CONTROL), zero);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
} else {
|
|
|
|
|
assert(anv_pipeline_is_mesh(pipeline));
|
2022-04-27 16:05:44 +02:00
|
|
|
|
|
|
|
|
/* BSpec 46303 forbids 3DSTATE_MESH_CONTROL.MeshShaderEnable
|
|
|
|
|
* and 3DSTATE_STREAMOUT.SOFunctionEnable from both being 1.
|
|
|
|
|
*/
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so) {}
|
|
|
|
|
|
2021-05-20 12:07:34 -07:00
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
|
emit_task_state(pipeline);
|
|
|
|
|
emit_mesh_state(pipeline);
|
|
|
|
|
#endif
|
2021-07-08 14:47:08 -07:00
|
|
|
}
|
2021-07-08 14:44:17 -07:00
|
|
|
|
2016-11-12 11:39:07 -08:00
|
|
|
emit_3dstate_sbe(pipeline);
|
2022-07-12 16:16:55 -05:00
|
|
|
emit_3dstate_wm(pipeline, state->ia, state->rs,
|
|
|
|
|
state->ms, state->cb, state->rp);
|
|
|
|
|
emit_3dstate_ps(pipeline, state->ms, state->cb);
|
|
|
|
|
emit_3dstate_ps_extra(pipeline, state->rs, state->rp);
|
2016-11-12 11:39:07 -08:00
|
|
|
}
|
|
|
|
|
|
2021-03-16 10:09:00 -07:00
|
|
|
#if GFX_VERx10 >= 125
|
2018-11-14 11:04:15 -08:00
|
|
|
|
2022-01-22 22:33:35 +02:00
|
|
|
void
|
|
|
|
|
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
2018-11-14 11:04:15 -08:00
|
|
|
{
|
2022-01-22 22:33:35 +02:00
|
|
|
struct anv_device *device = pipeline->base.device;
|
2018-11-14 11:04:15 -08:00
|
|
|
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
|
|
|
|
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
|
|
|
|
|
|
2021-01-24 17:03:40 +02:00
|
|
|
const UNUSED struct anv_shader_bin *cs_bin = pipeline->cs;
|
2022-08-04 12:56:17 -07:00
|
|
|
const struct intel_device_info *devinfo = device->info;
|
2018-11-14 11:04:15 -08:00
|
|
|
|
|
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) {
|
|
|
|
|
cfe.MaximumNumberofThreads =
|
2022-06-24 10:19:03 +08:00
|
|
|
devinfo->max_cs_threads * devinfo->subslice_total;
|
2021-10-27 13:06:07 +03:00
|
|
|
cfe.ScratchSpaceBuffer =
|
|
|
|
|
get_scratch_surf(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin);
|
2018-11-14 11:04:15 -08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-03-16 10:09:00 -07:00
|
|
|
#else /* #if GFX_VERx10 >= 125 */
|
2018-11-14 11:04:15 -08:00
|
|
|
|
2022-01-22 22:33:35 +02:00
|
|
|
void
|
|
|
|
|
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
|
2016-01-22 15:59:02 -08:00
|
|
|
{
|
2022-01-22 22:33:35 +02:00
|
|
|
struct anv_device *device = pipeline->base.device;
|
2022-08-04 12:56:17 -07:00
|
|
|
const struct intel_device_info *devinfo = device->info;
|
2016-03-04 08:15:16 -08:00
|
|
|
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
2016-01-30 00:25:16 -08:00
|
|
|
|
2020-03-03 15:31:50 -08:00
|
|
|
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
|
2016-08-22 16:56:48 -07:00
|
|
|
|
2021-04-28 10:56:58 -07:00
|
|
|
const struct brw_cs_dispatch_info dispatch =
|
|
|
|
|
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
|
2016-01-30 00:25:16 -08:00
|
|
|
const uint32_t vfe_curbe_allocation =
|
2021-04-28 10:56:58 -07:00
|
|
|
ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
|
2016-05-27 00:53:27 -07:00
|
|
|
cs_prog_data->push.cross_thread.regs, 2);
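/* Worked example with assumed values: 4 per-thread push registers,
 * 16 dispatched threads and 2 cross-thread registers give
 * ALIGN(4 * 16 + 2, 2) = 66 allocated CURBE registers. */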
|
2016-01-22 15:59:02 -08:00
|
|
|
|
2020-03-03 13:43:39 -08:00
|
|
|
const struct anv_shader_bin *cs_bin = pipeline->cs;
|
2016-11-12 13:33:16 -08:00
|
|
|
|
2020-03-03 15:31:50 -08:00
|
|
|
anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
|
2016-04-18 15:29:42 -07:00
|
|
|
vfe.StackSize = 0;
|
2016-09-07 17:28:44 +01:00
|
|
|
vfe.MaximumNumberofThreads =
|
2021-09-08 16:20:24 -07:00
|
|
|
devinfo->max_cs_threads * devinfo->subslice_total - 1;
|
2022-08-03 12:38:39 +03:00
|
|
|
vfe.NumberofURBEntries = 2;
|
2021-03-16 10:14:30 -07:00
|
|
|
#if GFX_VER < 11
|
2016-04-18 15:29:42 -07:00
|
|
|
vfe.ResetGatewayTimer = true;
|
2016-01-22 15:59:02 -08:00
|
|
|
#endif
|
2022-08-03 12:38:39 +03:00
|
|
|
vfe.URBEntryAllocationSize = 2;
|
2016-04-18 15:29:42 -07:00
|
|
|
vfe.CURBEAllocationSize = vfe_curbe_allocation;
|
2016-11-12 13:33:16 -08:00
|
|
|
|
2019-04-10 14:47:12 -05:00
|
|
|
if (cs_bin->prog_data->total_scratch) {
|
2022-08-03 12:38:39 +03:00
|
|
|
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
|
|
|
|
|
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
|
|
|
|
|
*/
|
|
|
|
|
vfe.PerThreadScratchSpace =
|
|
|
|
|
ffs(cs_bin->prog_data->total_scratch) - 11;
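/* Example: total_scratch = 4096 gives ffs(4096) - 11 = 13 - 11 = 2,
 * matching the 2 = 4k entry in the table above (the encoding assumes
 * a power-of-two size). */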
|
2019-04-10 14:47:12 -05:00
|
|
|
vfe.ScratchSpaceBasePointer =
|
2020-03-03 15:31:50 -08:00
|
|
|
get_scratch_address(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin);
|
2019-04-10 14:47:12 -05:00
|
|
|
}
|
2016-04-18 15:29:42 -07:00
|
|
|
}
|
2016-01-22 15:59:02 -08:00
|
|
|
|
2016-11-12 12:13:17 -08:00
|
|
|
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
|
2020-05-21 00:17:27 -07:00
|
|
|
.KernelStartPointer =
|
|
|
|
|
cs_bin->kernel.offset +
|
2021-04-28 10:56:58 -07:00
|
|
|
brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size),
|
2020-05-21 00:17:27 -07:00
|
|
|
|
2021-03-29 17:24:46 -07:00
|
|
|
/* Wa_1606682166 */
|
2021-03-16 10:14:30 -07:00
|
|
|
.SamplerCount = GFX_VER == 11 ? 0 : get_sampler_count(cs_bin),
|
2020-01-01 17:38:01 -08:00
|
|
|
/* We add 1 because the CS indirect parameters buffer isn't accounted
|
2018-02-12 19:34:48 -08:00
|
|
|
* for in bind_map.surface_count.
|
2022-09-06 17:31:51 +02:00
|
|
|
*
|
|
|
|
|
* On Gfx12.5 this is set to 0 to avoid prefetching on every thread dispatch.
|
2018-02-12 19:34:48 -08:00
|
|
|
*/
|
2022-09-06 17:31:51 +02:00
|
|
|
.BindingTableEntryCount = devinfo->verx10 == 125 ?
|
|
|
|
|
0 : 1 + MIN2(pipeline->cs->bind_map.surface_count, 30),
|
2016-11-12 12:13:17 -08:00
|
|
|
.BarrierEnable = cs_prog_data->uses_barrier,
|
|
|
|
|
.SharedLocalMemorySize =
|
2021-03-16 10:14:30 -07:00
|
|
|
encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),
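/* encode_slm_size() is assumed to round total_shared up to the next
 * supported power-of-two SLM size and encode it logarithmically; see
 * its definition for the exact per-generation table. */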
|
2016-11-12 12:13:17 -08:00
|
|
|
|
|
|
|
|
.ConstantURBEntryReadOffset = 0,
|
|
|
|
|
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
|
|
|
|
.CrossThreadConstantDataReadLength =
|
|
|
|
|
cs_prog_data->push.cross_thread.regs,
|
2021-03-16 10:14:30 -07:00
|
|
|
#if GFX_VER >= 12
|
2020-03-03 08:07:32 -08:00
|
|
|
/* TODO: Check if we are missing workarounds and enable mid-thread
|
|
|
|
|
* preemption.
|
|
|
|
|
*
|
|
|
|
|
* We still have issues with mid-thread preemption (it was already
|
2021-03-29 15:40:04 -07:00
|
|
|
* disabled by the kernel on gfx11, due to missing workarounds). It's
|
2020-03-03 08:07:32 -08:00
|
|
|
* possible that we are just missing some workarounds, and could enable
|
|
|
|
|
* it later, but for now let's disable it to fix a GPU in compute in Car
|
|
|
|
|
* Chase (and possibly more).
|
|
|
|
|
*/
|
|
|
|
|
.ThreadPreemptionDisable = true,
|
|
|
|
|
#endif
|
2016-11-12 12:13:17 -08:00
|
|
|
|
2021-04-28 10:56:58 -07:00
|
|
|
.NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
|
2016-11-12 12:13:17 -08:00
|
|
|
};
|
|
|
|
|
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
|
|
|
|
|
pipeline->interface_descriptor_data,
|
|
|
|
|
&desc);
|
2018-11-14 10:49:05 -08:00
|
|
|
}
|
|
|
|
|
|
2021-03-16 10:09:00 -07:00
|
|
|
#endif /* #if GFX_VERx10 >= 125 */
|
2018-11-14 11:04:15 -08:00
|
|
|
|
2020-08-06 18:56:54 -05:00
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
|
|
2022-01-28 13:22:26 +02:00
|
|
|
void
|
|
|
|
|
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline)
|
2020-08-06 18:56:54 -05:00
|
|
|
{
|
2021-01-21 15:37:21 -06:00
|
|
|
for (uint32_t i = 0; i < pipeline->group_count; i++) {
|
|
|
|
|
struct anv_rt_shader_group *group = &pipeline->groups[i];
|
|
|
|
|
|
|
|
|
|
switch (group->type) {
|
|
|
|
|
case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: {
|
2022-11-25 12:29:09 +02:00
|
|
|
struct GENX(RT_GENERAL_SBT_HANDLE) sh = {};
|
2021-01-21 15:37:21 -06:00
|
|
|
sh.General = anv_shader_bin_get_bsr(group->general, 32);
|
2022-11-25 12:29:09 +02:00
|
|
|
GENX(RT_GENERAL_SBT_HANDLE_pack)(NULL, group->handle, &sh);
|
2021-01-21 15:37:21 -06:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: {
|
2022-11-25 12:29:09 +02:00
|
|
|
struct GENX(RT_TRIANGLES_SBT_HANDLE) sh = {};
|
2021-01-21 15:37:21 -06:00
|
|
|
if (group->closest_hit)
|
|
|
|
|
sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32);
|
|
|
|
|
if (group->any_hit)
|
|
|
|
|
sh.AnyHit = anv_shader_bin_get_bsr(group->any_hit, 24);
|
2022-11-25 12:29:09 +02:00
|
|
|
GENX(RT_TRIANGLES_SBT_HANDLE_pack)(NULL, group->handle, &sh);
|
2021-01-21 15:37:21 -06:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
|
2022-11-25 12:29:09 +02:00
|
|
|
struct GENX(RT_PROCEDURAL_SBT_HANDLE) sh = {};
|
2021-01-21 15:37:21 -06:00
|
|
|
if (group->closest_hit)
|
|
|
|
|
sh.ClosestHit = anv_shader_bin_get_bsr(group->closest_hit, 32);
|
|
|
|
|
sh.Intersection = anv_shader_bin_get_bsr(group->intersection, 24);
|
2022-11-25 12:29:09 +02:00
|
|
|
GENX(RT_PROCEDURAL_SBT_HANDLE_pack)(NULL, group->handle, &sh);
|
2021-01-21 15:37:21 -06:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
unreachable("Invalid shader group type");
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-08-06 18:56:54 -05:00
|
|
|
}
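For context, a minimal sketch (the names and the 32-byte handle size are
assumptions) of how the handles packed above would typically be copied out
to the application, as vkGetRayTracingShaderGroupHandlesKHR requires —
each group's fixed-size handle laid back-to-back in the caller's buffer:

#include <stdint.h>
#include <string.h>

#define SBT_HANDLE_SIZE 32 /* assumed shader-group handle size in bytes */

static void
copy_group_handles(const uint8_t (*handles)[SBT_HANDLE_SIZE],
                   uint32_t first_group, uint32_t group_count, void *out)
{
   /* Handles are contiguous and fixed-size, so one memcpy suffices. */
   memcpy(out, handles[first_group], (size_t)group_count * SBT_HANDLE_SIZE);
}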
|
|
|
|
|
|
2022-01-28 13:22:26 +02:00
|
|
|
#else
|
2020-08-06 18:56:54 -05:00
|
|
|
|
2022-01-28 13:22:26 +02:00
|
|
|
void
|
|
|
|
|
genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline)
|
|
|
|
|
{
|
|
|
|
|
unreachable("Ray tracing not supported");
|
2020-08-05 16:53:12 -05:00
|
|
|
}
|
2022-01-28 13:22:26 +02:00
|
|
|
|
2020-08-06 18:56:54 -05:00
|
|
|
#endif /* GFX_VERx10 >= 125 */
|