/*
 * Copyright 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#pragma once

#ifndef __OPENCL_VERSION__
#include <stdint.h>
#include "util/bitscan.h"
#endif

#include "compiler/shader_enums.h"
#include "util/enum_operators.h"

#ifdef __cplusplus
extern "C" {
#endif

/** A tri-state value to track states that are potentially dynamic */
enum intel_sometimes {
   INTEL_NEVER = 0,
   INTEL_SOMETIMES,
   INTEL_ALWAYS
};

static inline enum intel_sometimes
intel_sometimes_invert(enum intel_sometimes x)
{
   return (enum intel_sometimes)((int)INTEL_ALWAYS - (int)x);
}
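
/* For reference, intel_sometimes_invert() simply reflects the tri-state
 * around INTEL_SOMETIMES (values follow directly from the enum above):
 *
 *    INTEL_NEVER     (0) -> INTEL_ALWAYS    (2)
 *    INTEL_SOMETIMES (1) -> INTEL_SOMETIMES (1)
 *    INTEL_ALWAYS    (2) -> INTEL_NEVER     (0)
 */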

#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET (20)
#define INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_MESH (32)

enum intel_msaa_flags {
   /** Must be set whenever any dynamic MSAA is used
    *
    * This flag mostly exists to let us assert that the driver understands
    * dynamic MSAA so we don't run into trouble with drivers that don't.
    */
   INTEL_MSAA_FLAG_ENABLE_DYNAMIC = (1 << 0),

   /** True if the framebuffer is multisampled */
   INTEL_MSAA_FLAG_MULTISAMPLE_FBO = (1 << 1),

   /** True if this shader has been dispatched per-sample */
   INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),

   /** True if inputs should be interpolated per-sample by default */
   INTEL_MSAA_FLAG_PERSAMPLE_INTERP = (1 << 3),

   /** True if this shader has been dispatched with alpha-to-coverage */
   INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE = (1 << 4),

   /** True if this shader has been dispatched coarse
    *
    * This is intentionally chosen to be bit 15 to correspond to the coarse bit
    * in the pixel interpolator messages.
    */
   INTEL_MSAA_FLAG_COARSE_PI_MSG = (1 << 15),

   /** True if this shader has been dispatched coarse
    *
    * This is intentionally chosen to be bit 18 to correspond to the coarse bit
    * in the render target messages.
    */
   INTEL_MSAA_FLAG_COARSE_RT_WRITES = (1 << 18),

   /** Index of the PrimitiveID attribute relative to the first read
    * attribute.
    *
    * This is not a flag but a value that covers bits 20:31. Value 32 means the
    * PrimitiveID is coming from the PerPrimitive block, written by the Mesh
    * shader. (An illustrative extraction helper sketch follows below.)
    */
   INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX = (1 << INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET),
};
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(intel_msaa_flags)
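
/* Illustrative only (not part of the original interface): a sketch of how a
 * driver could read the PrimitiveID index packed into bits 20:31 back out of
 * the pushed MSAA flags. The helper name is hypothetical.
 */
static inline uint32_t
intel_msaa_flags_primitive_id_index_example(enum intel_msaa_flags flags)
{
   /* Bits 20:31 are the topmost bits of the 32-bit flag word, so a plain
    * shift recovers the index. A result equal to
    * INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_MESH (32) means the PrimitiveID comes
    * from the PerPrimitive block written by the Mesh shader.
    */
   return (uint32_t)flags >> INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET;
}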

/**
 * @defgroup Tessellator parameter enumerations.
 *
 * These correspond to the hardware values in 3DSTATE_TE, and are provided
 * as part of the tessellation evaluation shader.
 *
 * @{
 */
enum intel_tess_partitioning {
   INTEL_TESS_PARTITIONING_INTEGER         = 0,
   INTEL_TESS_PARTITIONING_ODD_FRACTIONAL  = 1,
   INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL = 2,
};

enum intel_tess_output_topology {
   INTEL_TESS_OUTPUT_TOPOLOGY_POINT   = 0,
   INTEL_TESS_OUTPUT_TOPOLOGY_LINE    = 1,
   INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW  = 2,
   INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3,
};

enum intel_tess_domain {
   INTEL_TESS_DOMAIN_QUAD    = 0,
   INTEL_TESS_DOMAIN_TRI     = 1,
   INTEL_TESS_DOMAIN_ISOLINE = 2,
};
/** @} */

enum intel_shader_dispatch_mode {
   INTEL_DISPATCH_MODE_4X1_SINGLE = 0,
   INTEL_DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
   INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
   INTEL_DISPATCH_MODE_SIMD8 = 3,

   INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH = 0,
   INTEL_DISPATCH_MODE_TCS_MULTI_PATCH = 2,
};

enum intel_barycentric_mode {
   INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL       = 0,
   INTEL_BARYCENTRIC_PERSPECTIVE_CENTROID    = 1,
   INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE      = 2,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL    = 3,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID = 4,
   INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE   = 5,
   INTEL_BARYCENTRIC_MODE_COUNT              = 6
};
#define INTEL_BARYCENTRIC_PERSPECTIVE_BITS \
   ((1 << INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL) | \
    (1 << INTEL_BARYCENTRIC_PERSPECTIVE_CENTROID) | \
    (1 << INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))
#define INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS \
   ((1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL) | \
    (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_CENTROID) | \
    (1 << INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
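
/* For reference, these masks evaluate to:
 *
 *    INTEL_BARYCENTRIC_PERSPECTIVE_BITS    = 0b000111 = 0x07
 *    INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS = 0b111000 = 0x38
 */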

enum intel_vue_layout {
   /**
    * Layout is fixed and shared by producer/consumer, allowing for tight
    * packing.
    */
   INTEL_VUE_LAYOUT_FIXED = 0,
   /**
    * Layout is separate, works for ARB_separate_shader_objects but without
    * Mesh support.
    */
   INTEL_VUE_LAYOUT_SEPARATE,
   /**
    * Layout is separate and works with Mesh shaders.
    */
   INTEL_VUE_LAYOUT_SEPARATE_MESH,
};

/**
 * Data structure recording the relationship between the gl_varying_slot enum
 * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
 * single octaword within the VUE (128 bits).
 *
 * Note that each BRW register contains 256 bits (2 octawords), so when
 * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
 * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
 * in a vertex shader), each register corresponds to a single VUE slot, since
 * it contains data for two separate vertices.
 */
struct intel_vue_map {
   /**
    * Bitfield representing all varying slots that are (a) stored in this VUE
    * map, and (b) actually written by the shader. Does not include any of
    * the additional varying slots defined in brw_varying_slot.
    */
   uint64_t slots_valid;

   /**
    * The layout of the VUE
    *
    * Separable programs (GL_ARB_separate_shader_objects) can be mixed and
    * matched without the linker having a chance to dead code eliminate
    * unused varyings.
    *
    * This means that we have to use a fixed slot layout, based on the
    * output's location field, rather than assigning slots in a compact
    * contiguous block.
    *
    * When using Mesh, another constraint arises: the HW limit on loading
    * per-primitive & per-vertex data, which is 32 varyings in total. This
    * requires us to be quite inventive with the way we lay things out.
    * Take a fragment shader loading the following data:
    *
    *    float gl_ClipDistance[];
    *    uint  gl_PrimitiveID;
    *    vec4  someAppValue[29];
    *
    * According to the Vulkan spec, someAppValue will occupy 29 slots,
    * gl_PrimitiveID 1 slot, gl_ClipDistance[] up to 2 slots. If the input is
    * coming from a VS/DS/GS shader, we can load all of this through a single
    * block using 3DSTATE_SBE::VertexURBEntryReadLength = 16 (maximum
    * programmable value) and the layout with
    * BRW_VUE_MAP_LAYOUT_FIXED/BRW_VUE_MAP_LAYOUT_SEPARATE will be this:
    *
    *    -----------------------
    *    | gl_ClipDistance 0-3 |
    *    |---------------------|
    *    | gl_ClipDistance 4-7 |
    *    |---------------------|
    *    | gl_PrimitiveID      |
    *    |---------------------|
    *    | someAppValue[]      |
    *    |---------------------|
    *
    * This works nicely as everything is coming from the same location in the
    * URB.
    *
    * When mesh shaders are involved, gl_PrimitiveID is located in a different
    * place in the URB (the per-primitive block) and requires programming
    * 3DSTATE_SBE_MESH::PerPrimitiveURBEntryOutputReadLength to load some
    * additional data. The HW has a limit such that
    * 3DSTATE_SBE_MESH::PerPrimitiveURBEntryOutputReadLength +
    * 3DSTATE_SBE_MESH::PerVertexURBEntryOutputReadLength <= 16. With the
    * layout above, we would not be able to accommodate that HW limit.
    *
    * The solution to this is to lay the built-in varyings out
    * (gl_ClipDistance omitted since it's part of the VUE header and cannot
    * live anywhere else) at the end of the VUE like this:
    *
    *    -----------------------
    *    | gl_ClipDistance 0-3 |
    *    |---------------------|
    *    | gl_ClipDistance 4-7 |
    *    |---------------------|
    *    | someAppValue[]      |
    *    |---------------------|
    *    | gl_PrimitiveID      |
    *    |---------------------|
    *
    * This layout adds another challenge because with separate shader
    * compilations, we cannot tell in the consumer shader how many outputs
    * the producer has, so we don't know where the gl_PrimitiveID lives. The
    * solution to this other problem is to read the built-in with a
    * MOV_INDIRECT and have the offset of the MOV_INDIRECT loaded through a
    * push constant.
    */
   enum intel_vue_layout layout;

   /**
    * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that
    * are not stored in a slot (because they are not written, or because
    * additional processing is applied before storing them in the VUE), the
    * value is -1.
    */
   signed char varying_to_slot[VARYING_SLOT_TESS_MAX];

   /**
    * Map from VUE slot to gl_varying_slot value. For slots that do not
    * directly correspond to a gl_varying_slot, the value comes from
    * brw_varying_slot.
    *
    * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
    */
   signed char slot_to_varying[VARYING_SLOT_TESS_MAX];

   /**
    * Total number of VUE slots in use
    */
   int num_slots;

   /**
    * Number of position VUE slots. If num_pos_slots > 1, primitive
    * replication is being used.
    */
   int num_pos_slots;

   /**
    * Number of per-patch VUE slots. Only valid for tessellation control
    * shader outputs and tessellation evaluation shader inputs.
    */
   int num_per_patch_slots;

   /**
    * Number of per-vertex VUE slots. Only valid for tessellation control
    * shader outputs and tessellation evaluation shader inputs.
    */
   int num_per_vertex_slots;
};

struct intel_cs_dispatch_info {
   uint32_t group_size;
   uint32_t simd_size;
   uint32_t threads;

   /* RightExecutionMask field used in GPGPU_WALKER. */
   uint32_t right_mask;
};

enum intel_compute_walk_order {
   INTEL_WALK_ORDER_XYZ = 0,
   INTEL_WALK_ORDER_XZY = 1,
   INTEL_WALK_ORDER_YXZ = 2,
   INTEL_WALK_ORDER_YZX = 3,
   INTEL_WALK_ORDER_ZXY = 4,
   INTEL_WALK_ORDER_ZYX = 5,
};

static inline bool
intel_fs_is_persample(enum intel_sometimes shader_persample_dispatch,
                      bool shader_per_sample_shading,
                      enum intel_msaa_flags pushed_msaa_flags)
{
   if (shader_persample_dispatch != INTEL_SOMETIMES)
      return shader_persample_dispatch;

   assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);

   if (!(pushed_msaa_flags & INTEL_MSAA_FLAG_MULTISAMPLE_FBO))
      return false;

   if (shader_per_sample_shading)
      assert(pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH);

   return (pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH) != 0;
}

static inline uint32_t
intel_fs_barycentric_modes(enum intel_sometimes shader_persample_dispatch,
                           uint32_t shader_barycentric_modes,
                           enum intel_msaa_flags pushed_msaa_flags)
{
   /* In the non-dynamic case, we can just return the shader_barycentric_modes
    * computed at compilation time.
    */
   if (shader_persample_dispatch != INTEL_SOMETIMES)
      return shader_barycentric_modes;

   uint32_t modes = shader_barycentric_modes;

   assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);

   if (pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_INTERP) {
      assert(pushed_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH);

      /* Making dynamic per-sample interpolation work is a bit tricky. The
       * hardware will hang if SAMPLE is requested but per-sample dispatch is
       * not enabled. This means we can't preemptively add SAMPLE to the
       * barycentrics bitfield. Instead, we have to add it late and only
       * on-demand. Annoyingly, changing the number of barycentrics requested
       * changes the whole PS shader payload so we very much don't want to do
       * that. Instead, if the dynamic per-sample interpolation flag is set,
       * we check to see if SAMPLE was requested and, if not, replace the
       * highest barycentric bit in the [non]perspective grouping (CENTROID,
       * if it exists, else PIXEL) with SAMPLE. The shader will stomp all the
       * barycentrics in the shader with SAMPLE so it really doesn't matter
       * which one we replace. The important thing is that we keep the number
       * of barycentrics in each [non]perspective grouping the same.
       */
      if ((modes & INTEL_BARYCENTRIC_PERSPECTIVE_BITS) &&
          !(modes & BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))) {
         int sample_mode =
            util_last_bit(modes & INTEL_BARYCENTRIC_PERSPECTIVE_BITS) - 1;
         assert(modes & BITFIELD_BIT(sample_mode));

         modes &= ~BITFIELD_BIT(sample_mode);
         modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE);
      }

      if ((modes & INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS) &&
          !(modes & BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))) {
         int sample_mode =
            util_last_bit(modes & INTEL_BARYCENTRIC_NONPERSPECTIVE_BITS) - 1;
         assert(modes & BITFIELD_BIT(sample_mode));

         modes &= ~BITFIELD_BIT(sample_mode);
         modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE);
      }
   } else {
      /* If we're not using per-sample interpolation, we need to disable the
       * per-sample bits.
       *
       * SKL PRMs, Volume 2a: Command Reference: Instructions,
       * 3DSTATE_WM:Barycentric Interpolation Mode:
       *
       *    "MSDISPMODE_PERSAMPLE is required in order to select Perspective
       *     Sample or Non-perspective Sample barycentric coordinates."
       */
      uint32_t sample_bits = (BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE) |
                              BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE));
      uint32_t requested_sample = modes & sample_bits;
      modes &= ~sample_bits;
      /*
       * If the shader requested some sample modes and we have to disable
       * them, make sure we add the pixel variant back to not mess up the
       * thread payload.
       *
       * Why does this work out? Because of the ordering in the thread
       * payload:
       *
       *    R7:10  Perspective Centroid Barycentric
       *    R11:14 Perspective Sample Barycentric
       *    R15:18 Linear Pixel Location Barycentric
       *
       * In the backend when persample dispatch is dynamic, we always select
       * the sample barycentric and turn off the pixel location (even if
       * requested through intrinsics). That way when we dynamically select
       * pixel or sample dispatch, the barycentrics always match, since the
       * pixel location barycentric register offset will align with the
       * sample barycentric.
       */
      if (requested_sample) {
         if (requested_sample & BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_SAMPLE))
            modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_PERSPECTIVE_PIXEL);
         if (requested_sample & BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_SAMPLE))
            modes |= BITFIELD_BIT(INTEL_BARYCENTRIC_NONPERSPECTIVE_PIXEL);
      }
   }

   return modes;
}
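
/* A worked example of the dynamic per-sample interpolation path above (the
 * input values are hypothetical): if the shader requested
 * PERSPECTIVE_PIXEL | PERSPECTIVE_CENTROID (0b011) and
 * INTEL_MSAA_FLAG_PERSAMPLE_INTERP is pushed, util_last_bit(0b011) - 1
 * selects CENTROID, which is swapped for PERSPECTIVE_SAMPLE, yielding 0b101
 * (PIXEL | SAMPLE). The perspective grouping still contains two barycentrics,
 * so the thread payload layout is unchanged.
 */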

static inline bool
intel_fs_is_coarse(enum intel_sometimes shader_coarse_pixel_dispatch,
                   enum intel_msaa_flags pushed_msaa_flags)
{
   if (shader_coarse_pixel_dispatch != INTEL_SOMETIMES)
      return shader_coarse_pixel_dispatch;

   assert(pushed_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC);

   assert((pushed_msaa_flags & INTEL_MSAA_FLAG_COARSE_RT_WRITES) ?
          shader_coarse_pixel_dispatch != INTEL_NEVER :
          shader_coarse_pixel_dispatch != INTEL_ALWAYS);

   return (pushed_msaa_flags & INTEL_MSAA_FLAG_COARSE_RT_WRITES) != 0;
}

struct intel_fs_params {
   bool shader_sample_shading;
   float shader_min_sample_shading;
   bool state_sample_shading;
   uint32_t rasterization_samples;
   bool coarse_pixel;
   bool alpha_to_coverage;
   uint32_t primitive_id_index;
};

static inline enum intel_msaa_flags
intel_fs_msaa_flags(struct intel_fs_params params)
{
   enum intel_msaa_flags fs_msaa_flags = INTEL_MSAA_FLAG_ENABLE_DYNAMIC;

   if (params.rasterization_samples > 1) {
      fs_msaa_flags |= INTEL_MSAA_FLAG_MULTISAMPLE_FBO;

      if (params.shader_sample_shading)
         fs_msaa_flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH;

      if (params.shader_sample_shading ||
          (params.state_sample_shading &&
           (params.shader_min_sample_shading *
            params.rasterization_samples) > 1)) {
         fs_msaa_flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH |
                          INTEL_MSAA_FLAG_PERSAMPLE_INTERP;
      }
   }

   if (!(fs_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH) &&
       params.coarse_pixel) {
      fs_msaa_flags |= INTEL_MSAA_FLAG_COARSE_PI_MSG |
                       INTEL_MSAA_FLAG_COARSE_RT_WRITES;
   }

   if (params.alpha_to_coverage)
      fs_msaa_flags |= INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE;

   fs_msaa_flags |= (enum intel_msaa_flags)(
      params.primitive_id_index << INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_OFFSET);

   return fs_msaa_flags;
}
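
/* A worked example (hypothetical driver state): with
 * rasterization_samples = 4, state_sample_shading = true and
 * shader_min_sample_shading = 1.0, the product 1.0 * 4 > 1, so the result is
 * INTEL_MSAA_FLAG_ENABLE_DYNAMIC | INTEL_MSAA_FLAG_MULTISAMPLE_FBO |
 * INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH | INTEL_MSAA_FLAG_PERSAMPLE_INTERP.
 * Because per-sample dispatch ends up set, a coarse_pixel = true request is
 * ignored and no coarse flags are added.
 */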

#ifdef __cplusplus
} /* extern "C" */
#endif