mesa/src/intel/compiler/brw/brw_thread_payload.cpp
Caio Oliveira 74f1d4f47b intel/compiler: Use SPDX annotations
Minor adjustments to formatting of the copyright line, but keep
dates and holders.  "Authors" entries that could be
obtained via Git logs were also removed.

The licenses in brw_disasm.c and elk_disasm.c don't directly match
any SPDX pattern I could find, so they were kept as is.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39503>
2026-01-24 20:37:31 +00:00

462 lines
14 KiB
C++

/*
* Copyright © 2006-2022 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "brw_shader.h"
#include "brw_builder.h"
/**
 * Lay out the vertex shader thread payload: a thread header slot followed
 * by one slot holding the URB handles.
 */
brw_vs_thread_payload::brw_vs_thread_payload(const brw_shader &v)
{
   /* Size of one payload slot as reported by reg_unit() for this device. */
   const unsigned unit = reg_unit(v.devinfo);

   /* R0 is the thread header, so the URB handles (R1) start one slot in. */
   const unsigned urb_handles_reg = unit;
   urb_handles = brw_ud8_grf(urb_handles_reg, 0);

   /* Total: thread header + URB handles. */
   num_regs = urb_handles_reg + unit;
}
/**
 * Lay out the tessellation control shader thread payload.
 *
 * Two layouts exist, selected by the VUE prog data's dispatch mode:
 * single-patch uses a fixed five-register layout, multi-patch is sized
 * from the optional primitive ID and the number of input vertices.
 */
brw_tcs_thread_payload::brw_tcs_thread_payload(const brw_shader &v)
{
   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(v.prog_data);
   struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(v.prog_data);
   struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) v.key;

   if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH) {
      /* Both the patch URB output handle and the primitive ID are read
       * from r0 (subregisters 0 and 1 respectively).
       */
      patch_urb_output = brw_ud1_grf(0, 0);
      primitive_id = brw_vec1_grf(0, 1);

      /* The ICP handles live in r1-r4, giving five registers in total. */
      icp_handle_start = brw_ud8_grf(1, 0);
      num_regs = 5;
      return;
   }

   assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH);
   assert(tcs_key->input_vertices <= BRW_MAX_TCS_INPUT_VERTICES);

   const unsigned unit = reg_unit(v.devinfo);

   /* The first slot is skipped; the patch URB output handles follow it. */
   unsigned next = unit;
   patch_urb_output = brw_ud8_grf(next, 0);
   next += unit;

   /* The primitive ID slot only exists when the prog data asks for it. */
   if (tcs_prog_data->include_primitive_id) {
      primitive_id = brw_vec8_grf(next, 0);
      next += unit;
   }

   /* The ICP handles take the next 1-32 registers, one slot per input
    * vertex.
    */
   icp_handle_start = brw_ud8_grf(next, 0);
   next += brw_tcs_prog_key_input_vertices(tcs_key) * unit;

   num_regs = next;
}
/**
 * Lay out the tessellation evaluation shader thread payload: thread
 * header, three gl_TessCoord component slots, then the URB output handles.
 */
brw_tes_thread_payload::brw_tes_thread_payload(const brw_shader &v)
{
   const unsigned unit = reg_unit(v.devinfo);

   /* R0 (thread header) supplies the patch URB input handle in
    * subregister 0 and the primitive ID in subregister 1.
    */
   patch_urb_input = retype(brw_vec1_grf(0, 0), BRW_TYPE_UD);
   primitive_id = brw_vec1_grf(0, 1);

   /* R1-3: gl_TessCoord.xyz, one slot per component. */
   unsigned next = unit;
   for (unsigned axis = 0; axis < 3; ++axis, next += unit)
      coords[axis] = brw_vec8_grf(next, 0);

   /* R4: URB output handles, the last slot of the payload. */
   urb_output = brw_ud8_grf(next, 0);
   num_regs = next + unit;
}
/**
 * Lay out the geometry shader thread payload.
 *
 * Besides recording register locations, this emits two instructions
 * through a builder on \p v that extract the URB handles and the instance
 * ID from R1 into fresh VGRFs, and it forces include_vue_handles on in the
 * GS prog data.
 */
brw_gs_thread_payload::brw_gs_thread_payload(brw_shader &v)
{
   struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(v.prog_data);
   const brw_builder bld = brw_builder(&v);
   const unsigned unit = reg_unit(v.devinfo);

   /* R0 is the thread header; everything below starts at R1. */
   unsigned next = unit;

   /* R1: output URB handles.  The handle field is wider from ver 20 on,
    * hence the larger mask there.
    */
   const unsigned urb_handle_mask = v.devinfo->ver >= 20 ? 0xFFFFFF : 0xFFFF;
   urb_handles = bld.vgrf(BRW_TYPE_UD);
   bld.AND(urb_handles, brw_ud8_grf(next, 0), brw_imm_ud(urb_handle_mask));

   /* R1 also carries the instance ID in bits 31:27. */
   instance_id = bld.vgrf(BRW_TYPE_UD);
   bld.SHR(instance_id, brw_ud8_grf(next, 0), brw_imm_ud(27u));
   next += unit;

   /* Optional primitive ID slot. */
   if (gs_prog_data->include_primitive_id) {
      primitive_id = brw_ud8_grf(next, 0);
      next += unit;
   }

   /* VUE handles are always enabled so the pull model stays available.
    *
    * The push model for a GS consumes a lot of register space even for
    * trivial cases with only a few inputs, so keeping pull model around
    * makes things easier and a bit safer.
    */
   gs_prog_data->base.include_vue_handles = true;

   /* ICP handles, one slot per incoming vertex (used by the pull model). */
   icp_handle_start = brw_ud8_grf(next, 0);
   next += v.nir->info.gs.vertices_in * unit;

   num_regs = next;
}
/**
 * Compute the fragment shader thread payload layout for devices with
 * devinfo->ver >= 20.
 *
 * Records in \p payload the register index of every payload field enabled
 * in the WM prog data and leaves the total register count in
 * payload.num_regs.
 *
 * \param payload  Payload description to fill in.
 * \param v        Shader being compiled; its dispatch width must be a
 *                 multiple of 16.
 * \param source_depth_to_render_target  Set to true when the shader writes
 *                 FRAG_RESULT_DEPTH; left untouched otherwise.
 */
static inline void
setup_fs_payload_gfx20(brw_fs_thread_payload &payload,
                       const brw_shader &v,
                       bool &source_depth_to_render_target)
{
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data);
   const unsigned payload_width = 16;

   assert(v.dispatch_width % payload_width == 0);
   assert(v.devinfo->ver >= 20);

   /* Number of payload_width-wide groups making up the dispatch. */
   const unsigned num_groups = v.dispatch_width / payload_width;

   /* Like setup_fs_payload_gfx9, start from a known register count instead
    * of relying on the caller having zeroed num_regs beforehand.
    */
   payload.num_regs = 0;

   for (unsigned j = 0; j < num_groups; j++) {
      /* R0-1: PS thread payload header, masks and pixel X/Y coordinates. */
      payload.num_regs++;
      payload.subspan_coord_reg[j] = payload.num_regs++;
   }

   for (unsigned j = 0; j < num_groups; j++) {
      /* R2-13: Barycentric interpolation coordinates.  These appear
       * in the same order that they appear in the intel_barycentric_mode
       * enum.  Each set of coordinates occupies 2 64B registers per
       * SIMD16 half.  Coordinates only appear if they were enabled
       * using the "Barycentric Interpolation Mode" bits in WM_STATE.
       */
      for (int i = 0; i < INTEL_BARYCENTRIC_MODE_COUNT; ++i) {
         if (prog_data->barycentric_interp_modes & (1 << i)) {
            payload.barycentric_coord_reg[i][j] = payload.num_regs;
            payload.num_regs += payload_width / 4;
         }
      }

      /* R14: Interpolated depth if "Pixel Shader Uses Source Depth" is set. */
      if (prog_data->uses_src_depth) {
         payload.source_depth_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R15: Interpolated W if "Pixel Shader Uses Source W" is set. */
      if (prog_data->uses_src_w) {
         payload.source_w_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R16: MSAA input coverage mask if "Pixel Shader Uses Input
       * Coverage Mask" is set.
       */
      if (prog_data->uses_sample_mask) {
         payload.sample_mask_in_reg[j] = payload.num_regs;
         payload.num_regs += payload_width / 8;
      }

      /* R19: MSAA position XY offsets if "Position XY Offset Select"
       * is either POSOFFSET_CENTROID or POSOFFSET_SAMPLE.  Note that
       * this is delivered as a single SIMD32 vector, inconsistently
       * with most other PS payload fields, which is why it is only
       * laid out once (for j == 0) and fills both sample_pos_reg slots.
       */
      if (prog_data->uses_pos_offset && j == 0) {
         for (unsigned k = 0; k < 2; k++) {
            payload.sample_pos_reg[k] = payload.num_regs;
            payload.num_regs++;
         }
      }

      /* R22: Sample offsets, likewise laid out only once. */
      if (prog_data->uses_sample_offsets && j == 0) {
         payload.sample_offsets_reg = payload.num_regs;
         payload.num_regs += 2;
      }
   }

   /* RP0: Source Depth and/or W Attribute Vertex Deltas and/or
    * Perspective Bary Planes.  Both fields share the same starting
    * register here.
    */
   if (prog_data->uses_depth_w_coefficients ||
       prog_data->uses_pc_bary_coefficients) {
      payload.depth_w_coef_reg = payload.pc_bary_coef_reg = payload.num_regs;
      payload.num_regs += 2 * v.max_polygons;
   }

   /* RP4: Non-Perspective Bary planes. */
   if (prog_data->uses_npc_bary_coefficients) {
      payload.npc_bary_coef_reg = payload.num_regs;
      payload.num_regs += 2 * v.max_polygons;
   }

   /* Only ever raises the flag; a caller-provided true is preserved. */
   if (v.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      source_depth_to_render_target = true;
   }
}
/**
 * Compute the fragment shader thread payload layout for devices with
 * devinfo->ver < 20.
 *
 * Records in \p payload the register index of every payload field enabled
 * in the WM prog data and leaves the total register count in
 * payload.num_regs.  \p source_depth_to_render_target is set to true when
 * the shader writes FRAG_RESULT_DEPTH and is left untouched otherwise.
 */
static inline void
setup_fs_payload_gfx9(brw_fs_thread_payload &payload,
                      const brw_shader &v,
                      bool &source_depth_to_render_target)
{
   struct brw_wm_prog_data *prog_data = brw_wm_prog_data(v.prog_data);
   const unsigned payload_width = MIN2(16, v.dispatch_width);

   assert(v.dispatch_width % payload_width == 0);
   assert(v.devinfo->ver < 20);

   /* Number of payload_width-wide groups making up the dispatch, and the
    * register footprint of the per-group fields.
    */
   const unsigned num_groups = v.dispatch_width / payload_width;
   const unsigned bary_regs = payload_width / 4;
   const unsigned per_channel_regs = payload_width / 8;

   /* Hands out the next `count` payload registers and returns the index
    * of the first one.
    */
   const auto reserve = [&payload](unsigned count) {
      const auto first = payload.num_regs;
      payload.num_regs += count;
      return first;
   };

   /* R0: PS thread payload header. */
   payload.num_regs = 1;

   /* R1: masks, pixel X/Y coordinates (one register per group). */
   for (unsigned group = 0; group < num_groups; group++)
      payload.subspan_coord_reg[group] = reserve(1);

   for (unsigned group = 0; group < num_groups; group++) {
      /* R3-26: barycentric interpolation coordinates, in
       * intel_barycentric_mode enum order.  A set of coordinates takes 2
       * registers if dispatch width == 8 and 4 registers if dispatch
       * width == 16, and is only present when enabled through the
       * "Barycentric Interpolation Mode" bits in WM_STATE.
       */
      for (int mode = 0; mode < INTEL_BARYCENTRIC_MODE_COUNT; ++mode) {
         if (prog_data->barycentric_interp_modes & (1 << mode))
            payload.barycentric_coord_reg[mode][group] = reserve(bary_regs);
      }

      /* R27-28: interpolated depth, when source depth is used. */
      if (prog_data->uses_src_depth)
         payload.source_depth_reg[group] = reserve(per_channel_regs);

      /* R29-30: interpolated W, when GFX6_WM_USES_SOURCE_W is set. */
      if (prog_data->uses_src_w)
         payload.source_w_reg[group] = reserve(per_channel_regs);

      /* R31: MSAA position offsets. */
      if (prog_data->uses_pos_offset)
         payload.sample_pos_reg[group] = reserve(1);

      /* R32-33: MSAA input coverage mask. */
      if (prog_data->uses_sample_mask)
         payload.sample_mask_in_reg[group] = reserve(per_channel_regs);
   }

   /* R66: Source Depth and/or W Attribute Vertex Deltas. */
   if (prog_data->uses_depth_w_coefficients)
      payload.depth_w_coef_reg = reserve(v.max_polygons);

   /* R68: Perspective bary planes. */
   if (prog_data->uses_pc_bary_coefficients)
      payload.pc_bary_coef_reg = reserve(v.max_polygons);

   /* R70: Non-perspective bary planes. */
   if (prog_data->uses_npc_bary_coefficients)
      payload.npc_bary_coef_reg = reserve(v.max_polygons);

   /* R72: Sample offsets. */
   if (prog_data->uses_sample_offsets)
      payload.sample_offsets_reg = reserve(1);

   /* Only ever raises the flag; a caller-provided true is preserved. */
   if (v.nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
      source_depth_to_render_target = true;
}
/**
 * Build the fragment shader thread payload description.
 *
 * Every register-index member is value-initialized first, then the layout
 * routine matching the device generation fills in the fields that are in
 * use.  \p source_depth_to_render_target is forwarded to that routine,
 * which may set it to true.
 */
brw_fs_thread_payload::brw_fs_thread_payload(const brw_shader &v,
                                             bool &source_depth_to_render_target)
   : subspan_coord_reg(),
     source_depth_reg(),
     source_w_reg(),
     aa_dest_stencil_reg(),
     sample_pos_reg(),
     sample_mask_in_reg(),
     barycentric_coord_reg(),
     depth_w_coef_reg(),
     pc_bary_coef_reg(),
     npc_bary_coef_reg(),
     sample_offsets_reg()
{
   const bool uses_gfx20_layout = v.devinfo->ver >= 20;

   if (!uses_gfx20_layout) {
      setup_fs_payload_gfx9(*this, v, source_depth_to_render_target);
      return;
   }

   setup_fs_payload_gfx20(*this, v, source_depth_to_render_target);
}
/**
 * Lay out the compute shader thread payload.
 *
 * Also copies uses_inline_push_addr from the shader key into the CS prog
 * data.  Below verx10 125 the payload is just the first register slot;
 * from 125 on it can additionally carry local invocation IDs, a BTD stack
 * ID slot and an inline parameter slot.
 */
brw_cs_thread_payload::brw_cs_thread_payload(const brw_shader &v)
{
   struct brw_cs_prog_data *prog_data = brw_cs_prog_data(v.prog_data);
   const unsigned unit = reg_unit(v.devinfo);

   prog_data->uses_inline_push_addr = v.key->uses_inline_push_addr;

   /* See nir_setup_uniforms for subgroup_id in earlier versions. */
   if (v.devinfo->verx10 < 125) {
      assert(!prog_data->uses_inline_push_addr);
      num_regs = unit;
      return;
   }

   /* The first slot is always counted; optional fields follow it. */
   unsigned next = unit;

   subgroup_id_ = brw_ud1_grf(0, 2);

   /* Before ver 20, a 32-wide dispatch takes a second slot for each
    * generated local ID component.
    */
   const bool needs_second_slot =
      v.devinfo->ver < 20 && v.dispatch_width == 32;

   for (int axis = 0; axis < 3; axis++) {
      /* Components that are not generated read as an immediate zero. */
      if (!(prog_data->generate_local_id & (1 << axis))) {
         local_invocation_id[axis] = brw_imm_uw(0);
         continue;
      }

      local_invocation_id[axis] = brw_uw8_grf(next, 0);
      next += unit;
      if (needs_second_slot)
         next += unit;
   }

   /* TODO: Fill out uses_btd_stack_ids automatically */
   if (prog_data->uses_btd_stack_ids)
      next += unit;

   /* Only the compute stage gets its inline parameter assigned here. */
   const bool wants_inline_parameter =
      prog_data->uses_inline_data || prog_data->uses_inline_push_addr;
   if (v.stage == MESA_SHADER_COMPUTE && wants_inline_parameter) {
      inline_parameter = brw_ud1_grf(next, 0);
      next += unit;
   }

   num_regs = next;
}
/**
 * Emit code loading the subgroup ID into \p dest.
 *
 * \p dest is retyped to UD in place, so the caller's register is modified
 * as well.  Only valid on verx10 >= 125.
 */
void
brw_cs_thread_payload::load_subgroup_id(const brw_builder &bld,
                                        brw_reg &dest) const
{
   assert(bld.shader->devinfo->verx10 >= 125);

   /* Only the bits selected by INTEL_MASK(7, 0) of the payload dword are
    * kept.
    */
   const auto subgroup_id_mask = brw_imm_ud(INTEL_MASK(7, 0));

   dest = retype(dest, BRW_TYPE_UD);
   bld.AND(dest, subgroup_id_, subgroup_id_mask);
}
/**
 * Lay out the task/mesh shader thread payload on top of the compute
 * payload set up by the base-class constructor.
 *
 * Task and Mesh Shader Payloads (SIMD8 and SIMD16)
 *
 *    R0: Header
 *    R1: Local_ID.X[0-7 or 0-15]
 *    R2: Inline Parameter
 *
 * Task and Mesh Shader Payloads (SIMD32)
 *
 *    R0: Header
 *    R1: Local_ID.X[0-15]
 *    R2: Local_ID.X[16-31]
 *    R3: Inline Parameter
 *
 * Local_ID.X values are 16 bits.
 *
 * The inline parameter slot is only assigned when the prog data reports
 * uses_inline_data or uses_inline_push_addr.
 */
brw_task_mesh_thread_payload::brw_task_mesh_thread_payload(brw_shader &v)
   : brw_cs_thread_payload(v)
{
   const brw_builder bld = brw_builder(&v);
   const unsigned unit = reg_unit(v.devinfo);

   /* The base constructor must already have set up the subgroup ID. */
   assert(subgroup_id_.file != BAD_FILE);

   extended_parameter_0 = retype(brw_vec1_grf(0, 3), BRW_TYPE_UD);

   if (v.devinfo->ver < 20) {
      /* In both mesh and task shader payloads, the lower 16 bits of g0.6
       * hold an offset within the Slice's Local URB telling the shader
       * where to write its output.
       */
      urb_output = bld.vgrf(BRW_TYPE_UD);
      bld.AND(urb_output, brw_ud1_grf(0, 6), brw_imm_ud(0xFFFF));
   } else {
      urb_output = brw_ud1_grf(1, 0);
   }

   if (v.stage == MESA_SHADER_MESH) {
      /* g0.7 is the Task Shader URB Entry Offset.  It holds an offset
       * within the Slice's Local URB (bits 0:15) and a slice selector
       * (bits 16:24).  The slice selector can be non-zero when the mesh
       * shader is spawned on a slice other than the one where the task
       * shader ran.  Bit 24 says that the Slice ID is present and bits
       * 16:23 are the Slice ID.
       */
      task_urb_input = brw_ud1_grf(0, 7);
   }

   /* R0 is the header; the local index follows it. */
   unsigned next = unit;
   local_index = brw_uw8_grf(next, 0);
   next += unit;

   /* Before ver 20, SIMD32 takes a second slot for the upper lanes. */
   if (v.devinfo->ver < 20 && v.dispatch_width == 32)
      next += unit;

   struct brw_cs_prog_data *prog_data = brw_cs_prog_data(v.prog_data);
   const bool wants_inline_parameter =
      prog_data->uses_inline_data || prog_data->uses_inline_push_addr;
   if (wants_inline_parameter) {
      inline_parameter = brw_ud1_grf(next, 0);
      next += unit;
   }

   num_regs = next;
}
/**
 * Lay out the bindless shader thread payload: thread header, stack IDs,
 * then the inline parameter slot used for argument addresses.
 *
 * Also copies uses_inline_push_addr from the shader key into the BS prog
 * data.
 */
brw_bs_thread_payload::brw_bs_thread_payload(const brw_shader &v)
{
   struct brw_bs_prog_data *prog_data = brw_bs_prog_data(v.prog_data);
   const unsigned unit = reg_unit(v.devinfo);

   prog_data->uses_inline_push_addr = v.key->uses_inline_push_addr;

   /* R0 (thread header) and R1 (stack IDs) precede the inline parameter. */
   const unsigned inline_reg = 2 * unit;

   /* R2: Inline Parameter.  The global argument pointer aliases its start
    * and the local argument pointer sits at subregister 2.
    */
   inline_parameter = brw_ud1_grf(inline_reg, 0);
   global_arg_ptr = brw_ud1_grf(inline_reg, 0);
   local_arg_ptr = brw_ud1_grf(inline_reg, 2);

   num_regs = inline_reg + unit;
}
void
brw_bs_thread_payload::load_shader_type(const brw_builder &bld, brw_reg &dest) const
{
brw_reg ud_dest = retype(dest, BRW_TYPE_UD);
bld.MOV(ud_dest, retype(brw_vec1_grf(0, 3), ud_dest.type));
bld.AND(ud_dest, ud_dest, brw_imm_ud(0xf));
}