mesa/src/amd/common/ac_shader_args.h
Samuel Pitoiset 755cb6cb75 radv: fix independent sets with dynamic buffers and GPL
If a set layout is missing the driver can't compute the dynamic buffer
start offsets correctly. The only solution is to load these offsets from
an user SGPR.

To avoid adding more complexity, these offsets are re-emitted every
time dynamic buffers are dirty. That shouldn't matter because the
combination of dynamic buffers and independent sets is just super rare.

This fixes new VKCTS coverage
dEQP-VK.pipeline.pipeline_library.graphics_library.independent_sets_random.*.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39988>
2026-02-24 11:12:14 +00:00

208 lines
6.3 KiB
C

/*
* Copyright 2019 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#ifndef AC_SHADER_ARGS_H
#define AC_SHADER_ARGS_H
#include <stdbool.h>
#include <stdint.h>
/* Maximum dwords of inline push constants when the indirect path is still used */
#define AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT 8
/* Maximum dwords of inline push constants when the indirect path is not used */
#define AC_MAX_INLINE_PUSH_CONSTS 32
enum ac_arg_regfile
{
AC_ARG_SGPR,
AC_ARG_VGPR,
};
enum ac_arg_type
{
AC_ARG_VALUE,
AC_ARG_CONST_ADDR,
};
struct ac_arg {
uint16_t arg_index;
bool used;
};
#define AC_MAX_ARGS 384 /* including all VS->TCS IO */
struct ac_shader_args {
/* Info on how to declare arguments */
struct {
enum ac_arg_type type;
enum ac_arg_regfile file;
uint8_t offset;
uint8_t size;
bool skip : 1;
bool pending_vmem : 1; /* Loaded from VMEM and needs waitcnt before use. */
bool preserved : 1;
} args[AC_MAX_ARGS];
uint16_t arg_count;
uint16_t num_sgprs_used;
uint16_t num_vgprs_used;
uint16_t return_count;
uint16_t num_sgprs_returned;
uint16_t num_vgprs_returned;
/* User data 0/1. GFX: descriptor list, Compute: scratch BO. These are the SGPRs used by RADV for
* scratch and have to be accessed using llvm.amdgcn.implicit.buffer.ptr for LLVM in that case.
*/
struct ac_arg ring_offsets;
/* VS */
struct ac_arg base_vertex;
struct ac_arg start_instance;
struct ac_arg draw_id;
struct ac_arg vertex_buffers;
struct ac_arg vertex_id;
struct ac_arg vs_rel_patch_id;
struct ac_arg vs_prim_id;
struct ac_arg instance_id;
/* Merged shaders */
struct ac_arg tess_offchip_offset;
struct ac_arg merged_wave_info;
/* On gfx10:
* - bits 0..11: ordered_wave_id
* - bits 12..20: number of vertices in group
* - bits 22..30: number of primitives in group
*/
struct ac_arg gs_tg_info;
struct ac_arg scratch_offset;
/* TCS */
struct ac_arg tcs_factor_offset;
struct ac_arg tcs_wave_id; /* gfx11+ */
struct ac_arg tcs_patch_id;
struct ac_arg tcs_rel_ids;
/* # [0:6] = the number of tessellation patches, max = 127
* # [7:11] = TCS: the number of input patch control points minus one, max = 31
* TES: the number of output patch control points minus one, max = 31
* # [12:16] = the stride of 1 TCS per-vertex output in memory / 256, max = 16
* # [17:22] = the number of LS outputs, up to 32
* # [23:28] = the number of HS per-vertex outputs, up to 32
* # [29:30] = tess_primitive_mode
* # [31] = whether TES reads tess factors
*/
struct ac_arg tcs_offchip_layout;
/* TES */
struct ac_arg tes_u;
struct ac_arg tes_v;
struct ac_arg tes_rel_patch_id;
struct ac_arg tes_patch_id;
/* GS */
struct ac_arg es2gs_offset; /* separate legacy ES */
struct ac_arg gs2vs_offset; /* legacy GS */
struct ac_arg gs_wave_id; /* legacy GS */
struct ac_arg gs_attr_offset; /* gfx11+: attribute ring offset in 512B increments */
/* GS vertex indices/offsets:
*
* GFX6-8: [0-5] 6x uint32, multiplied by VGT_ESGS_RING_ITEMSIZE by hw
* GFX9-11 non-passthrough: [0-2] 6x packed uint16, multiplied by VGT_ESGS_RING_ITEMSIZE by hw
*
* GFX10-11 passthrough: [0] 1x uint32 with the following bitfields matching the prim export:
* [0:8] vertex index 0
* [9] edgeflag 0
* [10:18] vertex index 1
* [19] edgeflag 1
* [20:28] vertex index 2
* [29] edgeflag 2
* [31] 0 (valid prim)
*
* GFX12+: [0-1] 2x uint32 with the following bitfields matching the prim export except
* the GS invocation ID, which is 0 without a user GS, so it doesn't have to be masked
* out for the prim export:
* [0]:
* [0:7] vertex index 0
* [8] edgeflag 0
* [9:16] vertex index 1
* [17] edgeflag 1
* [18:25] vertex index 2
* [26] edgeflag 2
* [27:31] GS invocation ID
* [1]:
* [0:7] vertex index 3
* [9:16] vertex index 4
* [18:25] vertex index 5
*/
struct ac_arg gs_vtx_offset[6];
struct ac_arg gs_prim_id;
struct ac_arg gs_invocation_id; /* GFX6-11 only. GFX12+ uses gs_vtx_offset[0]. */
/* Streamout */
struct ac_arg streamout_config;
struct ac_arg streamout_write_index;
struct ac_arg streamout_offset[4];
/* PS */
struct ac_arg frag_pos[4];
struct ac_arg front_face;
struct ac_arg ancillary;
struct ac_arg sample_coverage;
struct ac_arg prim_mask;
struct ac_arg pops_collision_wave_id;
struct ac_arg load_provoking_vtx;
struct ac_arg persp_sample;
struct ac_arg persp_center;
struct ac_arg persp_centroid;
struct ac_arg pull_model;
struct ac_arg linear_sample;
struct ac_arg linear_center;
struct ac_arg linear_centroid;
struct ac_arg pos_fixed_pt;
/* CS */
struct ac_arg local_invocation_id_x;
struct ac_arg local_invocation_id_y;
struct ac_arg local_invocation_id_z;
struct ac_arg local_invocation_ids_packed;
struct ac_arg num_work_groups;
/* GFX6-11 only. GFX12+ uses read only SGPRs {TTMP9[0:31], TTMP7[0:15], TTMP7[16:31]}. */
struct ac_arg workgroup_ids[3];
struct ac_arg tg_size;
/* Mesh and task shaders */
struct ac_arg task_ring_entry; /* Pointer into the draw and payload rings. */
/* Vulkan only */
struct ac_arg push_constants;
struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
uint64_t inline_push_const_mask;
struct ac_arg dynamic_descriptors;
struct ac_arg dynamic_descriptors_offset_addr;
struct ac_arg view_index;
struct ac_arg force_vrs_rates;
/* RT */
struct {
struct ac_arg sbt_descriptors;
struct ac_arg launch_sizes[3];
struct ac_arg launch_size_addr;
struct ac_arg launch_ids[3];
struct ac_arg dynamic_callable_stack_base;
struct ac_arg traversal_shader_addr;
struct ac_arg payload_offset;
} rt;
};
void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned registers,
enum ac_arg_type type, struct ac_arg *arg);
void ac_add_return(struct ac_shader_args *info, enum ac_arg_regfile regfile);
void ac_add_preserved(struct ac_shader_args *info, const struct ac_arg *arg);
void ac_compact_ps_vgpr_args(struct ac_shader_args *info, uint32_t spi_ps_input);
#endif