2021-02-09 19:19:53 +01:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2021 Valve Corporation
|
|
|
|
|
*
|
2023-05-18 17:22:27 -04:00
|
|
|
* SPDX-License-Identifier: MIT
|
2021-02-09 19:19:53 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef AC_NIR_H
|
|
|
|
|
#define AC_NIR_H
|
|
|
|
|
|
2023-07-03 17:07:18 +02:00
|
|
|
#include "ac_hw_stage.h"
|
2021-02-09 19:19:53 +01:00
|
|
|
#include "ac_shader_args.h"
|
2021-04-01 12:43:31 +02:00
|
|
|
#include "ac_shader_util.h"
|
2025-02-06 11:56:36 +01:00
|
|
|
#include "nir_defines.h"
|
2021-02-09 19:19:53 +01:00
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
2021-12-12 20:20:36 -05:00
|
|
|
/* Encodings for a parameter-export slot.
 *
 * Values 0..31 are parameter offsets, values 64..67 select one of four default values, and 255
 * marks a slot as undefined. The register fields the first two groups correspond to are named
 * in the comments below.
 */
enum
{
   /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
   AC_EXP_PARAM_OFFSET_0 = 0,
   AC_EXP_PARAM_OFFSET_31 = 31,
   /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
   AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
   AC_EXP_PARAM_DEFAULT_VAL_0001, /* 65 */
   AC_EXP_PARAM_DEFAULT_VAL_1110, /* 66 */
   AC_EXP_PARAM_DEFAULT_VAL_1111, /* 67 */
   AC_EXP_PARAM_UNDEFINED = 255,
};
|
|
|
|
|
|
2022-12-19 14:27:56 +08:00
|
|
|
/* Export flags. Each constant is a distinct single bit, so they can be OR'ed together. */
enum {
   AC_EXP_FLAG_COMPRESSED = (1 << 0),
   AC_EXP_FLAG_DONE = (1 << 1),
   AC_EXP_FLAG_VALID_MASK = (1 << 2),
};
|
|
|
|
|
|
2026-01-22 21:43:02 -05:00
|
|
|
enum {
|
|
|
|
|
/* Whether nir_tex_instr should treat the deref or handle as an image binding
|
|
|
|
|
* (image_load lowered to tex, etc.).
|
|
|
|
|
*/
|
|
|
|
|
AC_NIR_TEX_BACKEND_FLAG_IS_IMAGE = BITFIELD_BIT(0),
|
|
|
|
|
};
|
|
|
|
|
|
2025-01-09 20:02:37 -06:00
|
|
|
struct ac_nir_config {
|
|
|
|
|
enum amd_gfx_level gfx_level;
|
|
|
|
|
bool uses_aco;
|
|
|
|
|
};
|
|
|
|
|
|
2022-05-12 15:48:24 +02:00
|
|
|
/* Maps I/O semantics to the actual location used by the lowering pass.
 *
 * Callback type: receives the semantic as an unsigned value and returns the mapped location,
 * also as an unsigned value.
 */
typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
|
|
|
|
|
|
2021-04-01 12:43:31 +02:00
|
|
|
/* Forward declaration of nir_builder so we don't have to include nir_builder.h here.
 * Only pointers to nir_builder can be used through this declaration.
 */
struct nir_builder;
typedef struct nir_builder nir_builder;
|
|
|
|
|
|
2024-12-17 03:15:06 -05:00
|
|
|
/* Forward declaration of nir_xfb_info; only pointers to it are used in this header. */
struct nir_xfb_info;
typedef struct nir_xfb_info nir_xfb_info;
|
|
|
|
|
|
2022-08-03 11:53:29 +02:00
|
|
|
/* Executed by ac_nir_cull when the current primitive is accepted. */
|
|
|
|
|
typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
|
|
|
|
|
|
2025-01-09 16:03:02 -06:00
|
|
|
void
|
|
|
|
|
ac_nir_set_options(struct radeon_info *info, bool use_llvm,
|
|
|
|
|
nir_shader_compiler_options *options);
|
|
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *
|
2023-03-18 21:32:16 +08:00
|
|
|
ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
|
|
|
|
|
struct ac_arg arg, unsigned relative_index);
|
|
|
|
|
|
2024-12-29 20:09:52 -05:00
|
|
|
nir_def *
|
|
|
|
|
ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg);
|
|
|
|
|
|
|
|
|
|
nir_def *
|
|
|
|
|
ac_nir_load_arg_upper_bound(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
|
|
|
|
|
unsigned upper_bound);
|
2021-09-29 18:25:03 +02:00
|
|
|
|
2022-05-13 12:49:40 +02:00
|
|
|
void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *val);
|
2022-05-13 12:49:40 +02:00
|
|
|
|
2023-08-12 16:17:15 -04:00
|
|
|
nir_def *
|
2022-08-10 19:18:15 +08:00
|
|
|
ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
|
|
|
|
|
unsigned rshift, unsigned bitwidth);
|
|
|
|
|
|
2025-08-08 17:05:35 -04:00
|
|
|
nir_def *
|
|
|
|
|
ac_nir_load_smem(nir_builder *b, unsigned num_components, nir_def *addr, nir_def *offset,
|
|
|
|
|
unsigned align_mul, enum gl_access_qualifier access);
|
|
|
|
|
|
2023-03-29 19:28:42 +02:00
|
|
|
/* Shader pass declaration; returns a bool.
 * NOTE(review): presumably lowers sin/cos and reports whether the shader changed - confirm.
 */
bool
ac_nir_lower_sin_cos(nir_shader *shader);
|
|
|
|
|
|
2026-01-31 15:38:11 -05:00
|
|
|
typedef struct {
|
|
|
|
|
enum amd_gfx_level gfx_level;
|
|
|
|
|
bool has_ls_vgpr_init_bug;
|
|
|
|
|
const enum ac_hw_stage hw_stage;
|
|
|
|
|
unsigned wave_size;
|
|
|
|
|
unsigned workgroup_size;
|
|
|
|
|
bool use_llvm;
|
2026-01-31 15:55:40 -05:00
|
|
|
bool load_grid_size_from_user_sgpr;
|
2026-01-31 15:38:11 -05:00
|
|
|
} ac_nir_lower_intrinsics_to_args_options;
|
|
|
|
|
|
|
|
|
|
bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const struct ac_shader_args *ac_args,
|
|
|
|
|
const ac_nir_lower_intrinsics_to_args_options *options);
|
2023-07-03 17:07:18 +02:00
|
|
|
|
2024-12-17 03:15:06 -05:00
|
|
|
/* Returns a nir_xfb_info pointer for the (unmodified) shader `nir`.
 * NOTE(review): ownership of the returned object and the sort order are not visible in this
 * header - confirm against the implementation.
 */
nir_xfb_info *
ac_nir_get_sorted_xfb_info(const nir_shader *nir);
|
|
|
|
|
|
2021-12-12 21:20:09 -05:00
|
|
|
bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
|
|
|
|
|
int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
|
|
|
|
|
uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]);
|
|
|
|
|
|
2025-04-23 13:52:39 -04:00
|
|
|
/* Tessellation I/O description, filled through the `io_info` parameter of
 * ac_nir_get_tess_io_info.
 *
 * Per-vertex slots (and tess levels) use 64-bit masks; generic per-patch slots use 32-bit masks.
 */
typedef struct {
   /* Per-vertex slots and tess levels. */
   uint64_t vram_output_mask;
   uint64_t lds_output_mask;
   uint64_t vgpr_output_mask; /* Hold the output values in VGPRs until the end. */
   /* Generic per-patch slots. */
   uint32_t vram_patch_output_mask;
   uint32_t lds_patch_output_mask;
   uint32_t vgpr_patch_output_mask; /* Hold the output values in VGPRs until the end. */

   /* The highest index returned by map_io + 1. */
   uint8_t highest_remapped_vram_output;
   uint8_t highest_remapped_vram_patch_output;
} ac_nir_tess_io_info;
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
ac_nir_get_tess_io_info(const nir_shader *tcs, const nir_tcs_info *tcs_info, uint64_t tes_inputs_read,
|
2025-04-23 21:43:59 -04:00
|
|
|
uint32_t tes_patch_inputs_read, ac_nir_map_io_driver_location map_io,
|
|
|
|
|
bool remapped_outputs_include_tess_levels, ac_nir_tess_io_info *io_info);
|
2025-04-23 13:52:39 -04:00
|
|
|
|
2025-02-17 23:49:12 +01:00
|
|
|
bool
|
2021-02-09 19:19:53 +01:00
|
|
|
ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
|
2022-05-12 15:48:24 +02:00
|
|
|
ac_nir_map_io_driver_location map,
|
2024-09-29 17:30:49 -04:00
|
|
|
enum amd_gfx_level gfx_level,
|
2021-02-09 19:19:53 +01:00
|
|
|
bool tcs_in_out_eq,
|
2024-10-02 16:48:39 -04:00
|
|
|
uint64_t tcs_inputs_via_temp,
|
|
|
|
|
uint64_t tcs_inputs_via_lds);
|
2021-02-09 19:19:53 +01:00
|
|
|
|
2025-02-17 23:54:51 +01:00
|
|
|
bool
|
2021-02-09 19:19:53 +01:00
|
|
|
ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
|
2022-05-12 15:48:24 +02:00
|
|
|
ac_nir_map_io_driver_location map,
|
2024-09-29 17:30:49 -04:00
|
|
|
enum amd_gfx_level gfx_level,
|
2024-08-25 07:38:03 -04:00
|
|
|
bool tcs_in_out_eq,
|
2024-10-02 16:48:39 -04:00
|
|
|
uint64_t tcs_inputs_via_temp,
|
|
|
|
|
uint64_t tcs_inputs_via_lds);
|
2021-02-09 19:19:53 +01:00
|
|
|
|
2025-02-18 17:03:02 +01:00
|
|
|
bool
|
2024-11-16 21:33:36 -05:00
|
|
|
ac_nir_lower_hs_outputs_to_mem(nir_shader *shader, const nir_tcs_info *info,
|
2025-04-23 17:36:47 -04:00
|
|
|
const ac_nir_tess_io_info *io_info,
|
2022-05-12 15:48:24 +02:00
|
|
|
ac_nir_map_io_driver_location map,
|
2022-05-12 02:50:17 -04:00
|
|
|
enum amd_gfx_level gfx_level,
|
2024-09-29 23:17:21 -04:00
|
|
|
unsigned wave_size);
|
2021-02-09 19:19:53 +01:00
|
|
|
|
2025-02-18 17:03:56 +01:00
|
|
|
bool
|
2021-02-09 19:19:53 +01:00
|
|
|
ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
|
2022-05-23 17:26:00 +08:00
|
|
|
ac_nir_map_io_driver_location map);
|
2021-02-09 19:19:53 +01:00
|
|
|
|
2024-10-14 21:05:31 -04:00
|
|
|
void
|
2025-04-23 16:51:16 -04:00
|
|
|
ac_nir_compute_tess_wg_info(const struct radeon_info *info, const ac_nir_tess_io_info *io_info,
|
|
|
|
|
unsigned tcs_vertices_out, unsigned wave_size, bool tess_uses_primid,
|
2024-10-14 21:05:31 -04:00
|
|
|
unsigned num_tcs_input_cp, unsigned lds_input_vertex_size,
|
2025-04-23 21:43:59 -04:00
|
|
|
unsigned num_remapped_tess_level_outputs, unsigned *num_patches_per_wg,
|
2025-10-03 11:53:39 +02:00
|
|
|
unsigned *lds_size);
|
2024-10-14 21:05:31 -04:00
|
|
|
|
2025-02-18 17:04:46 +01:00
|
|
|
bool
|
2021-03-02 15:30:58 +01:00
|
|
|
ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
|
2022-05-12 15:48:24 +02:00
|
|
|
ac_nir_map_io_driver_location map,
|
2022-05-12 02:50:17 -04:00
|
|
|
enum amd_gfx_level gfx_level,
|
2024-06-20 17:11:23 +02:00
|
|
|
unsigned esgs_itemsize,
|
|
|
|
|
uint64_t gs_inputs_read);
|
2021-03-02 15:30:58 +01:00
|
|
|
|
2025-02-18 17:05:34 +01:00
|
|
|
bool
|
2021-03-02 15:30:58 +01:00
|
|
|
ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
|
2022-05-12 15:48:24 +02:00
|
|
|
ac_nir_map_io_driver_location map,
|
2022-05-30 19:46:15 +08:00
|
|
|
enum amd_gfx_level gfx_level,
|
|
|
|
|
bool triangle_strip_adjacency_fix);
|
2021-03-02 15:30:58 +01:00
|
|
|
|
2021-04-22 16:14:32 +02:00
|
|
|
bool
|
2026-02-18 13:27:38 +00:00
|
|
|
ac_nir_lower_indirect_derefs(nir_shader *shader);
|
2021-04-22 16:14:32 +02:00
|
|
|
|
2022-10-14 17:15:39 +01:00
|
|
|
/* Options shared by the NGG lowering entry points declared in this header
 * (ac_nir_lower_ngg_nogs, ac_nir_lower_ngg_gs and ac_nir_lower_ngg_mesh).
 *
 * Fields are grouped by the shader stages they apply to; see the section comments.
 */
typedef struct {
   const struct ac_cu_info *cu_info;

   unsigned max_workgroup_size;
   unsigned wave_size;

   /* The mask of clip and cull distances that the shader should export.
    *
    * Clip/cull distance components that are missing in export_clipdist_mask are removed, improving
    * throughput by up to 50% (3 pos exports -> 2 pos exports). The caller shouldn't set no-op
    * components (>= 0) in export_clipdist_mask to remove those completely. No-op components
    * should be determined by nir_opt_clip_cull_const before this.
    *
    * If can_cull is true, the shader culls cull distances and they are not exported to increase
    * throughput by reducing the number of pos exports. cull_clipdist_mask must be set to include
    * all cull distances that are < 0. The best case scenario is 100% increase in throughput from
    * not exporting any cull distances (2 pos exports -> 1 pos export).
    */
   uint8_t export_clipdist_mask;

   const uint8_t *vs_output_param_offset; /* GFX11+ */
   bool has_param_exports;
   bool has_gen_prim_query;
   bool has_ms_gs_invocations_query;

   /* VS/GS */
   /* The mask of clip and cull distances that the shader should cull against.
    * If no clip and cull distance outputs are present, it will load clip planes and cull
    * either against CLIP_VERTEX or POS.
    */
   uint8_t cull_clipdist_mask;
   bool write_pos_to_clipvertex;
   bool can_cull; /* if true, cull distances are not exported because the shader culls against them */
   bool disable_streamout;
   bool has_xfb_prim_query;
   bool use_gfx12_xfb_intrinsic;
   bool has_gs_primitives_query;
   bool force_vrs;
   bool compact_primitives;
   /* Skip culling dependent on the viewport state, which is frustum culling and small prim
    * culling. Set this when the shader writes the viewport index.
    */
   bool skip_viewport_state_culling;
   /* Use the point-triangle intersection to cull small triangles. */
   bool use_point_tri_intersection;

   /* VS */
   unsigned num_vertices_per_primitive;
   bool early_prim_export;
   bool passthrough;
   bool use_edgeflags;
   bool export_primitive_id;
   bool export_primitive_id_per_prim;
   uint32_t instance_rate_inputs;

   /* MS */
   bool multiview;
} ac_nir_lower_ngg_options;
|
|
|
|
|
|
2025-02-18 17:06:44 +01:00
|
|
|
bool
|
2025-05-28 08:30:31 -04:00
|
|
|
ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options,
|
2025-06-02 22:27:41 -04:00
|
|
|
uint32_t *out_lds_vertex_size, uint8_t *out_lds_scratch_size);
|
2021-04-09 16:56:57 +02:00
|
|
|
|
2025-02-18 17:07:28 +01:00
|
|
|
bool
|
2025-05-28 12:43:12 -04:00
|
|
|
ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options,
|
2025-06-02 22:27:41 -04:00
|
|
|
uint32_t *out_lds_vertex_size, uint8_t *out_lds_scratch_size);
|
2021-04-22 14:43:54 +02:00
|
|
|
|
2025-02-18 17:08:18 +01:00
|
|
|
bool
|
2026-02-26 15:21:51 +00:00
|
|
|
ac_nir_lower_ngg_mesh(nir_shader *shader, const ac_nir_lower_ngg_options *options,
|
|
|
|
|
bool *out_needs_scratch_ring);
|
2021-08-29 10:32:01 +02:00
|
|
|
|
2025-02-18 17:09:54 +01:00
|
|
|
bool
|
2022-01-15 13:56:13 +01:00
|
|
|
ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
|
2023-09-21 13:53:59 +02:00
|
|
|
bool has_query);
|
2022-01-15 13:56:13 +01:00
|
|
|
|
2025-02-18 17:10:36 +01:00
|
|
|
bool
|
2025-12-18 22:46:58 -06:00
|
|
|
ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader);
|
2022-01-15 13:56:13 +01:00
|
|
|
|
radv,ac/nir: lower global access to _amd global access intrinsics
fossil-db (Sienna Cichlid):
Totals from 400 (0.30% of 134621) affected shaders:
VGPRs: 18696 -> 18688 (-0.04%)
CodeSize: 2031348 -> 1946640 (-4.17%)
Instrs: 374703 -> 360226 (-3.86%)
Latency: 4200727 -> 4108628 (-2.19%); split: -2.20%, +0.01%
InvThroughput: 1059935 -> 1029441 (-2.88%); split: -2.88%, +0.00%
VClause: 5777 -> 5771 (-0.10%)
SClause: 11890 -> 10891 (-8.40%); split: -8.57%, +0.17%
Copies: 34035 -> 33259 (-2.28%); split: -2.98%, +0.70%
Branches: 11108 -> 11100 (-0.07%); split: -0.08%, +0.01%
PreSGPRs: 15999 -> 15942 (-0.36%); split: -0.44%, +0.08%
PreVGPRs: 16994 -> 16970 (-0.14%)
fossil-db (Polaris10):
Totals from 400 (0.29% of 135668) affected shaders:
SGPRs: 23799 -> 22919 (-3.70%); split: -4.30%, +0.61%
VGPRs: 18480 -> 18472 (-0.04%)
CodeSize: 2090316 -> 2041592 (-2.33%)
Instrs: 395461 -> 385747 (-2.46%); split: -2.46%, +0.00%
Latency: 5045768 -> 5020196 (-0.51%); split: -0.53%, +0.02%
InvThroughput: 2694320 -> 2689886 (-0.16%); split: -0.23%, +0.07%
VClause: 5982 -> 5968 (-0.23%)
SClause: 12064 -> 10823 (-10.29%); split: -10.33%, +0.04%
Copies: 48233 -> 48322 (+0.18%); split: -0.47%, +0.65%
PreSGPRs: 16409 -> 16358 (-0.31%); split: -0.39%, +0.08%
fossil-db (Pitcairn):
Totals from 400 (0.29% of 135668) affected shaders:
SGPRs: 22431 -> 22215 (-0.96%); split: -2.60%, +1.64%
VGPRs: 18776 -> 18560 (-1.15%); split: -1.21%, +0.06%
CodeSize: 2104440 -> 2017708 (-4.12%)
MaxWaves: 2363 -> 2367 (+0.17%)
Instrs: 413099 -> 397446 (-3.79%)
Latency: 5507707 -> 5450251 (-1.04%); split: -1.12%, +0.07%
InvThroughput: 2838867 -> 2786903 (-1.83%); split: -1.83%, +0.00%
VClause: 10334 -> 10097 (-2.29%)
SClause: 12346 -> 11005 (-10.86%); split: -10.89%, +0.02%
Copies: 54034 -> 52065 (-3.64%); split: -3.99%, +0.35%
PreSGPRs: 17916 -> 17857 (-0.33%); split: -0.40%, +0.07%
PreVGPRs: 16917 -> 16893 (-0.14%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14124>
2021-12-02 14:35:15 +00:00
|
|
|
bool
|
|
|
|
|
ac_nir_lower_global_access(nir_shader *shader);
|
|
|
|
|
|
2022-07-20 11:23:26 -04:00
|
|
|
/* Shader pass taking the target GFX level; returns a bool.
 * NOTE(review): presumably lowers resource-info queries and reports whether the shader
 * changed - confirm against the implementation.
 */
bool
ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
|
2022-10-28 17:21:07 -04:00
|
|
|
/* Shader pass declaration; returns a bool.
 * NOTE(review): presumably rewrites image opcodes and reports whether the shader changed -
 * confirm against the implementation.
 */
bool
ac_nir_lower_image_opcodes(nir_shader *nir);
|
2022-07-20 11:23:26 -04:00
|
|
|
|
2025-02-18 17:11:19 +01:00
|
|
|
bool
|
2022-12-23 17:17:54 +08:00
|
|
|
ac_nir_lower_legacy_vs(nir_shader *nir,
|
|
|
|
|
enum amd_gfx_level gfx_level,
|
2025-05-30 06:28:22 -04:00
|
|
|
uint32_t export_clipdist_mask,
|
2025-05-26 06:26:27 -04:00
|
|
|
bool write_pos_to_clipvertex,
|
2022-12-23 17:17:54 +08:00
|
|
|
const uint8_t *param_offsets,
|
|
|
|
|
bool has_param_exports,
|
|
|
|
|
bool export_primitive_id,
|
|
|
|
|
bool disable_streamout,
|
|
|
|
|
bool force_vrs);
|
2022-09-30 19:29:43 +01:00
|
|
|
|
2025-05-31 11:14:30 -04:00
|
|
|
typedef struct {
|
|
|
|
|
bool has_gen_prim_query;
|
|
|
|
|
bool has_pipeline_stats_query;
|
|
|
|
|
|
|
|
|
|
enum amd_gfx_level gfx_level;
|
|
|
|
|
uint32_t export_clipdist_mask;
|
|
|
|
|
bool write_pos_to_clipvertex;
|
|
|
|
|
const uint8_t *param_offsets;
|
|
|
|
|
bool has_param_exports;
|
|
|
|
|
bool disable_streamout;
|
|
|
|
|
bool force_vrs;
|
|
|
|
|
} ac_nir_lower_legacy_gs_options;
|
|
|
|
|
|
2025-06-01 16:39:35 -04:00
|
|
|
/* Information written through the `out_info` parameter of ac_nir_lower_legacy_gs:
 * one component count for each of 4 streams.
 */
typedef struct {
   uint8_t num_components_per_stream[4];
} ac_nir_legacy_gs_info;
|
|
|
|
|
|
2025-02-18 17:12:18 +01:00
|
|
|
bool
|
2025-05-31 11:14:30 -04:00
|
|
|
ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options,
|
2025-06-01 16:39:35 -04:00
|
|
|
nir_shader **gs_copy_shader, ac_nir_legacy_gs_info *out_info);
|
2022-11-30 17:57:37 +08:00
|
|
|
|
2025-01-14 22:18:07 -05:00
|
|
|
/* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't
|
|
|
|
|
* generate AMD-specific intrinsics.
|
|
|
|
|
*/
|
2023-02-25 21:14:52 +08:00
|
|
|
typedef struct {
|
2024-12-28 11:27:56 -05:00
|
|
|
/* System values. */
|
2025-01-29 21:55:12 -05:00
|
|
|
bool msaa_disabled; /* true if MSAA is disabled, false may mean that the state is unknown */
|
2025-01-05 12:45:06 -05:00
|
|
|
bool uses_vrs_coarse_shading;
|
2024-12-29 12:58:34 -05:00
|
|
|
bool load_sample_positions_always_loads_current_ones;
|
|
|
|
|
bool dynamic_rasterization_samples;
|
2024-12-29 12:58:34 -05:00
|
|
|
int force_front_face; /* 0 -> keep, 1 -> set to true, -1 -> set to false */
|
2025-01-02 18:00:22 -05:00
|
|
|
bool optimize_frag_coord; /* TODO: remove this after RADV can handle it */
|
2025-01-15 21:10:35 -05:00
|
|
|
bool frag_coord_is_center; /* GL requirement for sample shading */
|
2025-01-02 18:00:22 -05:00
|
|
|
|
|
|
|
|
/* frag_coord/pixel_coord:
|
2025-01-29 21:55:12 -05:00
|
|
|
* allow_pixel_coord && (frag_coord_is_center || ps_iter_samples == 1 || msaa_disabled ||
|
2025-01-02 18:00:22 -05:00
|
|
|
* the fractional part of frag_coord.xy isn't used):
|
|
|
|
|
* * frag_coord.xy is replaced by u2f(pixel_coord) + 0.5.
|
|
|
|
|
* else:
|
|
|
|
|
* * pixel_coord is replaced by f2u16(frag_coord.xy)
|
|
|
|
|
* * ps_iter_samples == 0 means the state is unknown.
|
|
|
|
|
*
|
|
|
|
|
* barycentrics:
|
2025-01-29 21:55:12 -05:00
|
|
|
* msaa_disabled:
|
2025-01-05 12:45:06 -05:00
|
|
|
* * All barycentrics including at_sample but excluding at_offset are changed to
|
|
|
|
|
* barycentric_pixel
|
2025-01-02 22:08:16 -05:00
|
|
|
* ps_iter_samples >= 2:
|
|
|
|
|
* * All barycentrics are changed to per-sample interpolation except at_offset/at_sample.
|
|
|
|
|
* * barycentric_at_sample(sample_id) is replaced by barycentric_sample.
|
|
|
|
|
*
|
|
|
|
|
* sample_mask_in:
|
2025-01-29 21:55:12 -05:00
|
|
|
* msaa_disabled && !uses_vrs_coarse_shading:
|
2025-01-05 12:45:06 -05:00
|
|
|
* * sample_mask_in is replaced by b2i32(!helper_invocation)
|
2025-01-02 22:08:16 -05:00
|
|
|
* ps_iter_samples == 2, 4:
|
|
|
|
|
* * sample_mask_in is changed to (sample_mask_in & (ps_iter_mask << sample_id))
|
|
|
|
|
* ps_iter_samples == 8:
|
|
|
|
|
* * sample_mask_in is replaced by 1 << sample_id.
|
|
|
|
|
*
|
|
|
|
|
* When ps_iter_samples is equal to rasterization samples, set ps_iter_samples = 8 for this pass.
|
|
|
|
|
*/
|
|
|
|
|
unsigned ps_iter_samples;
|
2023-03-02 20:08:14 +08:00
|
|
|
|
2024-12-31 09:39:44 -05:00
|
|
|
/* fbfetch_output */
|
|
|
|
|
bool fbfetch_is_1D;
|
|
|
|
|
bool fbfetch_layered;
|
|
|
|
|
bool fbfetch_msaa;
|
|
|
|
|
bool fbfetch_apply_fmask;
|
|
|
|
|
|
2025-11-22 22:07:50 -05:00
|
|
|
/* Inputs. */
|
|
|
|
|
bool lower_color_inputs_to_load_color01;
|
|
|
|
|
|
2024-12-28 11:27:56 -05:00
|
|
|
/* Outputs. */
|
|
|
|
|
bool clamp_color; /* GL only */
|
|
|
|
|
bool alpha_test_alpha_to_one; /* GL only, this only affects alpha test */
|
|
|
|
|
enum compare_func alpha_func; /* GL only */
|
|
|
|
|
bool keep_alpha_for_mrtz; /* this prevents killing alpha based on spi_shader_col_format_hint */
|
|
|
|
|
unsigned spi_shader_col_format_hint; /* this only shrinks and eliminates output stores */
|
2024-12-12 21:32:41 -05:00
|
|
|
bool kill_z;
|
|
|
|
|
bool kill_stencil;
|
|
|
|
|
bool kill_samplemask;
|
2024-12-28 11:27:56 -05:00
|
|
|
} ac_nir_lower_ps_early_options;
|
|
|
|
|
|
2024-12-31 10:11:23 -05:00
|
|
|
bool
|
2024-12-28 11:27:56 -05:00
|
|
|
ac_nir_lower_ps_early(nir_shader *nir, const ac_nir_lower_ps_early_options *options);
|
|
|
|
|
|
2025-01-14 22:18:07 -05:00
|
|
|
/* This is a post-link pass. It shouldn't eliminate any code and it shouldn't affect shader_info
|
|
|
|
|
* (those should be done in the early pass).
|
|
|
|
|
*/
|
2024-12-28 11:27:56 -05:00
|
|
|
typedef struct {
|
|
|
|
|
enum amd_gfx_level gfx_level;
|
|
|
|
|
bool use_aco;
|
|
|
|
|
|
|
|
|
|
/* System values. */
|
|
|
|
|
bool bc_optimize_for_persp;
|
|
|
|
|
bool bc_optimize_for_linear;
|
|
|
|
|
|
|
|
|
|
/* Exports. */
|
|
|
|
|
bool uses_discard;
|
2025-03-07 12:47:41 +01:00
|
|
|
bool dcc_decompress_gfx11;
|
2024-12-28 11:27:56 -05:00
|
|
|
bool alpha_to_coverage_via_mrtz;
|
|
|
|
|
bool dual_src_blend_swizzle;
|
|
|
|
|
unsigned spi_shader_col_format;
|
|
|
|
|
unsigned color_is_int8;
|
|
|
|
|
unsigned color_is_int10;
|
|
|
|
|
bool alpha_to_one;
|
2023-02-25 21:14:52 +08:00
|
|
|
|
|
|
|
|
/* Vulkan only */
|
|
|
|
|
unsigned enable_mrt_output_nan_fixup;
|
2023-03-29 11:50:18 +08:00
|
|
|
bool no_color_export;
|
2023-11-16 16:51:28 +01:00
|
|
|
bool no_depth_export;
|
2024-12-28 11:27:56 -05:00
|
|
|
} ac_nir_lower_ps_late_options;
|
2023-02-25 21:14:52 +08:00
|
|
|
|
2024-12-31 10:11:23 -05:00
|
|
|
bool
|
2024-12-28 11:27:56 -05:00
|
|
|
ac_nir_lower_ps_late(nir_shader *nir, const ac_nir_lower_ps_late_options *options);
|
2023-02-25 21:14:52 +08:00
|
|
|
|
2023-04-05 16:58:43 +01:00
|
|
|
typedef struct {
|
|
|
|
|
enum amd_gfx_level gfx_level;
|
2023-04-25 15:37:02 +01:00
|
|
|
|
|
|
|
|
/* If true, round the layer component of the coordinates source to the nearest
|
2023-07-12 21:14:26 +01:00
|
|
|
* integer for all array ops. This is always done for cube array ops.
|
2023-04-25 15:37:02 +01:00
|
|
|
*/
|
|
|
|
|
bool lower_array_layer_round_even;
|
2023-04-24 12:21:04 +01:00
|
|
|
|
|
|
|
|
/* Fix derivatives of constants and FS inputs in control flow.
|
|
|
|
|
*
|
|
|
|
|
* Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads,
|
|
|
|
|
* pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and
|
|
|
|
|
* texture samples with nir_tex_src_min_lod.
|
|
|
|
|
*
|
|
|
|
|
* The layer must also be a constant or FS input.
|
|
|
|
|
*/
|
|
|
|
|
bool fix_derivs_in_divergent_cf;
|
|
|
|
|
unsigned max_wqm_vgprs;
|
2026-01-22 15:34:05 -05:00
|
|
|
} ac_nir_lower_image_tex_options;
|
2023-04-05 16:58:43 +01:00
|
|
|
|
|
|
|
|
bool
|
2026-01-22 15:34:05 -05:00
|
|
|
ac_nir_lower_image_tex(nir_shader *nir, const ac_nir_lower_image_tex_options *options);
|
2023-04-05 16:58:43 +01:00
|
|
|
|
2023-11-10 19:14:34 -05:00
|
|
|
void
|
|
|
|
|
ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4);
|
|
|
|
|
|
2024-05-15 03:31:32 +02:00
|
|
|
unsigned
|
|
|
|
|
ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer);
|
|
|
|
|
|
2024-09-07 14:49:47 +02:00
|
|
|
bool
|
|
|
|
|
ac_nir_opt_shared_append(nir_shader *shader);
|
|
|
|
|
|
ac/nir,radv,radeonsi: flip branches to avoid waitcnts
fossil-db (navi31):
Totals from 5123 (6.42% of 79825) affected shaders:
Instrs: 12712435 -> 12703672 (-0.07%); split: -0.12%, +0.05%
CodeSize: 67068852 -> 67033244 (-0.05%); split: -0.10%, +0.05%
VGPRs: 363896 -> 363956 (+0.02%)
SpillSGPRs: 5035 -> 5074 (+0.77%); split: -0.83%, +1.61%
Latency: 115048972 -> 111944013 (-2.70%); split: -2.89%, +0.19%
InvThroughput: 19102126 -> 18696069 (-2.13%); split: -2.34%, +0.22%
VClause: 258693 -> 258770 (+0.03%); split: -0.01%, +0.04%
SClause: 346271 -> 346225 (-0.01%); split: -0.02%, +0.00%
Copies: 1040815 -> 1042017 (+0.12%); split: -0.23%, +0.34%
Branches: 332467 -> 332565 (+0.03%); split: -0.04%, +0.07%
PreSGPRs: 304888 -> 304699 (-0.06%); split: -0.10%, +0.04%
PreVGPRs: 296652 -> 296654 (+0.00%)
VALU: 7591803 -> 7594601 (+0.04%); split: -0.01%, +0.05%
SALU: 1454420 -> 1455764 (+0.09%); split: -0.24%, +0.33%
VOPD: 1826 -> 1810 (-0.88%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38262>
2025-11-12 11:09:02 +00:00
|
|
|
bool
|
|
|
|
|
ac_nir_opt_flip_if_for_mem_loads(nir_shader *shader);
|
|
|
|
|
|
2024-10-22 14:37:19 +01:00
|
|
|
bool
|
2025-10-08 19:15:50 +02:00
|
|
|
ac_nir_flag_smem_for_loads(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
|
2024-10-22 14:37:19 +01:00
|
|
|
|
2025-12-19 12:35:28 -06:00
|
|
|
bool
|
|
|
|
|
ac_nir_fixup_mem_access_gfx6(nir_shader *shader,
|
|
|
|
|
struct ac_shader_args *args,
|
|
|
|
|
const uint32_t padding_bytes,
|
|
|
|
|
const bool fixup_null_desc,
|
|
|
|
|
const bool fixup_robust_oob);
|
|
|
|
|
|
2024-10-22 14:47:32 +01:00
|
|
|
bool
|
|
|
|
|
ac_nir_lower_mem_access_bit_sizes(nir_shader *shader, enum amd_gfx_level gfx_level, bool use_llvm);
|
|
|
|
|
|
2024-12-29 21:27:21 -05:00
|
|
|
bool
|
|
|
|
|
ac_nir_optimize_uniform_atomics(nir_shader *nir);
|
|
|
|
|
|
2025-01-09 15:54:00 -06:00
|
|
|
unsigned
|
|
|
|
|
ac_nir_lower_bit_size_callback(const nir_instr *instr, void *data);
|
|
|
|
|
|
2025-02-03 16:33:46 +00:00
|
|
|
bool
|
|
|
|
|
ac_nir_might_lower_bit_size(const nir_shader *shader);
|
|
|
|
|
|
2025-01-09 16:00:42 -06:00
|
|
|
bool
|
|
|
|
|
ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
|
|
|
|
|
unsigned num_components, int64_t hole_size,
|
|
|
|
|
nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
|
|
|
|
|
|
|
|
|
|
bool
|
2025-10-20 16:53:21 -04:00
|
|
|
ac_nir_scalarize_overfetching_loads_callback(const nir_intrinsic_instr *intr, const void *data);
|
2025-01-09 16:00:42 -06:00
|
|
|
|
2025-08-13 18:22:12 +02:00
|
|
|
bool
|
|
|
|
|
ac_nir_store_may_be_subdword(const nir_intrinsic_instr *instr);
|
2025-01-09 15:57:25 -06:00
|
|
|
|
radv: don't lower subdword phis to scalar
Totals from 193 (0.24% of 79839) affected shaders: (Navi48)
MaxWaves: 6004 -> 6024 (+0.33%)
Instrs: 169276 -> 166784 (-1.47%); split: -3.01%, +1.53%
CodeSize: 940608 -> 915768 (-2.64%); split: -4.29%, +1.64%
VGPRs: 8012 -> 7716 (-3.69%); split: -3.99%, +0.30%
SpillVGPRs: 185 -> 0 (-inf%)
Scratch: 13568 -> 0 (-inf%)
Latency: 2159787 -> 2147084 (-0.59%); split: -2.86%, +2.28%
InvThroughput: 664022 -> 395859 (-40.38%); split: -42.59%, +2.21%
VClause: 2998 -> 2880 (-3.94%); split: -4.27%, +0.33%
SClause: 3117 -> 3120 (+0.10%)
Copies: 21290 -> 16278 (-23.54%); split: -24.74%, +1.20%
Branches: 4757 -> 4760 (+0.06%); split: -0.34%, +0.40%
PreSGPRs: 7369 -> 7378 (+0.12%); split: -0.11%, +0.23%
PreVGPRs: 4257 -> 3859 (-9.35%); split: -9.94%, +0.59%
VALU: 83173 -> 79804 (-4.05%); split: -5.68%, +1.63%
SALU: 36672 -> 37318 (+1.76%); split: -0.02%, +1.78%
VMEM: 4012 -> 3762 (-6.23%); split: -6.83%, +0.60%
SMEM: 4300 -> 4303 (+0.07%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35784>
2025-06-26 17:36:16 +02:00
|
|
|
uint8_t
|
|
|
|
|
ac_nir_lower_phis_to_scalar_cb(const nir_instr *instr, const void *_);
|
|
|
|
|
|
2025-09-18 10:12:47 +02:00
|
|
|
bool
|
|
|
|
|
ac_nir_allow_offset_wrap_cb(nir_intrinsic_instr *instr, const void *data);
|
|
|
|
|
|
2025-11-26 21:08:51 -05:00
|
|
|
bool
|
|
|
|
|
ac_nir_op_supports_packed_math_16bit(const nir_alu_instr* alu);
|
|
|
|
|
|
2025-11-21 22:19:41 -05:00
|
|
|
uint8_t
|
|
|
|
|
ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data);
|
|
|
|
|
|
2021-02-09 19:19:53 +01:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#endif /* AC_NIR_H */
|