mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
radeonsi: move shader variant info and spi_ps_input_ena code into its own file
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34492>
This commit is contained in:
parent
2e8cac328a
commit
af8c4f19ab
6 changed files with 378 additions and 373 deletions
|
|
@ -65,6 +65,7 @@ files_libradeonsi = files(
|
|||
'si_shader_info.h',
|
||||
'si_shader_internal.h',
|
||||
'si_shader_nir.c',
|
||||
'si_shader_variant_info.c',
|
||||
'si_shaderlib_nir.c',
|
||||
'si_sqtt.c',
|
||||
'si_state.c',
|
||||
|
|
|
|||
|
|
@ -4,23 +4,15 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "si_shader.h"
|
||||
#include "ac_nir.h"
|
||||
#include "ac_rtld.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_range_analysis.h"
|
||||
#include "nir_serialize.h"
|
||||
#include "nir_tcs_info.h"
|
||||
#include "nir_xfb_info.h"
|
||||
#include "si_pipe.h"
|
||||
#include "si_shader_internal.h"
|
||||
#include "sid.h"
|
||||
#include "tgsi/tgsi_from_mesa.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
|
||||
static const char scratch_rsrc_dword0_symbol[] = "SCRATCH_RSRC_DWORD0";
|
||||
static const char scratch_rsrc_dword1_symbol[] = "SCRATCH_RSRC_DWORD1";
|
||||
|
|
@ -1800,7 +1792,7 @@ static void si_nir_assign_param_offsets(nir_shader *nir, struct si_shader *shade
|
|||
int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS])
|
||||
{
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
struct si_shader_binary_info *info = &shader->info;
|
||||
struct si_shader_variant_info *info = &shader->info;
|
||||
|
||||
uint64_t outputs_written = 0;
|
||||
uint32_t outputs_written_16bit = 0;
|
||||
|
|
@ -2417,367 +2409,6 @@ static void get_prev_stage_input_nir(struct si_shader *shader, struct si_linked_
|
|||
get_input_nir(&linked->producer_shader, &linked->producer);
|
||||
}
|
||||
|
||||
static void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader)
|
||||
{
|
||||
const union si_shader_key *key = &shader->key;
|
||||
|
||||
/* Enable POS_FIXED_PT if polygon stippling is enabled. */
|
||||
if (key->ps.part.prolog.poly_stipple)
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
|
||||
|
||||
/* Set up the enable bits for per-sample shading if needed. */
|
||||
if (key->ps.part.prolog.force_persp_sample_interp &&
|
||||
(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||
}
|
||||
if (key->ps.part.prolog.force_linear_sample_interp &&
|
||||
(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
|
||||
}
|
||||
if (key->ps.part.prolog.force_persp_center_interp &&
|
||||
(G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
|
||||
}
|
||||
if (key->ps.part.prolog.force_linear_center_interp &&
|
||||
(G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
|
||||
}
|
||||
|
||||
/* The sample mask fixup requires the sample ID. */
|
||||
if (key->ps.part.prolog.samplemask_log_ps_iter)
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1);
|
||||
|
||||
if (key->ps.part.prolog.force_samplemask_to_helper_invocation) {
|
||||
assert(key->ps.part.prolog.samplemask_log_ps_iter == 0);
|
||||
assert(!key->ps.mono.poly_line_smoothing);
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
|
||||
}
|
||||
|
||||
/* The sample mask fixup has an optimization that replaces the sample mask with the sample ID. */
|
||||
if (key->ps.part.prolog.samplemask_log_ps_iter == 3)
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
|
||||
|
||||
if (key->ps.part.prolog.get_frag_coord_from_pixel_coord) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_POS_X_FLOAT_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_POS_Y_FLOAT_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_fixup_spi_ps_input_config(struct si_shader *shader)
|
||||
{
|
||||
/* POW_W_FLOAT requires that one of the perspective weights is enabled. */
|
||||
if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
|
||||
!(shader->config.spi_ps_input_ena & 0xf)) {
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||
}
|
||||
|
||||
/* At least one pair of interpolation weights must be enabled. */
|
||||
if (!(shader->config.spi_ps_input_ena & 0x7f))
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||
}
|
||||
|
||||
static void
|
||||
si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir)
|
||||
{
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
assert(nir->info.use_aco_amd == si_shader_uses_aco(shader));
|
||||
const BITSET_WORD *sysvals = nir->info.system_values_read;
|
||||
/* Find out which frag coord components are used. */
|
||||
uint8_t frag_coord_mask = 0;
|
||||
|
||||
nir_divergence_analysis(nir);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
/* Since flat+convergent and non-flat components can occur in the same vec4, start with
|
||||
* all PS inputs as flat and change them to smooth when we find a component that's
|
||||
* interpolated.
|
||||
*/
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(shader->info.ps_inputs); i++)
|
||||
shader->info.ps_inputs[i].interpolate = INTERP_MODE_FLAT;
|
||||
}
|
||||
|
||||
nir_foreach_block(block, nir_shader_get_entrypoint(nir)) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_instance_id:
|
||||
shader->info.uses_instance_id = true;
|
||||
break;
|
||||
case nir_intrinsic_load_base_instance:
|
||||
shader->info.uses_base_instance = true;
|
||||
break;
|
||||
case nir_intrinsic_load_draw_id:
|
||||
shader->info.uses_draw_id = true;
|
||||
break;
|
||||
case nir_intrinsic_load_frag_coord:
|
||||
case nir_intrinsic_load_sample_pos:
|
||||
frag_coord_mask |= nir_def_components_read(&intr->def);
|
||||
break;
|
||||
case nir_intrinsic_load_input:
|
||||
case nir_intrinsic_load_input_vertex:
|
||||
case nir_intrinsic_load_per_vertex_input:
|
||||
case nir_intrinsic_load_interpolated_input: {
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_load_input) {
|
||||
if ((shader->key.ge.mono.instance_divisor_is_one |
|
||||
shader->key.ge.mono.instance_divisor_is_fetched) &
|
||||
BITFIELD_BIT(nir_intrinsic_base(intr))) {
|
||||
/* Instanced attribs. */
|
||||
shader->info.uses_instance_id = true;
|
||||
shader->info.uses_base_instance = true;
|
||||
}
|
||||
}
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
unsigned index = nir_intrinsic_base(intr);
|
||||
assert(sem.num_slots == 1);
|
||||
|
||||
shader->info.num_ps_inputs = MAX2(shader->info.num_ps_inputs, index + 1);
|
||||
shader->info.ps_inputs[index].semantic = sem.location;
|
||||
/* Determine interpolation mode. This only cares about FLAT/SMOOTH/COLOR.
|
||||
* COLOR is only for nir_intrinsic_load_color0/1.
|
||||
*/
|
||||
if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
|
||||
shader->info.ps_inputs[index].interpolate = INTERP_MODE_SMOOTH;
|
||||
if (intr->def.bit_size == 16)
|
||||
shader->info.ps_inputs[index].fp16_lo_hi_valid |= 0x1 << sem.high_16bits;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_color0:
|
||||
assert(!shader->is_monolithic);
|
||||
shader->info.ps_colors_read |= nir_def_components_read(&intr->def);
|
||||
break;
|
||||
case nir_intrinsic_load_color1:
|
||||
assert(!shader->is_monolithic);
|
||||
shader->info.ps_colors_read |= nir_def_components_read(&intr->def) << 4;
|
||||
break;
|
||||
case nir_intrinsic_load_ubo:
|
||||
if (intr->src[1].ssa->divergent)
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
break;
|
||||
case nir_intrinsic_load_constant:
|
||||
if (intr->src[0].ssa->divergent)
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
break;
|
||||
/* Global */
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_global_atomic:
|
||||
case nir_intrinsic_global_atomic_swap:
|
||||
/* SSBOs (this list is from si_nir_lower_resource.c) */
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_ssbo_atomic:
|
||||
case nir_intrinsic_ssbo_atomic_swap:
|
||||
/* Images (this list is from si_nir_lower_resource.c) */
|
||||
case nir_intrinsic_image_deref_load:
|
||||
case nir_intrinsic_image_deref_sparse_load:
|
||||
case nir_intrinsic_image_deref_fragment_mask_load_amd:
|
||||
case nir_intrinsic_image_deref_atomic:
|
||||
case nir_intrinsic_image_deref_atomic_swap:
|
||||
case nir_intrinsic_bindless_image_load:
|
||||
case nir_intrinsic_bindless_image_sparse_load:
|
||||
case nir_intrinsic_bindless_image_fragment_mask_load_amd:
|
||||
case nir_intrinsic_bindless_image_atomic:
|
||||
case nir_intrinsic_bindless_image_atomic_swap:
|
||||
/* Scratch */
|
||||
case nir_intrinsic_load_scratch:
|
||||
/* AMD-specific. */
|
||||
case nir_intrinsic_load_buffer_amd:
|
||||
/* Atomics without return are not treated as loads. */
|
||||
if (nir_def_components_read(&intr->def) &&
|
||||
(!nir_intrinsic_has_atomic_op(intr) ||
|
||||
nir_intrinsic_atomic_op(intr) != nir_atomic_op_ordered_add_gfx12_amd))
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
break;
|
||||
case nir_intrinsic_store_output:
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
|
||||
if (sem.location == FRAG_RESULT_DEPTH)
|
||||
shader->info.writes_z = true;
|
||||
else if (sem.location == FRAG_RESULT_STENCIL)
|
||||
shader->info.writes_stencil = true;
|
||||
else if (sem.location == FRAG_RESULT_SAMPLE_MASK)
|
||||
shader->info.writes_sample_mask = true;
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_demote:
|
||||
case nir_intrinsic_demote_if:
|
||||
case nir_intrinsic_terminate:
|
||||
case nir_intrinsic_terminate_if:
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT)
|
||||
shader->info.uses_discard = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_instr_type_tex: {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
|
||||
shader->info.has_non_uniform_tex_access |= tex->texture_non_uniform || tex->sampler_non_uniform;
|
||||
shader->info.has_shadow_comparison |= tex->is_shadow;
|
||||
|
||||
/* Gather the types of used VMEM instructions that return something. */
|
||||
switch (tex->op) {
|
||||
case nir_texop_tex:
|
||||
case nir_texop_txb:
|
||||
case nir_texop_txl:
|
||||
case nir_texop_txd:
|
||||
case nir_texop_lod:
|
||||
case nir_texop_tg4:
|
||||
shader->info.uses_vmem_sampler_or_bvh = true;
|
||||
break;
|
||||
case nir_texop_txs:
|
||||
case nir_texop_query_levels:
|
||||
case nir_texop_texture_samples:
|
||||
case nir_texop_descriptor_amd:
|
||||
case nir_texop_sampler_descriptor_amd:
|
||||
/* These just return the descriptor or information from it. */
|
||||
break;
|
||||
default:
|
||||
shader->info.uses_vmem_load_other = true;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
/* Add both front and back color inputs. */
|
||||
if (!shader->is_monolithic) {
|
||||
unsigned index = shader->info.num_ps_inputs;
|
||||
|
||||
for (unsigned back = 0; back < 2; back++) {
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if ((shader->info.ps_colors_read >> (i * 4)) & 0xf) {
|
||||
assert(index < ARRAY_SIZE(shader->info.ps_inputs));
|
||||
shader->info.ps_inputs[index].semantic =
|
||||
(back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i;
|
||||
|
||||
enum glsl_interp_mode mode = i ? nir->info.fs.color1_interp
|
||||
: nir->info.fs.color0_interp;
|
||||
shader->info.ps_inputs[index].interpolate =
|
||||
mode == INTERP_MODE_NONE ? INTERP_MODE_COLOR : mode;
|
||||
index++;
|
||||
|
||||
/* Back-face colors don't increment num_ps_inputs. si_emit_spi_map will use
|
||||
* back-face colors conditionally only when needed.
|
||||
*/
|
||||
if (!back)
|
||||
shader->info.num_ps_inputs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ACO needs spi_ps_input_ena before si_init_shader_args. */
|
||||
shader->config.spi_ps_input_ena =
|
||||
S_0286CC_PERSP_SAMPLE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE)) |
|
||||
S_0286CC_PERSP_CENTER_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL)) |
|
||||
S_0286CC_PERSP_CENTROID_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID)) |
|
||||
S_0286CC_LINEAR_SAMPLE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE)) |
|
||||
S_0286CC_LINEAR_CENTER_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL)) |
|
||||
S_0286CC_LINEAR_CENTROID_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID)) |
|
||||
S_0286CC_POS_X_FLOAT_ENA(!!(frag_coord_mask & 0x1)) |
|
||||
S_0286CC_POS_Y_FLOAT_ENA(!!(frag_coord_mask & 0x2)) |
|
||||
S_0286CC_POS_Z_FLOAT_ENA(!!(frag_coord_mask & 0x4)) |
|
||||
S_0286CC_POS_W_FLOAT_ENA(!!(frag_coord_mask & 0x8)) |
|
||||
S_0286CC_FRONT_FACE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_FRONT_FACE) |
|
||||
BITSET_TEST(sysvals, SYSTEM_VALUE_FRONT_FACE_FSIGN)) |
|
||||
S_0286CC_ANCILLARY_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_SAMPLE_ID) |
|
||||
BITSET_TEST(sysvals, SYSTEM_VALUE_LAYER_ID)) |
|
||||
S_0286CC_SAMPLE_COVERAGE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_SAMPLE_MASK_IN)) |
|
||||
S_0286CC_POS_FIXED_PT_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_PIXEL_COORD));
|
||||
|
||||
if (shader->is_monolithic) {
|
||||
si_fixup_spi_ps_input_config(shader);
|
||||
shader->config.spi_ps_input_addr = shader->config.spi_ps_input_ena;
|
||||
} else {
|
||||
/* Part mode will call si_fixup_spi_ps_input_config() when combining multi
|
||||
* shader part in si_shader_select_ps_parts().
|
||||
*
|
||||
* Reserve register locations for VGPR inputs the PS prolog may need.
|
||||
*/
|
||||
shader->config.spi_ps_input_addr = shader->config.spi_ps_input_ena |
|
||||
SI_SPI_PS_INPUT_ADDR_FOR_PROLOG;
|
||||
}
|
||||
}
|
||||
|
||||
if (nir->info.stage <= MESA_SHADER_GEOMETRY && nir->xfb_info &&
|
||||
!shader->key.ge.as_ls && !shader->key.ge.as_es) {
|
||||
unsigned num_streamout_dwords = 0;
|
||||
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
num_streamout_dwords += nir->info.xfb_stride[i];
|
||||
shader->info.num_streamout_vec4s = DIV_ROUND_UP(num_streamout_dwords, 4);
|
||||
}
|
||||
}
|
||||
|
||||
/* Late shader variant info for AMD-specific intrinsics. */
|
||||
static void
|
||||
si_get_late_shader_variant_info(struct si_shader *shader, struct si_shader_args *args,
|
||||
nir_shader *nir)
|
||||
{
|
||||
if ((nir->info.stage != MESA_SHADER_VERTEX || nir->info.vs.blit_sgprs_amd) &&
|
||||
nir->info.stage != MESA_SHADER_TESS_EVAL &&
|
||||
(nir->info.stage != MESA_SHADER_GEOMETRY || !shader->key.ge.as_ngg))
|
||||
return;
|
||||
|
||||
nir_foreach_block(block, nir_shader_get_entrypoint(nir)) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type == nir_instr_type_intrinsic &&
|
||||
nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_scalar_arg_amd &&
|
||||
nir_intrinsic_base(nir_instr_as_intrinsic(instr)) == args->vs_state_bits.arg_index) {
|
||||
assert(args->vs_state_bits.used);
|
||||
|
||||
/* Gather which VS_STATE and GS_STATE user SGPR bits are used. */
|
||||
uint32_t bits_used = nir_def_bits_used(nir_instr_def(instr));
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX &&
|
||||
bits_used & ENCODE_FIELD(VS_STATE_INDEXED, ~0))
|
||||
shader->info.uses_vs_state_indexed = true;
|
||||
|
||||
if (!shader->key.ge.as_es && shader->key.ge.as_ngg) {
|
||||
if (bits_used & ENCODE_FIELD(GS_STATE_PROVOKING_VTX_FIRST, ~0))
|
||||
shader->info.uses_gs_state_provoking_vtx_first = true;
|
||||
|
||||
if (bits_used & ENCODE_FIELD(GS_STATE_OUTPRIM, ~0))
|
||||
shader->info.uses_gs_state_outprim = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *linked)
|
||||
{
|
||||
memset(linked, 0, sizeof(*linked));
|
||||
|
|
|
|||
|
|
@ -795,7 +795,7 @@ struct si_shader {
|
|||
/* The following data is all that's needed for binary shaders. */
|
||||
struct si_shader_binary binary;
|
||||
struct ac_shader_config config;
|
||||
struct si_shader_binary_info info;
|
||||
struct si_shader_variant_info info;
|
||||
|
||||
/* Shader key + LLVM IR + disassembly + statistics.
|
||||
* Generated for debug contexts only.
|
||||
|
|
|
|||
|
|
@ -202,7 +202,7 @@ union si_ps_input_info {
|
|||
};
|
||||
|
||||
/* Final shader info from fully compiled and optimized shader variants. */
|
||||
struct si_shader_binary_info {
|
||||
struct si_shader_variant_info {
|
||||
uint8_t vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS];
|
||||
uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
|
||||
union si_ps_input_info ps_inputs[SI_NUM_INTERP];
|
||||
|
|
|
|||
|
|
@ -177,4 +177,11 @@ bool si_aco_build_shader_part(struct si_screen *screen, gl_shader_stage stage, b
|
|||
struct util_debug_callback *debug, const char *name,
|
||||
struct si_shader_part *result);
|
||||
|
||||
/* si_shader_variant_info.c */
|
||||
void si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir);
|
||||
void si_get_late_shader_variant_info(struct si_shader *shader, struct si_shader_args *args,
|
||||
nir_shader *nir);
|
||||
void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader);
|
||||
void si_fixup_spi_ps_input_config(struct si_shader *shader);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
366
src/gallium/drivers/radeonsi/si_shader_variant_info.c
Normal file
366
src/gallium/drivers/radeonsi/si_shader_variant_info.c
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
/* Copyright 2025 Advanced Micro Devices, Inc.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "si_shader_internal.h"
|
||||
#include "nir.h"
|
||||
#include "nir_range_analysis.h"
|
||||
#include "sid.h"
|
||||
|
||||
/* Gather per-variant info by scanning the NIR of a shader variant.
 *
 * Writes shader->info (system value usage, VMEM load usage, the PS input table, PS output
 * writes, discard usage, streamout size). For fragment shaders it also computes
 * shader->config.spi_ps_input_ena and spi_ps_input_addr.
 *
 * nir_shader_gather_info() and nir_divergence_analysis() are run on "nir" here.
 */
void si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir)
{
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   assert(nir->info.use_aco_amd == si_shader_uses_aco(shader));
   const BITSET_WORD *sysvals = nir->info.system_values_read;
   /* Find out which frag coord components are used. */
   uint8_t frag_coord_mask = 0;

   /* The "divergent" flags tested for UBO/constant loads below come from this pass. */
   nir_divergence_analysis(nir);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Since flat+convergent and non-flat components can occur in the same vec4, start with
       * all PS inputs as flat and change them to smooth when we find a component that's
       * interpolated.
       */
      for (unsigned i = 0; i < ARRAY_SIZE(shader->info.ps_inputs); i++)
         shader->info.ps_inputs[i].interpolate = INTERP_MODE_FLAT;
   }

   nir_foreach_block(block, nir_shader_get_entrypoint(nir)) {
      nir_foreach_instr(instr, block) {
         switch (instr->type) {
         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

            switch (intr->intrinsic) {
            case nir_intrinsic_load_instance_id:
               shader->info.uses_instance_id = true;
               break;
            case nir_intrinsic_load_base_instance:
               shader->info.uses_base_instance = true;
               break;
            case nir_intrinsic_load_draw_id:
               shader->info.uses_draw_id = true;
               break;
            case nir_intrinsic_load_frag_coord:
            case nir_intrinsic_load_sample_pos:
               frag_coord_mask |= nir_def_components_read(&intr->def);
               break;
            case nir_intrinsic_load_input:
            case nir_intrinsic_load_input_vertex:
            case nir_intrinsic_load_per_vertex_input:
            case nir_intrinsic_load_interpolated_input: {
               if (nir->info.stage == MESA_SHADER_VERTEX) {
                  shader->info.uses_vmem_load_other = true;

                  /* Explicit parentheses: the bit test binds before the && either way. */
                  if (intr->intrinsic == nir_intrinsic_load_input &&
                      ((shader->key.ge.mono.instance_divisor_is_one |
                        shader->key.ge.mono.instance_divisor_is_fetched) &
                       BITFIELD_BIT(nir_intrinsic_base(intr)))) {
                     /* Instanced attribs. */
                     shader->info.uses_instance_id = true;
                     shader->info.uses_base_instance = true;
                  }
               } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
                  shader->info.uses_vmem_load_other = true;
               } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
                  nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
                  unsigned index = nir_intrinsic_base(intr);
                  assert(sem.num_slots == 1);
                  /* The base indexes ps_inputs directly, so it must be in bounds. */
                  assert(index < ARRAY_SIZE(shader->info.ps_inputs));

                  shader->info.num_ps_inputs = MAX2(shader->info.num_ps_inputs, index + 1);
                  shader->info.ps_inputs[index].semantic = sem.location;
                  /* Determine interpolation mode. This only cares about FLAT/SMOOTH/COLOR.
                   * COLOR is only for nir_intrinsic_load_color0/1.
                   */
                  if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
                     shader->info.ps_inputs[index].interpolate = INTERP_MODE_SMOOTH;
                     if (intr->def.bit_size == 16)
                        shader->info.ps_inputs[index].fp16_lo_hi_valid |= 0x1 << sem.high_16bits;
                  }
               }
               break;
            }
            case nir_intrinsic_load_color0:
               assert(!shader->is_monolithic);
               shader->info.ps_colors_read |= nir_def_components_read(&intr->def);
               break;
            case nir_intrinsic_load_color1:
               assert(!shader->is_monolithic);
               /* Components of color1 occupy bits 4..7 of ps_colors_read. */
               shader->info.ps_colors_read |= nir_def_components_read(&intr->def) << 4;
               break;
            case nir_intrinsic_load_ubo:
               /* Only counted as a VMEM load when source 1 is divergent. */
               if (intr->src[1].ssa->divergent)
                  shader->info.uses_vmem_load_other = true;
               break;
            case nir_intrinsic_load_constant:
               /* Only counted as a VMEM load when source 0 is divergent. */
               if (intr->src[0].ssa->divergent)
                  shader->info.uses_vmem_load_other = true;
               break;
            /* Global */
            case nir_intrinsic_load_global:
            case nir_intrinsic_global_atomic:
            case nir_intrinsic_global_atomic_swap:
            /* SSBOs (this list is from si_nir_lower_resource.c) */
            case nir_intrinsic_load_ssbo:
            case nir_intrinsic_ssbo_atomic:
            case nir_intrinsic_ssbo_atomic_swap:
            /* Images (this list is from si_nir_lower_resource.c) */
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_sparse_load:
            case nir_intrinsic_image_deref_fragment_mask_load_amd:
            case nir_intrinsic_image_deref_atomic:
            case nir_intrinsic_image_deref_atomic_swap:
            case nir_intrinsic_bindless_image_load:
            case nir_intrinsic_bindless_image_sparse_load:
            case nir_intrinsic_bindless_image_fragment_mask_load_amd:
            case nir_intrinsic_bindless_image_atomic:
            case nir_intrinsic_bindless_image_atomic_swap:
            /* Scratch */
            case nir_intrinsic_load_scratch:
            /* AMD-specific. */
            case nir_intrinsic_load_buffer_amd:
               /* Atomics without return are not treated as loads. */
               if (nir_def_components_read(&intr->def) &&
                   (!nir_intrinsic_has_atomic_op(intr) ||
                    nir_intrinsic_atomic_op(intr) != nir_atomic_op_ordered_add_gfx12_amd))
                  shader->info.uses_vmem_load_other = true;
               break;
            case nir_intrinsic_store_output:
               if (nir->info.stage == MESA_SHADER_FRAGMENT) {
                  nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

                  if (sem.location == FRAG_RESULT_DEPTH)
                     shader->info.writes_z = true;
                  else if (sem.location == FRAG_RESULT_STENCIL)
                     shader->info.writes_stencil = true;
                  else if (sem.location == FRAG_RESULT_SAMPLE_MASK)
                     shader->info.writes_sample_mask = true;
               }
               break;
            case nir_intrinsic_demote:
            case nir_intrinsic_demote_if:
            case nir_intrinsic_terminate:
            case nir_intrinsic_terminate_if:
               if (nir->info.stage == MESA_SHADER_FRAGMENT)
                  shader->info.uses_discard = true;
               break;
            default:
               break;
            }
            break;
         }

         case nir_instr_type_tex: {
            nir_tex_instr *tex = nir_instr_as_tex(instr);

            shader->info.has_non_uniform_tex_access |= tex->texture_non_uniform || tex->sampler_non_uniform;
            shader->info.has_shadow_comparison |= tex->is_shadow;

            /* Gather the types of used VMEM instructions that return something. */
            switch (tex->op) {
            case nir_texop_tex:
            case nir_texop_txb:
            case nir_texop_txl:
            case nir_texop_txd:
            case nir_texop_lod:
            case nir_texop_tg4:
               shader->info.uses_vmem_sampler_or_bvh = true;
               break;
            case nir_texop_txs:
            case nir_texop_query_levels:
            case nir_texop_texture_samples:
            case nir_texop_descriptor_amd:
            case nir_texop_sampler_descriptor_amd:
               /* These just return the descriptor or information from it. */
               break;
            default:
               shader->info.uses_vmem_load_other = true;
               break;
            }
            break;
         }

         default:
            break;
         }
      }
   }

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Add both front and back color inputs. */
      if (!shader->is_monolithic) {
         unsigned index = shader->info.num_ps_inputs;

         for (unsigned back = 0; back < 2; back++) {
            for (unsigned i = 0; i < 2; i++) {
               if ((shader->info.ps_colors_read >> (i * 4)) & 0xf) {
                  assert(index < ARRAY_SIZE(shader->info.ps_inputs));
                  shader->info.ps_inputs[index].semantic =
                     (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i;

                  enum glsl_interp_mode mode = i ? nir->info.fs.color1_interp
                                                 : nir->info.fs.color0_interp;
                  shader->info.ps_inputs[index].interpolate =
                     mode == INTERP_MODE_NONE ? INTERP_MODE_COLOR : mode;
                  index++;

                  /* Back-face colors don't increment num_ps_inputs. si_emit_spi_map will use
                   * back-face colors conditionally only when needed.
                   */
                  if (!back)
                     shader->info.num_ps_inputs++;
               }
            }
         }
      }

      /* ACO needs spi_ps_input_ena before si_init_shader_args. */
      shader->config.spi_ps_input_ena =
         S_0286CC_PERSP_SAMPLE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE)) |
         S_0286CC_PERSP_CENTER_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL)) |
         S_0286CC_PERSP_CENTROID_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID)) |
         S_0286CC_LINEAR_SAMPLE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE)) |
         S_0286CC_LINEAR_CENTER_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL)) |
         S_0286CC_LINEAR_CENTROID_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID)) |
         S_0286CC_POS_X_FLOAT_ENA(!!(frag_coord_mask & 0x1)) |
         S_0286CC_POS_Y_FLOAT_ENA(!!(frag_coord_mask & 0x2)) |
         S_0286CC_POS_Z_FLOAT_ENA(!!(frag_coord_mask & 0x4)) |
         S_0286CC_POS_W_FLOAT_ENA(!!(frag_coord_mask & 0x8)) |
         S_0286CC_FRONT_FACE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_FRONT_FACE) |
                                 BITSET_TEST(sysvals, SYSTEM_VALUE_FRONT_FACE_FSIGN)) |
         S_0286CC_ANCILLARY_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_SAMPLE_ID) |
                                BITSET_TEST(sysvals, SYSTEM_VALUE_LAYER_ID)) |
         S_0286CC_SAMPLE_COVERAGE_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_SAMPLE_MASK_IN)) |
         S_0286CC_POS_FIXED_PT_ENA(BITSET_TEST(sysvals, SYSTEM_VALUE_PIXEL_COORD));

      if (shader->is_monolithic) {
         si_fixup_spi_ps_input_config(shader);
         shader->config.spi_ps_input_addr = shader->config.spi_ps_input_ena;
      } else {
         /* In part mode, si_fixup_spi_ps_input_config() is called when the shader parts
          * are combined in si_shader_select_ps_parts().
          *
          * Reserve register locations for VGPR inputs the PS prolog may need.
          */
         shader->config.spi_ps_input_addr = shader->config.spi_ps_input_ena |
                                            SI_SPI_PS_INPUT_ADDR_FOR_PROLOG;
      }
   }

   if (nir->info.stage <= MESA_SHADER_GEOMETRY && nir->xfb_info &&
       !shader->key.ge.as_ls && !shader->key.ge.as_es) {
      unsigned num_streamout_dwords = 0;

      /* Sum the strides of all streamout buffers. */
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.xfb_stride); i++)
         num_streamout_dwords += nir->info.xfb_stride[i];
      shader->info.num_streamout_vec4s = DIV_ROUND_UP(num_streamout_dwords, 4);
   }
}
|
||||
|
||||
/* Late shader variant info for AMD-specific intrinsics. */
|
||||
void si_get_late_shader_variant_info(struct si_shader *shader, struct si_shader_args *args,
|
||||
nir_shader *nir)
|
||||
{
|
||||
if ((nir->info.stage != MESA_SHADER_VERTEX || nir->info.vs.blit_sgprs_amd) &&
|
||||
nir->info.stage != MESA_SHADER_TESS_EVAL &&
|
||||
(nir->info.stage != MESA_SHADER_GEOMETRY || !shader->key.ge.as_ngg))
|
||||
return;
|
||||
|
||||
nir_foreach_block(block, nir_shader_get_entrypoint(nir)) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type == nir_instr_type_intrinsic &&
|
||||
nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_scalar_arg_amd &&
|
||||
nir_intrinsic_base(nir_instr_as_intrinsic(instr)) == args->vs_state_bits.arg_index) {
|
||||
assert(args->vs_state_bits.used);
|
||||
|
||||
/* Gather which VS_STATE and GS_STATE user SGPR bits are used. */
|
||||
uint32_t bits_used = nir_def_bits_used(nir_instr_def(instr));
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX &&
|
||||
bits_used & ENCODE_FIELD(VS_STATE_INDEXED, ~0))
|
||||
shader->info.uses_vs_state_indexed = true;
|
||||
|
||||
if (!shader->key.ge.as_es && shader->key.ge.as_ngg) {
|
||||
if (bits_used & ENCODE_FIELD(GS_STATE_PROVOKING_VTX_FIRST, ~0))
|
||||
shader->info.uses_gs_state_provoking_vtx_first = true;
|
||||
|
||||
if (bits_used & ENCODE_FIELD(GS_STATE_OUTPRIM, ~0))
|
||||
shader->info.uses_gs_state_outprim = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader)
|
||||
{
|
||||
const union si_shader_key *key = &shader->key;
|
||||
|
||||
/* Enable POS_FIXED_PT if polygon stippling is enabled. */
|
||||
if (key->ps.part.prolog.poly_stipple)
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
|
||||
|
||||
/* Set up the enable bits for per-sample shading if needed. */
|
||||
if (key->ps.part.prolog.force_persp_sample_interp &&
|
||||
(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||
}
|
||||
if (key->ps.part.prolog.force_linear_sample_interp &&
|
||||
(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
|
||||
}
|
||||
if (key->ps.part.prolog.force_persp_center_interp &&
|
||||
(G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
|
||||
}
|
||||
if (key->ps.part.prolog.force_linear_center_interp &&
|
||||
(G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
|
||||
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
|
||||
}
|
||||
|
||||
/* The sample mask fixup requires the sample ID. */
|
||||
if (key->ps.part.prolog.samplemask_log_ps_iter)
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1);
|
||||
|
||||
if (key->ps.part.prolog.force_samplemask_to_helper_invocation) {
|
||||
assert(key->ps.part.prolog.samplemask_log_ps_iter == 0);
|
||||
assert(!key->ps.mono.poly_line_smoothing);
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
|
||||
}
|
||||
|
||||
/* The sample mask fixup has an optimization that replaces the sample mask with the sample ID. */
|
||||
if (key->ps.part.prolog.samplemask_log_ps_iter == 3)
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
|
||||
|
||||
if (key->ps.part.prolog.get_frag_coord_from_pixel_coord) {
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_POS_X_FLOAT_ENA;
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_POS_Y_FLOAT_ENA;
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
|
||||
}
|
||||
}
|
||||
|
||||
void si_fixup_spi_ps_input_config(struct si_shader *shader)
|
||||
{
|
||||
/* POW_W_FLOAT requires that one of the perspective weights is enabled. */
|
||||
if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
|
||||
!(shader->config.spi_ps_input_ena & 0xf)) {
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||
}
|
||||
|
||||
/* At least one pair of interpolation weights must be enabled. */
|
||||
if (!(shader->config.spi_ps_input_ena & 0x7f))
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue