Merge branch 'si-prolog-vgpr-reduction' into 'main'

ac,radeonsi: reduce VGPR usage of PS prologs, fix si_shader_update_spi_shader_formats

See merge request mesa/mesa!41226
This commit is contained in:
Marek Olšák 2026-05-08 00:34:53 +00:00
commit f619c94996
19 changed files with 409 additions and 196 deletions

View file

@ -82,3 +82,35 @@ void ac_compact_ps_vgpr_args(struct ac_shader_args *info, uint32_t spi_ps_input)
info->num_vgprs_used = vgpr_reg;
}
unsigned
ac_get_color_interp_arg(const struct ac_shader_args *args, enum ac_color_interp interp)
{
struct ac_arg arg;
switch (interp) {
case AC_COLOR_INTERP_PERSP_SAMPLE:
arg = args->persp_sample;
break;
case AC_COLOR_INTERP_PERSP_CENTER:
arg = args->persp_center;
break;
case AC_COLOR_INTERP_PERSP_CENTROID:
arg = args->persp_centroid;
break;
case AC_COLOR_INTERP_LINEAR_SAMPLE:
arg = args->linear_sample;
break;
case AC_COLOR_INTERP_LINEAR_CENTER:
arg = args->linear_center;
break;
case AC_COLOR_INTERP_LINEAR_CENTROID:
arg = args->linear_centroid;
break;
default:
UNREACHABLE("unexpected interp mode");
}
assert(arg.used);
return arg.arg_index;
}

View file

@ -10,6 +10,10 @@
#include <stdbool.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Maximum dwords of inline push constants when the indirect path is still used */
#define AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT 8
/* Maximum dwords of inline push constants when the indirect path is not used */
@ -27,6 +31,16 @@ enum ac_arg_type
AC_ARG_CONST_ADDR,
};
enum ac_color_interp {
AC_COLOR_INTERP_FLAT,
AC_COLOR_INTERP_PERSP_SAMPLE,
AC_COLOR_INTERP_PERSP_CENTER,
AC_COLOR_INTERP_PERSP_CENTROID,
AC_COLOR_INTERP_LINEAR_SAMPLE,
AC_COLOR_INTERP_LINEAR_CENTER,
AC_COLOR_INTERP_LINEAR_CENTROID,
};
struct ac_arg {
uint16_t arg_index;
bool used;
@ -163,6 +177,7 @@ struct ac_shader_args {
struct ac_arg linear_sample;
struct ac_arg linear_center;
struct ac_arg linear_centroid;
struct ac_arg line_stipple_tex_ena;
struct ac_arg pos_fixed_pt;
/* CS */
@ -204,5 +219,10 @@ void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsign
void ac_add_return(struct ac_shader_args *info, enum ac_arg_regfile regfile);
void ac_add_preserved(struct ac_shader_args *info, const struct ac_arg *arg);
void ac_compact_ps_vgpr_args(struct ac_shader_args *info, uint32_t spi_ps_input);
unsigned ac_get_color_interp_arg(const struct ac_shader_args *args, enum ac_color_interp interp);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -1511,3 +1511,96 @@ retry_select_mode:
max_out_vertices <= max_workgroup_size &&
out->hw_max_esverts >= min_esverts;
}
/* Print SPI_PS_INPUT_ADDR as follows:
* v[0:1] = PERSP_SAMPLE
* v[2:3] = PERSP_CENTER
* v[4:5] = LINEAR_SAMPLE
* v[6:7] = LINEAR_CENTER
* v8 = LINE_STIPPLE_TEX
* v9 = FRONT_FACE
* v10 = ANCILLARY
* v11 = SAMPLE_COVERAGE
* v12 = POS_FIXED_PT
*/
void
ac_print_spi_ps_input_vgpr_list(uint32_t spi_ps_input_ena, uint32_t spi_ps_input_addr, FILE *f)
{
unsigned vgpr = 0;
#define PRINT_PS_INPUT_VGPR(count, name) do { \
if (G_0286CC_##name##_ENA(spi_ps_input_addr)) { \
bool enabled = G_0286CC_##name##_ENA(spi_ps_input_ena); \
if (count > 1) \
fprintf(f, " v[%u:%u] = %s%s\n", vgpr, vgpr + count - 1, #name, \
enabled ? " === initialized ===" : ""); \
else \
fprintf(f, " v%u = %s\n", vgpr, #name); \
vgpr += count; \
} \
} while (0)
PRINT_PS_INPUT_VGPR(2, PERSP_SAMPLE);
PRINT_PS_INPUT_VGPR(2, PERSP_CENTER);
PRINT_PS_INPUT_VGPR(2, PERSP_CENTROID);
PRINT_PS_INPUT_VGPR(3, PERSP_PULL_MODEL);
PRINT_PS_INPUT_VGPR(2, LINEAR_SAMPLE);
PRINT_PS_INPUT_VGPR(2, LINEAR_CENTER);
PRINT_PS_INPUT_VGPR(2, LINEAR_CENTROID);
PRINT_PS_INPUT_VGPR(1, LINE_STIPPLE_TEX);
PRINT_PS_INPUT_VGPR(1, POS_X_FLOAT);
PRINT_PS_INPUT_VGPR(1, POS_Y_FLOAT);
PRINT_PS_INPUT_VGPR(1, POS_Z_FLOAT);
PRINT_PS_INPUT_VGPR(1, POS_W_FLOAT);
PRINT_PS_INPUT_VGPR(1, FRONT_FACE);
PRINT_PS_INPUT_VGPR(1, ANCILLARY);
PRINT_PS_INPUT_VGPR(1, SAMPLE_COVERAGE);
PRINT_PS_INPUT_VGPR(1, POS_FIXED_PT);
#undef PRINT_PS_INPUT_VGPR
}
static const char *
get_spi_shader_format(unsigned format)
{
switch (format) {
#define PS_FORMAT(name) case V_028714_SPI_SHADER_##name: return #name;
PS_FORMAT(ZERO)
PS_FORMAT(32_R)
PS_FORMAT(32_GR)
PS_FORMAT(32_AR)
PS_FORMAT(FP16_ABGR)
PS_FORMAT(UNORM16_ABGR)
PS_FORMAT(SNORM16_ABGR)
PS_FORMAT(UINT16_ABGR)
PS_FORMAT(SINT16_ABGR)
PS_FORMAT(32_ABGR)
#undef PS_FORMAT
default:
UNREACHABLE("invalid export format");
}
}
/* Print (example):
* mrt0 = FP16_ABGR
* mrt1 = 32_R
*/
void
ac_print_spi_ps_shader_col_format(uint32_t spi_shader_col_format, FILE *f)
{
for (unsigned i = 0; i < 8; i++) {
unsigned format = (spi_shader_col_format >> (i * 4)) & 0xf;
if (format)
fprintf(f, " mrt%u = %s\n", i, get_spi_shader_format(format));
}
}
/* Print (example):
* mrtz = 32_R
*/
void
ac_print_spi_ps_shader_z_format(uint32_t spi_shader_z_format, FILE *f)
{
if (spi_shader_z_format)
fprintf(f, " mrtz = %s\n", get_spi_shader_format(spi_shader_z_format));
}

View file

@ -343,6 +343,12 @@ ac_ngg_compute_subgroup_info(enum amd_gfx_level gfx_level, mesa_shader_stage es_
unsigned ngg_lds_scratch_size, bool tess_turns_off_ngg,
unsigned max_esgs_lds_padding, ac_ngg_subgroup_info *out);
void
ac_print_spi_ps_input_vgpr_list(uint32_t spi_ps_input_ena, uint32_t spi_ps_input_addr, FILE *f);
void ac_print_spi_ps_shader_col_format(uint32_t spi_shader_col_format, FILE *f);
void ac_print_spi_ps_shader_z_format(uint32_t spi_shader_z_format, FILE *f);
static unsigned inline
ac_shader_get_lds_alloc_granularity(enum amd_gfx_level gfx_level)
{

View file

@ -91,15 +91,15 @@ struct aco_ps_prolog_info {
bool force_linear_sample_interp;
bool force_persp_center_interp;
bool force_linear_center_interp;
bool uses_persp_centroid;
bool uses_linear_centroid;
unsigned samplemask_log_ps_iter;
bool get_frag_coord_from_pixel_coord;
bool pixel_center_integer;
bool force_samplemask_to_helper_invocation;
unsigned num_interp_inputs;
unsigned colors_read;
int color_interp_vgpr_index[2];
int color_attr_index[2];
uint8_t color_attr_index[2];
enum ac_color_interp color_interp[2];
bool color_two_side;
bool needs_wqm;

View file

@ -64,6 +64,8 @@ overwrite_interp_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo)
cond = bool_to_vector_condition(ctx, cond);
if (finfo->bc_optimize_for_persp) {
assert(finfo->uses_persp_centroid);
Temp center = get_arg(ctx, ctx->args->persp_center);
Temp centroid = get_arg(ctx, ctx->args->persp_centroid);
@ -73,6 +75,8 @@ overwrite_interp_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo)
}
if (finfo->bc_optimize_for_linear) {
assert(finfo->uses_linear_centroid);
Temp center = get_arg(ctx, ctx->args->linear_center);
Temp centroid = get_arg(ctx, ctx->args->linear_centroid);
@ -85,25 +89,29 @@ overwrite_interp_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo)
if (finfo->force_persp_sample_interp) {
Temp persp_sample = get_arg(ctx, ctx->args->persp_sample);
ctx->arg_temps[ctx->args->persp_center.arg_index] = persp_sample;
ctx->arg_temps[ctx->args->persp_centroid.arg_index] = persp_sample;
if (finfo->uses_persp_centroid)
ctx->arg_temps[ctx->args->persp_centroid.arg_index] = persp_sample;
}
if (finfo->force_linear_sample_interp) {
Temp linear_sample = get_arg(ctx, ctx->args->linear_sample);
ctx->arg_temps[ctx->args->linear_center.arg_index] = linear_sample;
ctx->arg_temps[ctx->args->linear_centroid.arg_index] = linear_sample;
if (finfo->uses_linear_centroid)
ctx->arg_temps[ctx->args->linear_centroid.arg_index] = linear_sample;
}
if (finfo->force_persp_center_interp) {
Temp persp_center = get_arg(ctx, ctx->args->persp_center);
ctx->arg_temps[ctx->args->persp_sample.arg_index] = persp_center;
ctx->arg_temps[ctx->args->persp_centroid.arg_index] = persp_center;
if (finfo->uses_persp_centroid)
ctx->arg_temps[ctx->args->persp_centroid.arg_index] = persp_center;
}
if (finfo->force_linear_center_interp) {
Temp linear_center = get_arg(ctx, ctx->args->linear_center);
ctx->arg_temps[ctx->args->linear_sample.arg_index] = linear_center;
ctx->arg_temps[ctx->args->linear_centroid.arg_index] = linear_center;
if (finfo->uses_linear_centroid)
ctx->arg_temps[ctx->args->linear_centroid.arg_index] = linear_center;
}
}
@ -167,32 +175,6 @@ overwrite_samplemask_arg(isel_context* ctx, const struct aco_ps_prolog_info* fin
is_helper_invoc);
}
}
void
overwrite_pos_xy_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo)
{
if (!finfo->get_frag_coord_from_pixel_coord)
return;
Builder bld(ctx->program, ctx->block);
Temp pos_fixed_pt = get_arg(ctx, ctx->args->pos_fixed_pt);
for (unsigned i = 0; i < 2; i++) {
if (!ctx->args->frag_pos[i].used)
continue;
Temp t;
if (i)
t = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(16), pos_fixed_pt);
else
t = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0xffff), pos_fixed_pt);
t = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), t);
if (!finfo->pixel_center_integer)
t = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000 /*0.5*/), t);
ctx->arg_temps[ctx->args->frag_pos[i].arg_index] = t;
}
}
void
passthrough_all_args(isel_context* ctx, std::vector<Operand>& regs)
@ -200,12 +182,20 @@ passthrough_all_args(isel_context* ctx, std::vector<Operand>& regs)
struct ac_arg arg;
arg.used = true;
for (arg.arg_index = 0; arg.arg_index < ctx->args->arg_count; arg.arg_index++)
for (arg.arg_index = 0; arg.arg_index < ctx->args->arg_count; arg.arg_index++) {
/* Don't pass LINE_STIPPLE_TEX_ENA to the next shader binary because it's unused.
* This saves 1 VGPR in the prolog.
*/
if (ctx->args->line_stipple_tex_ena.used &&
arg.arg_index == ctx->args->line_stipple_tex_ena.arg_index)
continue;
regs.emplace_back(Operand(get_arg(ctx, arg), get_arg_reg(ctx->args, arg)));
}
}
Temp
get_interp_color(isel_context* ctx, int interp_vgpr, unsigned attr_index, unsigned comp)
get_interp_color(isel_context* ctx, int interp_arg, unsigned attr_index, unsigned comp)
{
Builder bld(ctx->program, ctx->block);
@ -213,10 +203,8 @@ get_interp_color(isel_context* ctx, int interp_vgpr, unsigned attr_index, unsign
Temp prim_mask = get_arg(ctx, ctx->args->prim_mask);
if (interp_vgpr != -1) {
/* interp args are all 2 vgprs */
int arg_index = ctx->args->persp_sample.arg_index + interp_vgpr / 2;
Temp interp_ij = ctx->arg_temps[arg_index];
if (interp_arg != -1) {
Temp interp_ij = ctx->arg_temps[interp_arg];
emit_interp_instr(ctx, attr_index, comp, interp_ij, dst, prim_mask, false);
} else {
@ -245,7 +233,9 @@ interpolate_color_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo
u_foreach_bit (i, finfo->colors_read) {
unsigned color_index = i / 4;
unsigned front_index = finfo->color_attr_index[color_index];
int interp_vgpr = finfo->color_interp_vgpr_index[color_index];
int interp_arg = finfo->color_interp[color_index] == AC_COLOR_INTERP_FLAT
? -1
: ac_get_color_interp_arg(ctx->args, finfo->color_interp[color_index]);
/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
* otherwise it's at offset "num_inputs".
@ -254,8 +244,8 @@ interpolate_color_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo
if (color_index == 1 && finfo->colors_read & 0xf)
back_index++;
Temp front = get_interp_color(ctx, interp_vgpr, front_index, i % 4);
Temp back = get_interp_color(ctx, interp_vgpr, back_index, i % 4);
Temp front = get_interp_color(ctx, interp_arg, front_index, i % 4);
Temp back = get_interp_color(ctx, interp_arg, back_index, i % 4);
Temp color =
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), back, front, is_face_positive);
@ -266,8 +256,10 @@ interpolate_color_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo
u_foreach_bit (i, finfo->colors_read) {
unsigned color_index = i / 4;
unsigned attr_index = finfo->color_attr_index[color_index];
int interp_vgpr = finfo->color_interp_vgpr_index[color_index];
Temp color = get_interp_color(ctx, interp_vgpr, attr_index, i % 4);
int interp_arg = finfo->color_interp[color_index] == AC_COLOR_INTERP_FLAT
? -1
: ac_get_color_interp_arg(ctx->args, finfo->color_interp[color_index]);
Temp color = get_interp_color(ctx, interp_arg, attr_index, i % 4);
regs.emplace_back(Operand(color, PhysReg{vgpr++}));
}
@ -295,7 +287,6 @@ select_ps_prolog(Program* program, void* pinfo, ac_shader_config* config,
overwrite_interp_args(&ctx, finfo);
overwrite_samplemask_arg(&ctx, finfo);
overwrite_pos_xy_args(&ctx, finfo);
std::vector<Operand> regs;
passthrough_all_args(&ctx, regs);

View file

@ -342,7 +342,7 @@ declare_ps_input_vgprs(struct radv_shader_args_state *state, const struct radv_s
RADV_ADD_ARG(state, AC_ARG_VGPR, 2, AC_ARG_VALUE, ac.linear_sample);
RADV_ADD_ARG(state, AC_ARG_VGPR, 2, AC_ARG_VALUE, ac.linear_center);
RADV_ADD_ARG(state, AC_ARG_VGPR, 2, AC_ARG_VALUE, ac.linear_centroid);
RADV_ADD_NULL_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE); /* line stipple tex */
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.line_stipple_tex_ena);
RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.frag_pos, 0);
RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.frag_pos, 1);
RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.frag_pos, 2);

View file

@ -13,7 +13,6 @@
#include "nir_xfb_info.h"
#include "si_pipe.h"
#include "si_shader_internal.h"
#include "pipe/p_shader_tokens.h"
static void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader);
@ -853,11 +852,11 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx)
.optimize_frag_coord = true,
.frag_coord_is_center = true,
/* This does a lot of things. See the description in ac_nir_lower_ps_early_options. */
.ps_iter_samples = key->ps.part.prolog.samplemask_log_ps_iter ?
(1 << key->ps.part.prolog.samplemask_log_ps_iter) :
(key->ps.part.prolog.force_persp_sample_interp ||
key->ps.part.prolog.force_linear_sample_interp ? 2 :
(key->ps.part.prolog.get_frag_coord_from_pixel_coord ? 1 : 0)),
.ps_iter_samples = nir->info.fs.uses_sample_shading ? 8 :
key->ps.part.prolog.samplemask_log_ps_iter ?
(1 << key->ps.part.prolog.samplemask_log_ps_iter) :
(key->ps.part.prolog.force_persp_sample_interp ||
key->ps.part.prolog.force_linear_sample_interp ? 2 : 0),
.fbfetch_is_1D = key->ps.mono.fbfetch_is_1D,
.fbfetch_layered = key->ps.mono.fbfetch_layered,
@ -892,6 +891,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx)
ac_nir_lower_ps_early_options early_options = {
.optimize_frag_coord = true,
.frag_coord_is_center = true,
.ps_iter_samples = nir->info.fs.uses_sample_shading ? 8 : 0,
.lower_color_inputs_to_load_color01 = true,
.alpha_func = COMPARE_FUNC_ALWAYS,
.spi_shader_col_format_hint = ~0,
@ -1742,15 +1742,28 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_
key->ps_prolog.states.force_linear_center_interp ||
key->ps_prolog.states.bc_optimize_for_persp || key->ps_prolog.states.bc_optimize_for_linear ||
key->ps_prolog.states.samplemask_log_ps_iter ||
key->ps_prolog.states.get_frag_coord_from_pixel_coord ||
key->ps_prolog.states.force_samplemask_to_helper_invocation);
key->ps_prolog.uses_persp_centroid =
G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */
/* The PS prolog can change one to the other, so we need both or neither to be set. */
assert(G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr) ==
G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
key->ps_prolog.uses_linear_sample_and_center =
G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr) || /* addr because the PS prolog may use it */
G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr);
key->ps_prolog.uses_linear_centroid =
G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */
key->ps_prolog.reserve_line_stipple_tex_ena =
G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr); /* unused but may need to be reserved */
key->ps_prolog.fragcoord_usage_mask =
G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_ena) |
(G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_ena) << 1) |
(G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_ena) << 2) |
(G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) << 3);
key->ps_prolog.pixel_center_integer = key->ps_prolog.fragcoord_usage_mask &&
shader->selector->info.base.fs.pixel_center_integer;
key->ps_prolog.uses_ancillary =
G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */
key->ps_prolog.uses_sample_coverage =
G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */
if (shader->key.ps.part.prolog.poly_stipple)
shader->info.uses_vmem_load_other = true;
@ -1778,27 +1791,27 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_
switch (interp) {
case INTERP_MODE_FLAT:
key->ps_prolog.color_interp_vgpr_index[i] = -1;
key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_FLAT;
break;
case INTERP_MODE_SMOOTH:
case INTERP_MODE_COLOR:
/* Force the interpolation location for colors here. */
if (shader->key.ps.part.prolog.force_persp_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
location = SI_INTERPOLATE_LOC_SAMPLE;
if (shader->key.ps.part.prolog.force_persp_center_interp)
location = TGSI_INTERPOLATE_LOC_CENTER;
location = SI_INTERPOLATE_LOC_CENTER;
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:
key->ps_prolog.color_interp_vgpr_index[i] = 0;
case SI_INTERPOLATE_LOC_SAMPLE:
key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_PERSP_SAMPLE;
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
break;
case TGSI_INTERPOLATE_LOC_CENTER:
key->ps_prolog.color_interp_vgpr_index[i] = 2;
case SI_INTERPOLATE_LOC_CENTER:
key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_PERSP_CENTER;
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
break;
case TGSI_INTERPOLATE_LOC_CENTROID:
key->ps_prolog.color_interp_vgpr_index[i] = 4;
case SI_INTERPOLATE_LOC_CENTROID:
key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_PERSP_CENTROID;
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTROID_ENA(1);
break;
default:
@ -1808,25 +1821,25 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_
case INTERP_MODE_NOPERSPECTIVE:
/* Force the interpolation location for colors here. */
if (shader->key.ps.part.prolog.force_linear_sample_interp)
location = TGSI_INTERPOLATE_LOC_SAMPLE;
location = SI_INTERPOLATE_LOC_SAMPLE;
if (shader->key.ps.part.prolog.force_linear_center_interp)
location = TGSI_INTERPOLATE_LOC_CENTER;
location = SI_INTERPOLATE_LOC_CENTER;
/* The VGPR assignment for non-monolithic shaders
* works because InitialPSInputAddr is set on the
* main shader and PERSP_PULL_MODEL is never used.
*/
switch (location) {
case TGSI_INTERPOLATE_LOC_SAMPLE:
key->ps_prolog.color_interp_vgpr_index[i] = 6;
case SI_INTERPOLATE_LOC_SAMPLE:
key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_LINEAR_SAMPLE;
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
break;
case TGSI_INTERPOLATE_LOC_CENTER:
key->ps_prolog.color_interp_vgpr_index[i] = 8;
case SI_INTERPOLATE_LOC_CENTER:
key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_LINEAR_CENTER;
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
break;
case TGSI_INTERPOLATE_LOC_CENTROID:
key->ps_prolog.color_interp_vgpr_index[i] = 10;
case SI_INTERPOLATE_LOC_CENTROID:
key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_LINEAR_CENTROID;
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTROID_ENA(1);
break;
default:
@ -1852,7 +1865,6 @@ static bool si_need_ps_prolog(const union si_shader_part_key *key)
key->ps_prolog.states.bc_optimize_for_persp ||
key->ps_prolog.states.bc_optimize_for_linear || key->ps_prolog.states.poly_stipple ||
key->ps_prolog.states.samplemask_log_ps_iter ||
key->ps_prolog.states.get_frag_coord_from_pixel_coord ||
key->ps_prolog.states.force_samplemask_to_helper_invocation;
}

View file

@ -234,6 +234,12 @@ enum
SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9, /* +8 for COLOR[0..1] */
};
enum {
SI_INTERPOLATE_LOC_CENTER,
SI_INTERPOLATE_LOC_CENTROID,
SI_INTERPOLATE_LOC_SAMPLE,
};
/* These fields are only set in current_vs_state (except INDEXED) in si_context, and they are
* accessible in the shader via vs_state_bits in VS, TES, and GS.
*/
@ -536,7 +542,6 @@ struct si_ps_prolog_bits {
unsigned bc_optimize_for_persp : 1;
unsigned bc_optimize_for_linear : 1;
unsigned samplemask_log_ps_iter : 2;
unsigned get_frag_coord_from_pixel_coord : 1;
unsigned force_samplemask_to_helper_invocation : 1;
};
@ -565,11 +570,16 @@ union si_shader_part_key {
/* Color interpolation and two-side color selection. */
unsigned colors_read : 8; /* color input components read */
unsigned num_interp_inputs : 5; /* BCOLOR is at this location */
unsigned uses_persp_centroid : 1;
unsigned uses_linear_sample_and_center : 1;
unsigned uses_linear_centroid : 1;
unsigned reserve_line_stipple_tex_ena : 1; /* only reserve the VGPR, don't use it */
unsigned fragcoord_usage_mask : 4;
unsigned pixel_center_integer : 1;
unsigned uses_ancillary : 1;
unsigned uses_sample_coverage : 1;
unsigned wqm : 1;
char color_attr_index[2];
signed char color_interp_vgpr_index[2]; /* -1 == constant */
uint8_t color_attr_index[2];
uint8_t color_interp[2]; /* AC_COLOR_INTERP_* */
} ps_prolog;
struct {
struct si_ps_epilog_bits states;

View file

@ -253,15 +253,15 @@ si_aco_build_ps_prolog(struct aco_compiler_options *options,
.force_linear_sample_interp = key->ps_prolog.states.force_linear_sample_interp,
.force_persp_center_interp = key->ps_prolog.states.force_persp_center_interp,
.force_linear_center_interp = key->ps_prolog.states.force_linear_center_interp,
.uses_persp_centroid = key->ps_prolog.uses_persp_centroid,
.uses_linear_centroid = key->ps_prolog.uses_linear_centroid,
.samplemask_log_ps_iter = key->ps_prolog.states.samplemask_log_ps_iter,
.get_frag_coord_from_pixel_coord = key->ps_prolog.states.get_frag_coord_from_pixel_coord,
.pixel_center_integer = key->ps_prolog.pixel_center_integer,
.force_samplemask_to_helper_invocation = key->ps_prolog.states.force_samplemask_to_helper_invocation,
.num_interp_inputs = key->ps_prolog.num_interp_inputs,
.colors_read = key->ps_prolog.colors_read,
.color_interp_vgpr_index[0] = key->ps_prolog.color_interp_vgpr_index[0],
.color_interp_vgpr_index[1] = key->ps_prolog.color_interp_vgpr_index[1],
.color_interp[0] = key->ps_prolog.color_interp[0],
.color_interp[1] = key->ps_prolog.color_interp[1],
.color_attr_index[0] = key->ps_prolog.color_attr_index[0],
.color_attr_index[1] = key->ps_prolog.color_attr_index[1],
.color_two_side = key->ps_prolog.states.color_two_side,

View file

@ -459,7 +459,8 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
SI_PARAM_LINEAR_CENTER);
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid,
SI_PARAM_LINEAR_CENTROID);
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, NULL, SI_PARAM_LINE_STIPPLE_TEX);
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.line_stipple_tex_ena,
SI_PARAM_LINE_STIPPLE_TEX);
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.frag_pos[0],
SI_PARAM_POS_X_FLOAT);
si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.frag_pos[1],
@ -672,20 +673,35 @@ void si_get_ps_prolog_args(struct si_shader_args *args,
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.persp_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.persp_center);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.persp_centroid);
if (key->ps_prolog.uses_persp_centroid)
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.persp_centroid);
/* skip PERSP_PULL_MODEL */
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_center);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, NULL); /* LINE_STIPPLE_TEX */
if (key->ps_prolog.uses_linear_sample_and_center) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_center);
}
if (key->ps_prolog.uses_linear_centroid)
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid);
if (key->ps_prolog.reserve_line_stipple_tex_ena)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.line_stipple_tex_ena);
/* POS_X|Y|Z|W_FLOAT */
u_foreach_bit(i, key->ps_prolog.fragcoord_usage_mask)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.frag_pos[i]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.front_face);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.ancillary);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.sample_coverage);
if (key->ps_prolog.uses_ancillary)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.ancillary);
if (key->ps_prolog.uses_sample_coverage)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.sample_coverage);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.pos_fixed_pt);
}

View file

@ -491,9 +491,15 @@ static void si_shader_dump_stats(struct si_screen *sscreen, struct si_shader *sh
if (shader->selector->stage == MESA_SHADER_FRAGMENT) {
fprintf(file,
"*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
"SPI_PS_INPUT_ENA = 0x%04x\n",
"SPI_PS_INPUT_ADDR = 0x%04x\n"
"SPI_PS_INPUT_ENA = 0x%04x\n",
conf->spi_ps_input_addr, conf->spi_ps_input_ena);
ac_print_spi_ps_input_vgpr_list(conf->spi_ps_input_ena, conf->spi_ps_input_addr, file);
fprintf(file, "SPI_SHADER_Z_FORMAT = 0x%x\n", shader->info.spi_shader_z_format);
ac_print_spi_ps_shader_z_format(shader->info.spi_shader_z_format, file);
fprintf(file, "SPI_SHADER_COL_FORMAT = 0x%x\n", shader->info.spi_shader_col_format);
ac_print_spi_ps_shader_col_format(shader->info.spi_shader_col_format, file);
}
fprintf(file,
@ -601,8 +607,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
key->ps.part.prolog.bc_optimize_for_linear);
fprintf(f, " prolog.samplemask_log_ps_iter = %u\n",
key->ps.part.prolog.samplemask_log_ps_iter);
fprintf(f, " prolog.get_frag_coord_from_pixel_coord = %u\n",
key->ps.part.prolog.get_frag_coord_from_pixel_coord);
fprintf(f, " prolog.force_samplemask_to_helper_invocation = %u\n",
key->ps.part.prolog.force_samplemask_to_helper_invocation);
fprintf(f, " epilog.spi_shader_col_format = 0x%x\n",

View file

@ -7,7 +7,6 @@
#include "si_pipe.h"
#include "si_shader_internal.h"
#include "util/mesa-blake3.h"
#include "pipe/p_shader_tokens.h"
#include "nir.h"
#include "nir_tcs_info.h"
#include "nir_xfb_info.h"
@ -60,39 +59,35 @@ static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type typ
}
static void
get_interp_info_from_input_load(nir_intrinsic_instr *intr, enum glsl_interp_mode *interp_mode,
get_color_input_interp_info(nir_intrinsic_instr *intr, enum glsl_interp_mode *interp_mode,
unsigned *interp_location)
{
assert(nir_is_input_load(intr));
*interp_mode = INTERP_MODE_FLAT;
*interp_location = TGSI_INTERPOLATE_LOC_CENTER;
if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
if (intr->intrinsic != nir_intrinsic_load_interpolated_input) {
*interp_mode = INTERP_MODE_FLAT;
*interp_location = SI_INTERPOLATE_LOC_CENTER;
return;
}
ASSERTED unsigned io_location = nir_intrinsic_io_semantics(intr).location;
assert(io_location == VARYING_SLOT_COL0 || io_location == VARYING_SLOT_COL1);
unsigned io_location = nir_intrinsic_io_semantics(intr).location;
nir_intrinsic_instr *baryc = nir_def_as_intrinsic(intr->src[0].ssa);
*interp_mode = nir_intrinsic_interp_mode(baryc);
bool is_color = io_location == VARYING_SLOT_COL0 || io_location == VARYING_SLOT_COL1;
if (*interp_mode == INTERP_MODE_NONE && is_color)
if (*interp_mode == INTERP_MODE_NONE)
*interp_mode = INTERP_MODE_COLOR;
switch (baryc->intrinsic) {
case nir_intrinsic_load_barycentric_pixel:
*interp_location = TGSI_INTERPOLATE_LOC_CENTER;
*interp_location = SI_INTERPOLATE_LOC_CENTER;
break;
case nir_intrinsic_load_barycentric_centroid:
*interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
*interp_location = SI_INTERPOLATE_LOC_CENTROID;
break;
case nir_intrinsic_load_barycentric_sample:
*interp_location = TGSI_INTERPOLATE_LOC_SAMPLE;
break;
case nir_intrinsic_load_barycentric_at_offset:
case nir_intrinsic_load_barycentric_at_sample:
assert(!is_color);
*interp_location = TGSI_INTERPOLATE_LOC_CENTER;
*interp_location = SI_INTERPOLATE_LOC_SAMPLE;
break;
default:
UNREACHABLE("unexpected baryc intrinsic");
@ -151,7 +146,7 @@ static void gather_io_instrinsic(const nir_shader *nir, struct si_shader_info *i
enum glsl_interp_mode interp_mode;
unsigned interp_location;
get_interp_info_from_input_load(intr, &interp_mode, &interp_location);
get_color_input_interp_info(intr, &interp_mode, &interp_location);
/* Both flat and non-flat can occur with nir_io_mix_convergent_flat_with_interpolated,
* but we want to save only the non-flat interp mode in that case.
@ -165,19 +160,19 @@ static void gather_io_instrinsic(const nir_shader *nir, struct si_shader_info *i
switch (interp_mode) {
case INTERP_MODE_SMOOTH:
if (interp_location == TGSI_INTERPOLATE_LOC_SAMPLE)
if (interp_location == SI_INTERPOLATE_LOC_SAMPLE)
info->uses_sysval_persp_sample = true;
else if (interp_location == TGSI_INTERPOLATE_LOC_CENTROID)
else if (interp_location == SI_INTERPOLATE_LOC_CENTROID)
info->uses_sysval_persp_centroid = true;
else if (interp_location == TGSI_INTERPOLATE_LOC_CENTER)
else if (interp_location == SI_INTERPOLATE_LOC_CENTER)
info->uses_sysval_persp_center = true;
break;
case INTERP_MODE_NOPERSPECTIVE:
if (interp_location == TGSI_INTERPOLATE_LOC_SAMPLE)
if (interp_location == SI_INTERPOLATE_LOC_SAMPLE)
info->uses_sysval_linear_sample = true;
else if (interp_location == TGSI_INTERPOLATE_LOC_CENTROID)
else if (interp_location == SI_INTERPOLATE_LOC_CENTROID)
info->uses_sysval_linear_centroid = true;
else if (interp_location == TGSI_INTERPOLATE_LOC_CENTER)
else if (interp_location == SI_INTERPOLATE_LOC_CENTER)
info->uses_sysval_linear_center = true;
break;
case INTERP_MODE_COLOR:
@ -185,11 +180,11 @@ static void gather_io_instrinsic(const nir_shader *nir, struct si_shader_info *i
* in the rasterizer state, otherwise it will be SMOOTH.
*/
info->uses_interp_color = true;
if (interp_location == TGSI_INTERPOLATE_LOC_SAMPLE)
if (interp_location == SI_INTERPOLATE_LOC_SAMPLE)
info->uses_persp_sample_color = true;
else if (interp_location == TGSI_INTERPOLATE_LOC_CENTROID)
else if (interp_location == SI_INTERPOLATE_LOC_CENTROID)
info->uses_persp_centroid_color = true;
else if (interp_location == TGSI_INTERPOLATE_LOC_CENTER)
else if (interp_location == SI_INTERPOLATE_LOC_CENTER)
info->uses_persp_center_color = true;
break;
case INTERP_MODE_FLAT:
@ -370,7 +365,8 @@ static void gather_instruction(const struct nir_shader *nir, struct si_shader_in
info->uses_interp_at_sample = true;
break;
case nir_intrinsic_load_frag_coord:
info->reads_frag_coord_mask |= nir_def_components_read(&intr->def);
if (nir_def_components_read(&intr->def) & BITFIELD_BIT(3))
info->uses_sysval_frag_coord_w = true;
break;
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input:
@ -525,7 +521,6 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir,
info->base.fs.uses_sample_shading = nir->info.fs.uses_sample_shading;
info->base.fs.early_fragment_tests = nir->info.fs.early_fragment_tests;
info->base.fs.post_depth_coverage = nir->info.fs.post_depth_coverage;
info->base.fs.pixel_center_integer = nir->info.fs.pixel_center_integer;
info->base.fs.depth_layout = nir->info.fs.depth_layout;
break;
@ -592,6 +587,13 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir,
info->uses_sysval_invocation_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID);
info->uses_sysval_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID;
info->uses_sysval_ancillary = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LAYER_ID) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_SHADING_RATE) ||
/* The PS prolog uses LAYER_ID for fbfetch. */
(nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) ||
/* The PS prolog uses SAMPLE_ID for SAMPLE_MASK_IN. */
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
info->uses_sysval_sample_mask_in = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
info->uses_sysval_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
info->uses_sysval_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);

View file

@ -70,7 +70,6 @@ struct si_shader_info {
bool uses_sample_shading:1;
bool early_fragment_tests:1;
bool post_depth_coverage:1;
bool pixel_center_integer:1;
enum gl_frag_depth_layout depth_layout:3;
} fs;
@ -126,6 +125,7 @@ struct si_shader_info {
uint8_t colors_written;
uint16_t output_color_types; /**< Each bit pair is enum si_color_output_type */
bool color0_writes_all_cbufs; /**< gl_FragColor */
bool uses_sysval_ancillary;
bool uses_sysval_sample_mask_in; /**< does fragment shader read sample mask? */
bool reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */
bool writes_z; /**< does fragment shader write Z value? */
@ -150,6 +150,7 @@ struct si_shader_info {
bool uses_sysval_primitive_id;
bool uses_sysval_front_face;
bool uses_sysval_invocation_id;
bool uses_sysval_frag_coord_w;
bool uses_atomic_ordered_add;
bool writes_psize;
bool writes_primid;
@ -172,9 +173,6 @@ struct si_shader_info {
*/
uint8_t writes_1_if_tex_is_1;
/* frag coord and sample pos per component read mask. */
uint8_t reads_frag_coord_mask;
unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
enum mesa_prim rast_prim;

View file

@ -9,19 +9,6 @@
#include "si_shader.h"
#define SI_SPI_PS_INPUT_ADDR_FOR_PROLOG ( \
S_0286D0_PERSP_SAMPLE_ENA(1) | \
S_0286D0_PERSP_CENTER_ENA(1) | \
S_0286D0_PERSP_CENTROID_ENA(1) | \
S_0286D0_LINEAR_SAMPLE_ENA(1) | \
S_0286D0_LINEAR_CENTER_ENA(1) | \
S_0286D0_LINEAR_CENTROID_ENA(1) | \
S_0286D0_LINE_STIPPLE_TEX_ENA(1) | \
S_0286D0_FRONT_FACE_ENA(1) | \
S_0286D0_ANCILLARY_ENA(1) | \
S_0286D0_SAMPLE_COVERAGE_ENA(1) | \
S_0286D0_POS_FIXED_PT_ENA(1))
struct util_debug_callback;
struct si_shader_args {
@ -159,6 +146,7 @@ void si_get_late_shader_variant_info(struct si_shader *shader, struct si_shader_
nir_shader *nir);
void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader);
void si_fixup_spi_ps_input_config(struct si_shader *shader);
unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel);
void si_shader_update_spi_shader_formats(struct si_shader *shader, nir_shader *nir);
#endif

View file

@ -191,7 +191,8 @@ static void si_llvm_create_main_func(struct si_shader_context *ctx)
/* Reserve register locations for VGPR inputs the PS prolog may need. */
if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->shader->is_monolithic) {
ac_llvm_add_target_dep_function_attr(
ctx->main_fn.value, "InitialPSInputAddr", SI_SPI_PS_INPUT_ADDR_FOR_PROLOG);
ctx->main_fn.value, "InitialPSInputAddr",
si_get_spi_ps_input_addr_for_prolog(shader->selector));
}
}

View file

@ -471,16 +471,11 @@ static LLVMValueRef insert_ret_of_arg(struct si_shader_context *ctx, LLVMValueRe
if (is_vgpr)
data = ac_to_float(&ctx->ac, data);
if (ctx->args->ac.args[arg_index].size == 1) {
return LLVMBuildInsertValue(ctx->ac.builder, ret, data, index, "");
} else {
assert(ctx->args->ac.args[arg_index].size == 2);
LLVMValueRef tmp = LLVMBuildExtractElement(ctx->ac.builder, data, ctx->ac.i32_0, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, index, "");
tmp = LLVMBuildExtractElement(ctx->ac.builder, data, ctx->ac.i32_1, "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, index + 1, "");
return ret;
for (unsigned i = 0; i < ctx->args->ac.args[arg_index].size; i++) {
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
ac_llvm_extract_elem(&ctx->ac, data, i), index + i, "");
}
return ret;
}
/**
@ -514,11 +509,20 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
si_llvm_create_func(ctx, "ps_prolog", return_types, num_returns, 0);
LLVMValueRef func = ctx->main_fn.value;
/* Disable elimination of unused inputs. */
ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "InitialPSInputAddr", 0xffffff);
/* Copy inputs to outputs. This should be no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
*/
LLVMValueRef ret = ctx->return_value;
for (int i = 0; i < args->ac.arg_count; i++) {
/* Don't pass LINE_STIPPLE_TEX_ENA to the next shader binary because it's unused.
* This saves 1 VGPR in the prolog.
*/
if (args->ac.line_stipple_tex_ena.used && i == args->ac.line_stipple_tex_ena.arg_index)
continue;
LLVMValueRef p = LLVMGetParam(func, i);
ret = insert_ret_of_arg(ctx, ret, p, i);
}
@ -541,15 +545,22 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
bc_optimize = LLVMBuildTrunc(ctx->ac.builder, bc_optimize, ctx->ac.i1, "");
if (key->ps_prolog.states.bc_optimize_for_persp) {
assert(key->ps_prolog.uses_persp_centroid);
center = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.persp_center));
centroid = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.persp_centroid));
/* Select PERSP_CENTROID. */
tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center, centroid, "");
ret = insert_ret_of_arg(ctx, ret, tmp, args->ac.persp_centroid.arg_index);
}
if (key->ps_prolog.states.bc_optimize_for_linear) {
assert(key->ps_prolog.uses_linear_centroid);
center = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.linear_center));
centroid = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.linear_centroid));
/* Select PERSP_CENTROID. */
tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center, centroid, "");
ret = insert_ret_of_arg(ctx, ret, tmp, args->ac.linear_centroid.arg_index);
@ -561,15 +572,21 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
LLVMValueRef persp_sample = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.persp_sample));
/* Overwrite PERSP_CENTER. */
ret = insert_ret_of_arg(ctx, ret, persp_sample, args->ac.persp_center.arg_index);
/* Overwrite PERSP_CENTROID. */
ret = insert_ret_of_arg(ctx, ret, persp_sample, args->ac.persp_centroid.arg_index);
if (key->ps_prolog.uses_persp_centroid) {
/* Overwrite PERSP_CENTROID. */
ret = insert_ret_of_arg(ctx, ret, persp_sample, args->ac.persp_centroid.arg_index);
}
}
if (key->ps_prolog.states.force_linear_sample_interp) {
LLVMValueRef linear_sample = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.linear_sample));
/* Overwrite LINEAR_CENTER. */
ret = insert_ret_of_arg(ctx, ret, linear_sample, args->ac.linear_center.arg_index);
/* Overwrite LINEAR_CENTROID. */
ret = insert_ret_of_arg(ctx, ret, linear_sample, args->ac.linear_centroid.arg_index);
if (key->ps_prolog.uses_linear_centroid) {
/* Overwrite LINEAR_CENTROID. */
ret = insert_ret_of_arg(ctx, ret, linear_sample, args->ac.linear_centroid.arg_index);
}
}
/* Force center interpolation. */
@ -577,15 +594,21 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
LLVMValueRef persp_center = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.persp_center));
/* Overwrite PERSP_SAMPLE. */
ret = insert_ret_of_arg(ctx, ret, persp_center, args->ac.persp_sample.arg_index);
/* Overwrite PERSP_CENTROID. */
ret = insert_ret_of_arg(ctx, ret, persp_center, args->ac.persp_centroid.arg_index);
if (key->ps_prolog.uses_persp_centroid) {
/* Overwrite PERSP_CENTROID. */
ret = insert_ret_of_arg(ctx, ret, persp_center, args->ac.persp_centroid.arg_index);
}
}
if (key->ps_prolog.states.force_linear_center_interp) {
LLVMValueRef linear_center = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.linear_center));
/* Overwrite LINEAR_SAMPLE. */
ret = insert_ret_of_arg(ctx, ret, linear_center, args->ac.linear_sample.arg_index);
/* Overwrite LINEAR_CENTROID. */
ret = insert_ret_of_arg(ctx, ret, linear_center, args->ac.linear_centroid.arg_index);
if (key->ps_prolog.uses_linear_centroid) {
/* Overwrite LINEAR_CENTROID. */
ret = insert_ret_of_arg(ctx, ret, linear_center, args->ac.linear_centroid.arg_index);
}
}
/* Interpolate colors. */
@ -597,11 +620,12 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
if (!writemask)
continue;
/* If the interpolation qualifier is not CONSTANT (-1). */
LLVMValueRef interp_ij = NULL;
if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
unsigned index =
args->ac.num_sgprs_used + key->ps_prolog.color_interp_vgpr_index[i];
if (key->ps_prolog.color_interp[i] != AC_COLOR_INTERP_FLAT) {
unsigned index = ac_get_color_interp_arg(&args->ac, key->ps_prolog.color_interp[i]);
index = args->ac.num_sgprs_used + args->ac.args[index].offset;
/* Get the (i,j) updated by bc_optimize handling. */
LLVMValueRef interp[2] = {
@ -677,28 +701,6 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
args->ac.sample_coverage.arg_index);
}
if (key->ps_prolog.states.get_frag_coord_from_pixel_coord) {
LLVMValueRef pixel_coord = ac_get_arg(&ctx->ac, args->ac.pos_fixed_pt);
pixel_coord = LLVMBuildBitCast(ctx->ac.builder, pixel_coord, ctx->ac.v2i16, "");
pixel_coord = LLVMBuildUIToFP(ctx->ac.builder, pixel_coord, ctx->ac.v2f32, "");
if (!key->ps_prolog.pixel_center_integer) {
LLVMValueRef vec2_half = LLVMConstVector((LLVMValueRef[]){LLVMConstReal(ctx->ac.f32, 0.5),
LLVMConstReal(ctx->ac.f32, 0.5)}, 2);
pixel_coord = LLVMBuildFAdd(ctx->ac.builder, pixel_coord, vec2_half, "");
}
for (unsigned i = 0; i < 2; i++) {
if (!args->ac.frag_pos[i].used)
continue;
ret = insert_ret_of_arg(ctx, ret,
LLVMBuildExtractElement(ctx->ac.builder, pixel_coord,
LLVMConstInt(ctx->ac.i32, i, 0), ""),
args->ac.frag_pos[i].arg_index);
}
}
/* Tell LLVM to insert WQM instruction sequence when needed. */
if (key->ps_prolog.wqm) {
LLVMAddTargetDependentFunctionAttr(func, "amdgpu-ps-wqm-outputs", "");

View file

@ -37,7 +37,7 @@ void si_shader_update_spi_shader_formats(struct si_shader *shader, nir_shader *n
for (i = 0; i < num_targets; i++) {
unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
if (spi_format && (colors_written & 1u << num_mrts)) {
if (spi_format && (colors_written & 1u << i)) {
value |= spi_format << (num_mrts * 4);
num_mrts++;
}
@ -390,7 +390,7 @@ void si_get_shader_variant_info(struct si_shader *shader,
* Reserve register locations for VGPR inputs the PS prolog may need.
*/
shader->config.spi_ps_input_addr = shader->config.spi_ps_input_ena |
SI_SPI_PS_INPUT_ADDR_FOR_PROLOG;
si_get_spi_ps_input_addr_for_prolog(shader->selector);
}
}
@ -580,12 +580,6 @@ void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader)
/* The sample mask fixup has an optimization that replaces the sample mask with the sample ID. */
if (key->ps.part.prolog.samplemask_log_ps_iter == 3)
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
if (key->ps.part.prolog.get_frag_coord_from_pixel_coord) {
shader->config.spi_ps_input_ena &= C_0286CC_POS_X_FLOAT_ENA;
shader->config.spi_ps_input_ena &= C_0286CC_POS_Y_FLOAT_ENA;
shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
}
}
void si_fixup_spi_ps_input_config(struct si_shader *shader)
@ -613,3 +607,51 @@ void si_fixup_spi_ps_input_config(struct si_shader *shader)
shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
}
}
unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel)
{
unsigned spi_ps_input_addr = S_0286D0_PERSP_SAMPLE_ENA(1) |
S_0286D0_PERSP_CENTER_ENA(1) |
S_0286D0_FRONT_FACE_ENA(1) |
S_0286D0_POS_FIXED_PT_ENA(1);
/* This includes color interpolation at centroid even if the main shader part doesn't
* use the barycentrics. The PS prolog can still use them.
*/
if (sel->info.uses_sysval_persp_centroid)
spi_ps_input_addr |= S_0286D0_PERSP_CENTROID_ENA(1);
/* The PS prolog can change one to the other, so we need both. */
if (sel->info.uses_sysval_linear_sample ||
sel->info.uses_sysval_linear_center) {
spi_ps_input_addr |= S_0286D0_LINEAR_SAMPLE_ENA(1) |
S_0286D0_LINEAR_CENTER_ENA(1);
}
if (sel->info.uses_sysval_linear_centroid)
spi_ps_input_addr |= S_0286D0_LINEAR_CENTROID_ENA(1);
/* If barycentrics and pos.w aren't used, we may need LINE_STIPPLE_TEX_ENA as the filler
* input VGPR. See si_fixup_spi_ps_input_config for more information.
*/
if (!sel->info.uses_sysval_persp_sample &&
!sel->info.uses_sysval_persp_center &&
!sel->info.uses_sysval_persp_centroid &&
!sel->info.uses_sysval_linear_sample &&
!sel->info.uses_sysval_linear_center &&
!sel->info.uses_sysval_linear_centroid &&
!sel->info.uses_interp_color &&
!sel->info.uses_sysval_frag_coord_w &&
/* We don't set LINE_STIPPLE_TEX_ENA with LLVM, and never on GFX12. */
sel->info.base.use_aco_amd &&
sel->screen->info.gfx_level != GFX12)
spi_ps_input_addr |= S_0286D0_LINE_STIPPLE_TEX_ENA(1);
if (sel->info.uses_sysval_ancillary)
spi_ps_input_addr |= S_0286D0_ANCILLARY_ENA(1);
if (sel->info.uses_sysval_sample_mask_in)
spi_ps_input_addr |= S_0286D0_SAMPLE_COVERAGE_ENA(1);
return spi_ps_input_addr;
}

View file

@ -2424,8 +2424,6 @@ void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *s
uses_persp_center && uses_persp_centroid;
key->ps.part.prolog.bc_optimize_for_linear =
sel->info.uses_sysval_linear_center && sel->info.uses_sysval_linear_centroid;
key->ps.part.prolog.get_frag_coord_from_pixel_coord =
!sel->info.base.fs.uses_sample_shading && sel->info.reads_frag_coord_mask & 0x3;
key->ps.part.prolog.force_samplemask_to_helper_invocation = 0;
key->ps.mono.force_mono = 0;
key->ps.mono.interpolate_at_sample_force_center = 0;
@ -2444,8 +2442,6 @@ void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *s
sel->info.uses_sysval_linear_sample > 1;
key->ps.part.prolog.bc_optimize_for_persp = 0;
key->ps.part.prolog.bc_optimize_for_linear = 0;
key->ps.part.prolog.get_frag_coord_from_pixel_coord =
!!(sel->info.reads_frag_coord_mask & 0x3);
key->ps.part.prolog.force_samplemask_to_helper_invocation = sel->info.uses_sysval_sample_mask_in;
key->ps.mono.force_mono = 0;
key->ps.mono.interpolate_at_sample_force_center = sel->info.uses_interp_at_sample;