diff --git a/src/amd/common/ac_shader_args.c b/src/amd/common/ac_shader_args.c index 6246347972e..5f4cb6ea3e4 100644 --- a/src/amd/common/ac_shader_args.c +++ b/src/amd/common/ac_shader_args.c @@ -82,3 +82,35 @@ void ac_compact_ps_vgpr_args(struct ac_shader_args *info, uint32_t spi_ps_input) info->num_vgprs_used = vgpr_reg; } + +unsigned +ac_get_color_interp_arg(const struct ac_shader_args *args, enum ac_color_interp interp) +{ + struct ac_arg arg; + + switch (interp) { + case AC_COLOR_INTERP_PERSP_SAMPLE: + arg = args->persp_sample; + break; + case AC_COLOR_INTERP_PERSP_CENTER: + arg = args->persp_center; + break; + case AC_COLOR_INTERP_PERSP_CENTROID: + arg = args->persp_centroid; + break; + case AC_COLOR_INTERP_LINEAR_SAMPLE: + arg = args->linear_sample; + break; + case AC_COLOR_INTERP_LINEAR_CENTER: + arg = args->linear_center; + break; + case AC_COLOR_INTERP_LINEAR_CENTROID: + arg = args->linear_centroid; + break; + default: + UNREACHABLE("unexpected interp mode"); + } + + assert(arg.used); + return arg.arg_index; +} diff --git a/src/amd/common/ac_shader_args.h b/src/amd/common/ac_shader_args.h index 64675c03854..9654f3550c4 100644 --- a/src/amd/common/ac_shader_args.h +++ b/src/amd/common/ac_shader_args.h @@ -10,6 +10,10 @@ #include #include +#ifdef __cplusplus +extern "C" { +#endif + /* Maximum dwords of inline push constants when the indirect path is still used */ #define AC_MAX_INLINE_PUSH_CONSTS_WITH_INDIRECT 8 /* Maximum dwords of inline push constants when the indirect path is not used */ @@ -27,6 +31,16 @@ enum ac_arg_type AC_ARG_CONST_ADDR, }; +enum ac_color_interp { + AC_COLOR_INTERP_FLAT, + AC_COLOR_INTERP_PERSP_SAMPLE, + AC_COLOR_INTERP_PERSP_CENTER, + AC_COLOR_INTERP_PERSP_CENTROID, + AC_COLOR_INTERP_LINEAR_SAMPLE, + AC_COLOR_INTERP_LINEAR_CENTER, + AC_COLOR_INTERP_LINEAR_CENTROID, +}; + struct ac_arg { uint16_t arg_index; bool used; @@ -163,6 +177,7 @@ struct ac_shader_args { struct ac_arg linear_sample; struct ac_arg linear_center; struct ac_arg linear_centroid; + struct ac_arg line_stipple_tex_ena; struct ac_arg pos_fixed_pt; /* CS */ @@ -204,5 +219,10 @@ void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsign void ac_add_return(struct ac_shader_args *info, enum ac_arg_regfile regfile); void ac_add_preserved(struct ac_shader_args *info, const struct ac_arg *arg); void ac_compact_ps_vgpr_args(struct ac_shader_args *info, uint32_t spi_ps_input); +unsigned ac_get_color_interp_arg(const struct ac_shader_args *args, enum ac_color_interp interp); + +#ifdef __cplusplus +} +#endif #endif diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 98da14565b4..a1d59de0eb6 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -1511,3 +1511,96 @@ retry_select_mode: max_out_vertices <= max_workgroup_size && out->hw_max_esverts >= min_esverts; } + +/* Print SPI_PS_INPUT_ADDR as follows: + * v[0:1] = PERSP_SAMPLE + * v[2:3] = PERSP_CENTER + * v[4:5] = LINEAR_SAMPLE + * v[6:7] = LINEAR_CENTER + * v8 = LINE_STIPPLE_TEX + * v9 = FRONT_FACE + * v10 = ANCILLARY + * v11 = SAMPLE_COVERAGE + * v12 = POS_FIXED_PT + */ +void +ac_print_spi_ps_input_vgpr_list(uint32_t spi_ps_input_ena, uint32_t spi_ps_input_addr, FILE *f) +{ + unsigned vgpr = 0; + +#define PRINT_PS_INPUT_VGPR(count, name) do { \ + if (G_0286CC_##name##_ENA(spi_ps_input_addr)) { \ + bool enabled = G_0286CC_##name##_ENA(spi_ps_input_ena); \ + if (count > 1) \ + fprintf(f, " v[%u:%u] = %s%s\n", vgpr, vgpr + count - 1, #name, \ + enabled ? " === initialized ===" : ""); \ + else \ + fprintf(f, " v%u = %s\n", vgpr, #name); \ + vgpr += count; \ + } \ +} while (0) + + PRINT_PS_INPUT_VGPR(2, PERSP_SAMPLE); + PRINT_PS_INPUT_VGPR(2, PERSP_CENTER); + PRINT_PS_INPUT_VGPR(2, PERSP_CENTROID); + PRINT_PS_INPUT_VGPR(3, PERSP_PULL_MODEL); + PRINT_PS_INPUT_VGPR(2, LINEAR_SAMPLE); + PRINT_PS_INPUT_VGPR(2, LINEAR_CENTER); + PRINT_PS_INPUT_VGPR(2, LINEAR_CENTROID); + PRINT_PS_INPUT_VGPR(1, LINE_STIPPLE_TEX); + PRINT_PS_INPUT_VGPR(1, POS_X_FLOAT); + PRINT_PS_INPUT_VGPR(1, POS_Y_FLOAT); + PRINT_PS_INPUT_VGPR(1, POS_Z_FLOAT); + PRINT_PS_INPUT_VGPR(1, POS_W_FLOAT); + PRINT_PS_INPUT_VGPR(1, FRONT_FACE); + PRINT_PS_INPUT_VGPR(1, ANCILLARY); + PRINT_PS_INPUT_VGPR(1, SAMPLE_COVERAGE); + PRINT_PS_INPUT_VGPR(1, POS_FIXED_PT); +#undef PRINT_PS_INPUT_VGPR +} + +static const char * +get_spi_shader_format(unsigned format) +{ + switch (format) { +#define PS_FORMAT(name) case V_028714_SPI_SHADER_##name: return #name; + PS_FORMAT(ZERO) + PS_FORMAT(32_R) + PS_FORMAT(32_GR) + PS_FORMAT(32_AR) + PS_FORMAT(FP16_ABGR) + PS_FORMAT(UNORM16_ABGR) + PS_FORMAT(SNORM16_ABGR) + PS_FORMAT(UINT16_ABGR) + PS_FORMAT(SINT16_ABGR) + PS_FORMAT(32_ABGR) +#undef PS_FORMAT + default: + UNREACHABLE("invalid export format"); + } +} + +/* Print (example): + * mrt0 = FP16_ABGR + * mrt1 = 32_R + */ +void +ac_print_spi_ps_shader_col_format(uint32_t spi_shader_col_format, FILE *f) +{ + for (unsigned i = 0; i < 8; i++) { + unsigned format = (spi_shader_col_format >> (i * 4)) & 0xf; + + if (format) + fprintf(f, " mrt%u = %s\n", i, get_spi_shader_format(format)); + } +} + +/* Print (example): + * mrtz = 32_R + */ +void +ac_print_spi_ps_shader_z_format(uint32_t spi_shader_z_format, FILE *f) +{ + if (spi_shader_z_format) + fprintf(f, " mrtz = %s\n", get_spi_shader_format(spi_shader_z_format)); +} diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index 77069fdb099..6a622c6d6c1 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -343,6 +343,12 @@ ac_ngg_compute_subgroup_info(enum amd_gfx_level gfx_level, mesa_shader_stage es_ unsigned ngg_lds_scratch_size, bool tess_turns_off_ngg, unsigned max_esgs_lds_padding, ac_ngg_subgroup_info *out); +void +ac_print_spi_ps_input_vgpr_list(uint32_t spi_ps_input_ena, uint32_t spi_ps_input_addr, FILE *f); + +void ac_print_spi_ps_shader_col_format(uint32_t spi_shader_col_format, FILE *f); +void ac_print_spi_ps_shader_z_format(uint32_t spi_shader_z_format, FILE *f); + static unsigned inline ac_shader_get_lds_alloc_granularity(enum amd_gfx_level gfx_level) { diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index 56f230b63b6..e2819a508d9 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -91,15 +91,15 @@ struct aco_ps_prolog_info { bool force_linear_sample_interp; bool force_persp_center_interp; bool force_linear_center_interp; + bool uses_persp_centroid; + bool uses_linear_centroid; unsigned samplemask_log_ps_iter; - bool get_frag_coord_from_pixel_coord; - bool pixel_center_integer; bool force_samplemask_to_helper_invocation; unsigned num_interp_inputs; unsigned colors_read; - int color_interp_vgpr_index[2]; - int color_attr_index[2]; + uint8_t color_attr_index[2]; + enum ac_color_interp color_interp[2]; bool color_two_side; bool needs_wqm; diff --git a/src/amd/compiler/instruction_selection/aco_select_ps_prolog.cpp b/src/amd/compiler/instruction_selection/aco_select_ps_prolog.cpp index 927df3c8eca..fa26a0bd624 100644 --- a/src/amd/compiler/instruction_selection/aco_select_ps_prolog.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_ps_prolog.cpp @@ -64,6 +64,8 @@ overwrite_interp_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo) cond = bool_to_vector_condition(ctx, cond); if (finfo->bc_optimize_for_persp) { + assert(finfo->uses_persp_centroid); + Temp center = get_arg(ctx, ctx->args->persp_center); Temp centroid = get_arg(ctx, ctx->args->persp_centroid); @@ -73,6 +75,8 @@ overwrite_interp_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo) } if (finfo->bc_optimize_for_linear) { + assert(finfo->uses_linear_centroid); + Temp center = get_arg(ctx, ctx->args->linear_center); Temp centroid = get_arg(ctx, ctx->args->linear_centroid); @@ -85,25 +89,29 @@ overwrite_interp_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo) if (finfo->force_persp_sample_interp) { Temp persp_sample = get_arg(ctx, ctx->args->persp_sample); ctx->arg_temps[ctx->args->persp_center.arg_index] = persp_sample; - ctx->arg_temps[ctx->args->persp_centroid.arg_index] = persp_sample; + if (finfo->uses_persp_centroid) + ctx->arg_temps[ctx->args->persp_centroid.arg_index] = persp_sample; } if (finfo->force_linear_sample_interp) { Temp linear_sample = get_arg(ctx, ctx->args->linear_sample); ctx->arg_temps[ctx->args->linear_center.arg_index] = linear_sample; - ctx->arg_temps[ctx->args->linear_centroid.arg_index] = linear_sample; + if (finfo->uses_linear_centroid) + ctx->arg_temps[ctx->args->linear_centroid.arg_index] = linear_sample; } if (finfo->force_persp_center_interp) { Temp persp_center = get_arg(ctx, ctx->args->persp_center); ctx->arg_temps[ctx->args->persp_sample.arg_index] = persp_center; - ctx->arg_temps[ctx->args->persp_centroid.arg_index] = persp_center; + if (finfo->uses_persp_centroid) + ctx->arg_temps[ctx->args->persp_centroid.arg_index] = persp_center; } if (finfo->force_linear_center_interp) { Temp linear_center = get_arg(ctx, ctx->args->linear_center); ctx->arg_temps[ctx->args->linear_sample.arg_index] = linear_center; - ctx->arg_temps[ctx->args->linear_centroid.arg_index] = linear_center; + if (finfo->uses_linear_centroid) + ctx->arg_temps[ctx->args->linear_centroid.arg_index] = linear_center; } } @@ -167,32 +175,6 @@ overwrite_samplemask_arg(isel_context* ctx, const struct aco_ps_prolog_info* fin is_helper_invoc); } } -void -overwrite_pos_xy_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo) -{ - if (!finfo->get_frag_coord_from_pixel_coord) - return; - - Builder bld(ctx->program, ctx->block); - Temp pos_fixed_pt = get_arg(ctx, ctx->args->pos_fixed_pt); - - for (unsigned i = 0; i < 2; i++) { - if (!ctx->args->frag_pos[i].used) - continue; - - Temp t; - if (i) - t = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(16), pos_fixed_pt); - else - t = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0xffff), pos_fixed_pt); - - t = bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), t); - if (!finfo->pixel_center_integer) - t = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000 /*0.5*/), t); - - ctx->arg_temps[ctx->args->frag_pos[i].arg_index] = t; - } -} void passthrough_all_args(isel_context* ctx, std::vector& regs) @@ -200,12 +182,20 @@ passthrough_all_args(isel_context* ctx, std::vector& regs) struct ac_arg arg; arg.used = true; - for (arg.arg_index = 0; arg.arg_index < ctx->args->arg_count; arg.arg_index++) + for (arg.arg_index = 0; arg.arg_index < ctx->args->arg_count; arg.arg_index++) { + /* Don't pass LINE_STIPPLE_TEX_ENA to the next shader binary because it's unused. + * This saves 1 VGPR in the prolog. + */ + if (ctx->args->line_stipple_tex_ena.used && + arg.arg_index == ctx->args->line_stipple_tex_ena.arg_index) + continue; + regs.emplace_back(Operand(get_arg(ctx, arg), get_arg_reg(ctx->args, arg))); + } } Temp -get_interp_color(isel_context* ctx, int interp_vgpr, unsigned attr_index, unsigned comp) +get_interp_color(isel_context* ctx, int interp_arg, unsigned attr_index, unsigned comp) { Builder bld(ctx->program, ctx->block); @@ -213,10 +203,8 @@ get_interp_color(isel_context* ctx, int interp_vgpr, unsigned attr_index, unsign Temp prim_mask = get_arg(ctx, ctx->args->prim_mask); - if (interp_vgpr != -1) { - /* interp args are all 2 vgprs */ - int arg_index = ctx->args->persp_sample.arg_index + interp_vgpr / 2; - Temp interp_ij = ctx->arg_temps[arg_index]; + if (interp_arg != -1) { + Temp interp_ij = ctx->arg_temps[interp_arg]; emit_interp_instr(ctx, attr_index, comp, interp_ij, dst, prim_mask, false); } else { @@ -245,7 +233,9 @@ interpolate_color_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo u_foreach_bit (i, finfo->colors_read) { unsigned color_index = i / 4; unsigned front_index = finfo->color_attr_index[color_index]; - int interp_vgpr = finfo->color_interp_vgpr_index[color_index]; + int interp_arg = finfo->color_interp[color_index] == AC_COLOR_INTERP_FLAT + ? -1 + : ac_get_color_interp_arg(ctx->args, finfo->color_interp[color_index]); /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1", * otherwise it's at offset "num_inputs". @@ -254,8 +244,8 @@ interpolate_color_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo if (color_index == 1 && finfo->colors_read & 0xf) back_index++; - Temp front = get_interp_color(ctx, interp_vgpr, front_index, i % 4); - Temp back = get_interp_color(ctx, interp_vgpr, back_index, i % 4); + Temp front = get_interp_color(ctx, interp_arg, front_index, i % 4); + Temp back = get_interp_color(ctx, interp_arg, back_index, i % 4); Temp color = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), back, front, is_face_positive); @@ -266,8 +256,10 @@ interpolate_color_args(isel_context* ctx, const struct aco_ps_prolog_info* finfo u_foreach_bit (i, finfo->colors_read) { unsigned color_index = i / 4; unsigned attr_index = finfo->color_attr_index[color_index]; - int interp_vgpr = finfo->color_interp_vgpr_index[color_index]; - Temp color = get_interp_color(ctx, interp_vgpr, attr_index, i % 4); + int interp_arg = finfo->color_interp[color_index] == AC_COLOR_INTERP_FLAT + ? -1 + : ac_get_color_interp_arg(ctx->args, finfo->color_interp[color_index]); + Temp color = get_interp_color(ctx, interp_arg, attr_index, i % 4); regs.emplace_back(Operand(color, PhysReg{vgpr++})); } @@ -295,7 +287,6 @@ select_ps_prolog(Program* program, void* pinfo, ac_shader_config* config, overwrite_interp_args(&ctx, finfo); overwrite_samplemask_arg(&ctx, finfo); - overwrite_pos_xy_args(&ctx, finfo); std::vector regs; passthrough_all_args(&ctx, regs); diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index c66ee7aa5ff..183e4228abf 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -342,7 +342,7 @@ declare_ps_input_vgprs(struct radv_shader_args_state *state, const struct radv_s RADV_ADD_ARG(state, AC_ARG_VGPR, 2, AC_ARG_VALUE, ac.linear_sample); RADV_ADD_ARG(state, AC_ARG_VGPR, 2, AC_ARG_VALUE, ac.linear_center); RADV_ADD_ARG(state, AC_ARG_VGPR, 2, AC_ARG_VALUE, ac.linear_centroid); - RADV_ADD_NULL_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE); /* line stipple tex */ + RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.line_stipple_tex_ena); RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.frag_pos, 0); RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.frag_pos, 1); RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.frag_pos, 2); diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader.c b/src/gallium/drivers/radeonsi/gfx/si_shader.c index f3e8e1ac95c..55c02360d69 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader.c @@ -13,7 +13,6 @@ #include "nir_xfb_info.h" #include "si_pipe.h" #include "si_shader_internal.h" -#include "pipe/p_shader_tokens.h" static void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader); @@ -853,11 +852,11 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx) .optimize_frag_coord = true, .frag_coord_is_center = true, /* This does a lot of things. See the description in ac_nir_lower_ps_early_options. */ - .ps_iter_samples = key->ps.part.prolog.samplemask_log_ps_iter ? - (1 << key->ps.part.prolog.samplemask_log_ps_iter) : - (key->ps.part.prolog.force_persp_sample_interp || - key->ps.part.prolog.force_linear_sample_interp ? 2 : - (key->ps.part.prolog.get_frag_coord_from_pixel_coord ? 1 : 0)), + .ps_iter_samples = nir->info.fs.uses_sample_shading ? 8 : + key->ps.part.prolog.samplemask_log_ps_iter ? + (1 << key->ps.part.prolog.samplemask_log_ps_iter) : + (key->ps.part.prolog.force_persp_sample_interp || + key->ps.part.prolog.force_linear_sample_interp ? 2 : 0), .fbfetch_is_1D = key->ps.mono.fbfetch_is_1D, .fbfetch_layered = key->ps.mono.fbfetch_layered, @@ -892,6 +891,7 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx) ac_nir_lower_ps_early_options early_options = { .optimize_frag_coord = true, .frag_coord_is_center = true, + .ps_iter_samples = nir->info.fs.uses_sample_shading ? 8 : 0, .lower_color_inputs_to_load_color01 = true, .alpha_func = COMPARE_FUNC_ALWAYS, .spi_shader_col_format_hint = ~0, @@ -1742,15 +1742,28 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_ key->ps_prolog.states.force_linear_center_interp || key->ps_prolog.states.bc_optimize_for_persp || key->ps_prolog.states.bc_optimize_for_linear || key->ps_prolog.states.samplemask_log_ps_iter || - key->ps_prolog.states.get_frag_coord_from_pixel_coord || key->ps_prolog.states.force_samplemask_to_helper_invocation); + key->ps_prolog.uses_persp_centroid = + G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */ + /* The PS prolog can change one to the other, so we need both or neither to be set. */ + assert(G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr) == + G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr)); + key->ps_prolog.uses_linear_sample_and_center = + G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr) || /* addr because the PS prolog may use it */ + G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr); + key->ps_prolog.uses_linear_centroid = + G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */ + key->ps_prolog.reserve_line_stipple_tex_ena = + G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr); /* unused but may need to be reserved */ key->ps_prolog.fragcoord_usage_mask = G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_ena) | (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_ena) << 1) | (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_ena) << 2) | (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) << 3); - key->ps_prolog.pixel_center_integer = key->ps_prolog.fragcoord_usage_mask && - shader->selector->info.base.fs.pixel_center_integer; + key->ps_prolog.uses_ancillary = + G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */ + key->ps_prolog.uses_sample_coverage = + G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr); /* addr because the PS prolog may use it */ if (shader->key.ps.part.prolog.poly_stipple) shader->info.uses_vmem_load_other = true; @@ -1778,27 +1791,27 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_ switch (interp) { case INTERP_MODE_FLAT: - key->ps_prolog.color_interp_vgpr_index[i] = -1; + key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_FLAT; break; case INTERP_MODE_SMOOTH: case INTERP_MODE_COLOR: /* Force the interpolation location for colors here. */ if (shader->key.ps.part.prolog.force_persp_sample_interp) - location = TGSI_INTERPOLATE_LOC_SAMPLE; + location = SI_INTERPOLATE_LOC_SAMPLE; if (shader->key.ps.part.prolog.force_persp_center_interp) - location = TGSI_INTERPOLATE_LOC_CENTER; + location = SI_INTERPOLATE_LOC_CENTER; switch (location) { - case TGSI_INTERPOLATE_LOC_SAMPLE: - key->ps_prolog.color_interp_vgpr_index[i] = 0; + case SI_INTERPOLATE_LOC_SAMPLE: + key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_PERSP_SAMPLE; shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1); break; - case TGSI_INTERPOLATE_LOC_CENTER: - key->ps_prolog.color_interp_vgpr_index[i] = 2; + case SI_INTERPOLATE_LOC_CENTER: + key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_PERSP_CENTER; shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1); break; - case TGSI_INTERPOLATE_LOC_CENTROID: - key->ps_prolog.color_interp_vgpr_index[i] = 4; + case SI_INTERPOLATE_LOC_CENTROID: + key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_PERSP_CENTROID; shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTROID_ENA(1); break; default: @@ -1808,25 +1821,25 @@ static void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_ case INTERP_MODE_NOPERSPECTIVE: /* Force the interpolation location for colors here. */ if (shader->key.ps.part.prolog.force_linear_sample_interp) - location = TGSI_INTERPOLATE_LOC_SAMPLE; + location = SI_INTERPOLATE_LOC_SAMPLE; if (shader->key.ps.part.prolog.force_linear_center_interp) - location = TGSI_INTERPOLATE_LOC_CENTER; + location = SI_INTERPOLATE_LOC_CENTER; /* The VGPR assignment for non-monolithic shaders * works because InitialPSInputAddr is set on the * main shader and PERSP_PULL_MODEL is never used. */ switch (location) { - case TGSI_INTERPOLATE_LOC_SAMPLE: - key->ps_prolog.color_interp_vgpr_index[i] = 6; + case SI_INTERPOLATE_LOC_SAMPLE: + key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_LINEAR_SAMPLE; shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1); break; - case TGSI_INTERPOLATE_LOC_CENTER: - key->ps_prolog.color_interp_vgpr_index[i] = 8; + case SI_INTERPOLATE_LOC_CENTER: + key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_LINEAR_CENTER; shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1); break; - case TGSI_INTERPOLATE_LOC_CENTROID: - key->ps_prolog.color_interp_vgpr_index[i] = 10; + case SI_INTERPOLATE_LOC_CENTROID: + key->ps_prolog.color_interp[i] = AC_COLOR_INTERP_LINEAR_CENTROID; shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTROID_ENA(1); break; default: @@ -1852,7 +1865,6 @@ static bool si_need_ps_prolog(const union si_shader_part_key *key) key->ps_prolog.states.bc_optimize_for_persp || key->ps_prolog.states.bc_optimize_for_linear || key->ps_prolog.states.poly_stipple || key->ps_prolog.states.samplemask_log_ps_iter || - key->ps_prolog.states.get_frag_coord_from_pixel_coord || key->ps_prolog.states.force_samplemask_to_helper_invocation; } diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader.h b/src/gallium/drivers/radeonsi/gfx/si_shader.h index d8051647363..3eacd7c0b58 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader.h +++ b/src/gallium/drivers/radeonsi/gfx/si_shader.h @@ -234,6 +234,12 @@ enum SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9, /* +8 for COLOR[0..1] */ }; +enum { + SI_INTERPOLATE_LOC_CENTER, + SI_INTERPOLATE_LOC_CENTROID, + SI_INTERPOLATE_LOC_SAMPLE, +}; + /* These fields are only set in current_vs_state (except INDEXED) in si_context, and they are * accessible in the shader via vs_state_bits in VS, TES, and GS. */ @@ -536,7 +542,6 @@ struct si_ps_prolog_bits { unsigned bc_optimize_for_persp : 1; unsigned bc_optimize_for_linear : 1; unsigned samplemask_log_ps_iter : 2; - unsigned get_frag_coord_from_pixel_coord : 1; unsigned force_samplemask_to_helper_invocation : 1; }; @@ -565,11 +570,16 @@ union si_shader_part_key { /* Color interpolation and two-side color selection. */ unsigned colors_read : 8; /* color input components read */ unsigned num_interp_inputs : 5; /* BCOLOR is at this location */ + unsigned uses_persp_centroid : 1; + unsigned uses_linear_sample_and_center : 1; + unsigned uses_linear_centroid : 1; + unsigned reserve_line_stipple_tex_ena : 1; /* only reserve the VGPR, don't use it */ unsigned fragcoord_usage_mask : 4; - unsigned pixel_center_integer : 1; + unsigned uses_ancillary : 1; + unsigned uses_sample_coverage : 1; unsigned wqm : 1; - char color_attr_index[2]; - signed char color_interp_vgpr_index[2]; /* -1 == constant */ + uint8_t color_attr_index[2]; + uint8_t color_interp[2]; /* AC_COLOR_INTERP_* */ } ps_prolog; struct { struct si_ps_epilog_bits states; diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_aco.c b/src/gallium/drivers/radeonsi/gfx/si_shader_aco.c index 942cd76a18d..bae3a4fea5d 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_aco.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_aco.c @@ -253,15 +253,15 @@ si_aco_build_ps_prolog(struct aco_compiler_options *options, .force_linear_sample_interp = key->ps_prolog.states.force_linear_sample_interp, .force_persp_center_interp = key->ps_prolog.states.force_persp_center_interp, .force_linear_center_interp = key->ps_prolog.states.force_linear_center_interp, + .uses_persp_centroid = key->ps_prolog.uses_persp_centroid, + .uses_linear_centroid = key->ps_prolog.uses_linear_centroid, .samplemask_log_ps_iter = key->ps_prolog.states.samplemask_log_ps_iter, - .get_frag_coord_from_pixel_coord = key->ps_prolog.states.get_frag_coord_from_pixel_coord, - .pixel_center_integer = key->ps_prolog.pixel_center_integer, .force_samplemask_to_helper_invocation = key->ps_prolog.states.force_samplemask_to_helper_invocation, .num_interp_inputs = key->ps_prolog.num_interp_inputs, .colors_read = key->ps_prolog.colors_read, - .color_interp_vgpr_index[0] = key->ps_prolog.color_interp_vgpr_index[0], - .color_interp_vgpr_index[1] = key->ps_prolog.color_interp_vgpr_index[1], + .color_interp[0] = key->ps_prolog.color_interp[0], + .color_interp[1] = key->ps_prolog.color_interp[1], .color_attr_index[0] = key->ps_prolog.color_attr_index[0], .color_attr_index[1] = key->ps_prolog.color_attr_index[1], .color_two_side = key->ps_prolog.states.color_two_side, diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_args.c b/src/gallium/drivers/radeonsi/gfx/si_shader_args.c index 067dffc0e79..2bf763f5b2a 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_args.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_args.c @@ -459,7 +459,8 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args, SI_PARAM_LINEAR_CENTER); si_add_arg_checked(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid, SI_PARAM_LINEAR_CENTROID); - si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, NULL, SI_PARAM_LINE_STIPPLE_TEX); + si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.line_stipple_tex_ena, + SI_PARAM_LINE_STIPPLE_TEX); si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.frag_pos[0], SI_PARAM_POS_X_FLOAT); si_add_arg_checked(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.frag_pos[1], @@ -672,20 +673,35 @@ void si_get_ps_prolog_args(struct si_shader_args *args, ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.persp_sample); ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.persp_center); - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.persp_centroid); + + if (key->ps_prolog.uses_persp_centroid) + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.persp_centroid); + /* skip PERSP_PULL_MODEL */ - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_sample); - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_center); - ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, NULL); /* LINE_STIPPLE_TEX */ + + if (key->ps_prolog.uses_linear_sample_and_center) { + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_sample); + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_center); + } + + if (key->ps_prolog.uses_linear_centroid) + ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_VALUE, &args->ac.linear_centroid); + + if (key->ps_prolog.reserve_line_stipple_tex_ena) + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.line_stipple_tex_ena); /* POS_X|Y|Z|W_FLOAT */ u_foreach_bit(i, key->ps_prolog.fragcoord_usage_mask) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.frag_pos[i]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.front_face); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.ancillary); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.sample_coverage); + + if (key->ps_prolog.uses_ancillary) + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.ancillary); + + if (key->ps_prolog.uses_sample_coverage) + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.sample_coverage); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_VALUE, &args->ac.pos_fixed_pt); } diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_binary.c b/src/gallium/drivers/radeonsi/gfx/si_shader_binary.c index ed87f360c8d..4c54c7f2033 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_binary.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_binary.c @@ -491,9 +491,15 @@ static void si_shader_dump_stats(struct si_screen *sscreen, struct si_shader *sh if (shader->selector->stage == MESA_SHADER_FRAGMENT) { fprintf(file, "*** SHADER CONFIG ***\n" - "SPI_PS_INPUT_ADDR = 0x%04x\n" - "SPI_PS_INPUT_ENA = 0x%04x\n", + "SPI_PS_INPUT_ADDR = 0x%04x\n" + "SPI_PS_INPUT_ENA = 0x%04x\n", conf->spi_ps_input_addr, conf->spi_ps_input_ena); + ac_print_spi_ps_input_vgpr_list(conf->spi_ps_input_ena, conf->spi_ps_input_addr, file); + + fprintf(file, "SPI_SHADER_Z_FORMAT = 0x%x\n", shader->info.spi_shader_z_format); + ac_print_spi_ps_shader_z_format(shader->info.spi_shader_z_format, file); + fprintf(file, "SPI_SHADER_COL_FORMAT = 0x%x\n", shader->info.spi_shader_col_format); + ac_print_spi_ps_shader_col_format(shader->info.spi_shader_col_format, file); } fprintf(file, @@ -601,8 +607,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) key->ps.part.prolog.bc_optimize_for_linear); fprintf(f, " prolog.samplemask_log_ps_iter = %u\n", key->ps.part.prolog.samplemask_log_ps_iter); - fprintf(f, " prolog.get_frag_coord_from_pixel_coord = %u\n", - key->ps.part.prolog.get_frag_coord_from_pixel_coord); fprintf(f, " prolog.force_samplemask_to_helper_invocation = %u\n", key->ps.part.prolog.force_samplemask_to_helper_invocation); fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_info.c b/src/gallium/drivers/radeonsi/gfx/si_shader_info.c index 3e6a5bffa91..c042c9d46ee 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_info.c @@ -7,7 +7,6 @@ #include "si_pipe.h" #include "si_shader_internal.h" #include "util/mesa-blake3.h" -#include "pipe/p_shader_tokens.h" #include "nir.h" #include "nir_tcs_info.h" #include "nir_xfb_info.h" @@ -60,39 +59,35 @@ static const nir_src *get_texture_src(nir_tex_instr *instr, nir_tex_src_type typ } static void -get_interp_info_from_input_load(nir_intrinsic_instr *intr, enum glsl_interp_mode *interp_mode, +get_color_input_interp_info(nir_intrinsic_instr *intr, enum glsl_interp_mode *interp_mode, unsigned *interp_location) { assert(nir_is_input_load(intr)); - *interp_mode = INTERP_MODE_FLAT; - *interp_location = TGSI_INTERPOLATE_LOC_CENTER; - - if (intr->intrinsic != nir_intrinsic_load_interpolated_input) + if (intr->intrinsic != nir_intrinsic_load_interpolated_input) { + *interp_mode = INTERP_MODE_FLAT; + *interp_location = SI_INTERPOLATE_LOC_CENTER; return; + } + + ASSERTED unsigned io_location = nir_intrinsic_io_semantics(intr).location; + assert(io_location == VARYING_SLOT_COL0 || io_location == VARYING_SLOT_COL1); - unsigned io_location = nir_intrinsic_io_semantics(intr).location; nir_intrinsic_instr *baryc = nir_def_as_intrinsic(intr->src[0].ssa); *interp_mode = nir_intrinsic_interp_mode(baryc); - bool is_color = io_location == VARYING_SLOT_COL0 || io_location == VARYING_SLOT_COL1; - if (*interp_mode == INTERP_MODE_NONE && is_color) + if (*interp_mode == INTERP_MODE_NONE) *interp_mode = INTERP_MODE_COLOR; switch (baryc->intrinsic) { case nir_intrinsic_load_barycentric_pixel: - *interp_location = TGSI_INTERPOLATE_LOC_CENTER; + *interp_location = SI_INTERPOLATE_LOC_CENTER; break; case nir_intrinsic_load_barycentric_centroid: - *interp_location = TGSI_INTERPOLATE_LOC_CENTROID; + *interp_location = SI_INTERPOLATE_LOC_CENTROID; break; case nir_intrinsic_load_barycentric_sample: - *interp_location = TGSI_INTERPOLATE_LOC_SAMPLE; - break; - case nir_intrinsic_load_barycentric_at_offset: - case nir_intrinsic_load_barycentric_at_sample: - assert(!is_color); - *interp_location = TGSI_INTERPOLATE_LOC_CENTER; + *interp_location = SI_INTERPOLATE_LOC_SAMPLE; break; default: UNREACHABLE("unexpected baryc intrinsic"); @@ -151,7 +146,7 @@ static void gather_io_instrinsic(const nir_shader *nir, struct si_shader_info *i enum glsl_interp_mode interp_mode; unsigned interp_location; - get_interp_info_from_input_load(intr, &interp_mode, &interp_location); + get_color_input_interp_info(intr, &interp_mode, &interp_location); /* Both flat and non-flat can occur with nir_io_mix_convergent_flat_with_interpolated, * but we want to save only the non-flat interp mode in that case. @@ -165,19 +160,19 @@ static void gather_io_instrinsic(const nir_shader *nir, struct si_shader_info *i switch (interp_mode) { case INTERP_MODE_SMOOTH: - if (interp_location == TGSI_INTERPOLATE_LOC_SAMPLE) + if (interp_location == SI_INTERPOLATE_LOC_SAMPLE) info->uses_sysval_persp_sample = true; - else if (interp_location == TGSI_INTERPOLATE_LOC_CENTROID) + else if (interp_location == SI_INTERPOLATE_LOC_CENTROID) info->uses_sysval_persp_centroid = true; - else if (interp_location == TGSI_INTERPOLATE_LOC_CENTER) + else if (interp_location == SI_INTERPOLATE_LOC_CENTER) info->uses_sysval_persp_center = true; break; case INTERP_MODE_NOPERSPECTIVE: - if (interp_location == TGSI_INTERPOLATE_LOC_SAMPLE) + if (interp_location == SI_INTERPOLATE_LOC_SAMPLE) info->uses_sysval_linear_sample = true; - else if (interp_location == TGSI_INTERPOLATE_LOC_CENTROID) + else if (interp_location == SI_INTERPOLATE_LOC_CENTROID) info->uses_sysval_linear_centroid = true; - else if (interp_location == TGSI_INTERPOLATE_LOC_CENTER) + else if (interp_location == SI_INTERPOLATE_LOC_CENTER) info->uses_sysval_linear_center = true; break; case INTERP_MODE_COLOR: @@ -185,11 +180,11 @@ static void gather_io_instrinsic(const nir_shader *nir, struct si_shader_info *i * in the rasterizer state, otherwise it will be SMOOTH. */ info->uses_interp_color = true; - if (interp_location == TGSI_INTERPOLATE_LOC_SAMPLE) + if (interp_location == SI_INTERPOLATE_LOC_SAMPLE) info->uses_persp_sample_color = true; - else if (interp_location == TGSI_INTERPOLATE_LOC_CENTROID) + else if (interp_location == SI_INTERPOLATE_LOC_CENTROID) info->uses_persp_centroid_color = true; - else if (interp_location == TGSI_INTERPOLATE_LOC_CENTER) + else if (interp_location == SI_INTERPOLATE_LOC_CENTER) info->uses_persp_center_color = true; break; case INTERP_MODE_FLAT: @@ -370,7 +365,8 @@ static void gather_instruction(const struct nir_shader *nir, struct si_shader_in info->uses_interp_at_sample = true; break; case nir_intrinsic_load_frag_coord: - info->reads_frag_coord_mask |= nir_def_components_read(&intr->def); + if (nir_def_components_read(&intr->def) & BITFIELD_BIT(3)) + info->uses_sysval_frag_coord_w = true; break; case nir_intrinsic_load_input: case nir_intrinsic_load_per_vertex_input: @@ -525,7 +521,6 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir, info->base.fs.uses_sample_shading = nir->info.fs.uses_sample_shading; info->base.fs.early_fragment_tests = nir->info.fs.early_fragment_tests; info->base.fs.post_depth_coverage = nir->info.fs.post_depth_coverage; - info->base.fs.pixel_center_integer = nir->info.fs.pixel_center_integer; info->base.fs.depth_layout = nir->info.fs.depth_layout; break; @@ -592,6 +587,13 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir, info->uses_sysval_invocation_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_INVOCATION_ID); info->uses_sysval_primitive_id = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) || nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID; + info->uses_sysval_ancillary = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LAYER_ID) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_SHADING_RATE) || + /* The PS prolog uses LAYER_ID for fbfetch. */ + (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) || + /* The PS prolog uses SAMPLE_ID for SAMPLE_MASK_IN. */ + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); info->uses_sysval_sample_mask_in = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); info->uses_sysval_linear_sample = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE); info->uses_sysval_linear_centroid = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID); diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_info.h b/src/gallium/drivers/radeonsi/gfx/si_shader_info.h index a6743228131..ef08e1d3784 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_info.h +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_info.h @@ -70,7 +70,6 @@ struct si_shader_info { bool uses_sample_shading:1; bool early_fragment_tests:1; bool post_depth_coverage:1; - bool pixel_center_integer:1; enum gl_frag_depth_layout depth_layout:3; } fs; @@ -126,6 +125,7 @@ struct si_shader_info { uint8_t colors_written; uint16_t output_color_types; /**< Each bit pair is enum si_color_output_type */ bool color0_writes_all_cbufs; /**< gl_FragColor */ + bool uses_sysval_ancillary; bool uses_sysval_sample_mask_in; /**< does fragment shader read sample mask? */ bool reads_tess_factors; /**< If TES reads TESSINNER or TESSOUTER */ bool writes_z; /**< does fragment shader write Z value? */ @@ -150,6 +150,7 @@ struct si_shader_info { bool uses_sysval_primitive_id; bool uses_sysval_front_face; bool uses_sysval_invocation_id; + bool uses_sysval_frag_coord_w; bool uses_atomic_ordered_add; bool writes_psize; bool writes_primid; @@ -172,9 +173,6 @@ struct si_shader_info { */ uint8_t writes_1_if_tex_is_1; - /* frag coord and sample pos per component read mask. */ - uint8_t reads_frag_coord_mask; - unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */ enum mesa_prim rast_prim; diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_internal.h b/src/gallium/drivers/radeonsi/gfx/si_shader_internal.h index 7b777b29019..6aff758f497 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_internal.h @@ -9,19 +9,6 @@ #include "si_shader.h" -#define SI_SPI_PS_INPUT_ADDR_FOR_PROLOG ( \ - S_0286D0_PERSP_SAMPLE_ENA(1) | \ - S_0286D0_PERSP_CENTER_ENA(1) | \ - S_0286D0_PERSP_CENTROID_ENA(1) | \ - S_0286D0_LINEAR_SAMPLE_ENA(1) | \ - S_0286D0_LINEAR_CENTER_ENA(1) | \ - S_0286D0_LINEAR_CENTROID_ENA(1) | \ - S_0286D0_LINE_STIPPLE_TEX_ENA(1) | \ - S_0286D0_FRONT_FACE_ENA(1) | \ - S_0286D0_ANCILLARY_ENA(1) | \ - S_0286D0_SAMPLE_COVERAGE_ENA(1) | \ - S_0286D0_POS_FIXED_PT_ENA(1)) - struct util_debug_callback; struct si_shader_args { @@ -159,6 +146,7 @@ void si_get_late_shader_variant_info(struct si_shader *shader, struct si_shader_ nir_shader *nir); void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader); void si_fixup_spi_ps_input_config(struct si_shader *shader); +unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel); void si_shader_update_spi_shader_formats(struct si_shader *shader, nir_shader *nir); #endif diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_llvm.c b/src/gallium/drivers/radeonsi/gfx/si_shader_llvm.c index 230f704cb1c..08c5c0e2c16 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_llvm.c @@ -191,7 +191,8 @@ static void si_llvm_create_main_func(struct si_shader_context *ctx) /* Reserve register locations for VGPR inputs the PS prolog may need. */ if (ctx->stage == MESA_SHADER_FRAGMENT && !ctx->shader->is_monolithic) { ac_llvm_add_target_dep_function_attr( - ctx->main_fn.value, "InitialPSInputAddr", SI_SPI_PS_INPUT_ADDR_FOR_PROLOG); + ctx->main_fn.value, "InitialPSInputAddr", + si_get_spi_ps_input_addr_for_prolog(shader->selector)); } } diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/gfx/si_shader_llvm_ps.c index 3d4625fda7a..eec0dd3824f 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_llvm_ps.c @@ -471,16 +471,11 @@ static LLVMValueRef insert_ret_of_arg(struct si_shader_context *ctx, LLVMValueRe if (is_vgpr) data = ac_to_float(&ctx->ac, data); - if (ctx->args->ac.args[arg_index].size == 1) { - return LLVMBuildInsertValue(ctx->ac.builder, ret, data, index, ""); - } else { - assert(ctx->args->ac.args[arg_index].size == 2); - LLVMValueRef tmp = LLVMBuildExtractElement(ctx->ac.builder, data, ctx->ac.i32_0, ""); - ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, index, ""); - tmp = LLVMBuildExtractElement(ctx->ac.builder, data, ctx->ac.i32_1, ""); - ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, index + 1, ""); - return ret; + for (unsigned i = 0; i < ctx->args->ac.args[arg_index].size; i++) { + ret = LLVMBuildInsertValue(ctx->ac.builder, ret, + ac_llvm_extract_elem(&ctx->ac, data, i), index + i, ""); } + return ret; } /** @@ -514,11 +509,20 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part si_llvm_create_func(ctx, "ps_prolog", return_types, num_returns, 0); LLVMValueRef func = ctx->main_fn.value; + /* Disable elimination of unused inputs. */ + ac_llvm_add_target_dep_function_attr(ctx->main_fn.value, "InitialPSInputAddr", 0xffffff); + /* Copy inputs to outputs. This should be no-op, as the registers match, * but it will prevent the compiler from overwriting them unintentionally. */ LLVMValueRef ret = ctx->return_value; for (int i = 0; i < args->ac.arg_count; i++) { + /* Don't pass LINE_STIPPLE_TEX_ENA to the next shader binary because it's unused. + * This saves 1 VGPR in the prolog. + */ + if (args->ac.line_stipple_tex_ena.used && i == args->ac.line_stipple_tex_ena.arg_index) + continue; + LLVMValueRef p = LLVMGetParam(func, i); ret = insert_ret_of_arg(ctx, ret, p, i); } @@ -541,15 +545,22 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part bc_optimize = LLVMBuildTrunc(ctx->ac.builder, bc_optimize, ctx->ac.i1, ""); if (key->ps_prolog.states.bc_optimize_for_persp) { + assert(key->ps_prolog.uses_persp_centroid); + center = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.persp_center)); centroid = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.persp_centroid)); + /* Select PERSP_CENTROID. */ tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center, centroid, ""); ret = insert_ret_of_arg(ctx, ret, tmp, args->ac.persp_centroid.arg_index); } + if (key->ps_prolog.states.bc_optimize_for_linear) { + assert(key->ps_prolog.uses_linear_centroid); + center = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.linear_center)); centroid = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.linear_centroid)); + /* Select PERSP_CENTROID. */ tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center, centroid, ""); ret = insert_ret_of_arg(ctx, ret, tmp, args->ac.linear_centroid.arg_index); @@ -561,15 +572,21 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part LLVMValueRef persp_sample = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.persp_sample)); /* Overwrite PERSP_CENTER. */ ret = insert_ret_of_arg(ctx, ret, persp_sample, args->ac.persp_center.arg_index); - /* Overwrite PERSP_CENTROID. */ - ret = insert_ret_of_arg(ctx, ret, persp_sample, args->ac.persp_centroid.arg_index); + + if (key->ps_prolog.uses_persp_centroid) { + /* Overwrite PERSP_CENTROID. */ + ret = insert_ret_of_arg(ctx, ret, persp_sample, args->ac.persp_centroid.arg_index); + } } if (key->ps_prolog.states.force_linear_sample_interp) { LLVMValueRef linear_sample = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.linear_sample)); /* Overwrite LINEAR_CENTER. */ ret = insert_ret_of_arg(ctx, ret, linear_sample, args->ac.linear_center.arg_index); - /* Overwrite LINEAR_CENTROID. */ - ret = insert_ret_of_arg(ctx, ret, linear_sample, args->ac.linear_centroid.arg_index); + + if (key->ps_prolog.uses_linear_centroid) { + /* Overwrite LINEAR_CENTROID. */ + ret = insert_ret_of_arg(ctx, ret, linear_sample, args->ac.linear_centroid.arg_index); + } } /* Force center interpolation. */ @@ -577,15 +594,21 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part LLVMValueRef persp_center = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.persp_center)); /* Overwrite PERSP_SAMPLE. */ ret = insert_ret_of_arg(ctx, ret, persp_center, args->ac.persp_sample.arg_index); - /* Overwrite PERSP_CENTROID. */ - ret = insert_ret_of_arg(ctx, ret, persp_center, args->ac.persp_centroid.arg_index); + + if (key->ps_prolog.uses_persp_centroid) { + /* Overwrite PERSP_CENTROID. */ + ret = insert_ret_of_arg(ctx, ret, persp_center, args->ac.persp_centroid.arg_index); + } } if (key->ps_prolog.states.force_linear_center_interp) { LLVMValueRef linear_center = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.linear_center)); /* Overwrite LINEAR_SAMPLE. */ ret = insert_ret_of_arg(ctx, ret, linear_center, args->ac.linear_sample.arg_index); - /* Overwrite LINEAR_CENTROID. */ - ret = insert_ret_of_arg(ctx, ret, linear_center, args->ac.linear_centroid.arg_index); + + if (key->ps_prolog.uses_linear_centroid) { + /* Overwrite LINEAR_CENTROID. */ + ret = insert_ret_of_arg(ctx, ret, linear_center, args->ac.linear_centroid.arg_index); + } } /* Interpolate colors. */ @@ -597,11 +620,12 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part if (!writemask) continue; - /* If the interpolation qualifier is not CONSTANT (-1). */ LLVMValueRef interp_ij = NULL; - if (key->ps_prolog.color_interp_vgpr_index[i] != -1) { - unsigned index = - args->ac.num_sgprs_used + key->ps_prolog.color_interp_vgpr_index[i]; + + if (key->ps_prolog.color_interp[i] != AC_COLOR_INTERP_FLAT) { + unsigned index = ac_get_color_interp_arg(&args->ac, key->ps_prolog.color_interp[i]); + + index = args->ac.num_sgprs_used + args->ac.args[index].offset; /* Get the (i,j) updated by bc_optimize handling. */ LLVMValueRef interp[2] = { @@ -677,28 +701,6 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part args->ac.sample_coverage.arg_index); } - if (key->ps_prolog.states.get_frag_coord_from_pixel_coord) { - LLVMValueRef pixel_coord = ac_get_arg(&ctx->ac, args->ac.pos_fixed_pt); - pixel_coord = LLVMBuildBitCast(ctx->ac.builder, pixel_coord, ctx->ac.v2i16, ""); - pixel_coord = LLVMBuildUIToFP(ctx->ac.builder, pixel_coord, ctx->ac.v2f32, ""); - - if (!key->ps_prolog.pixel_center_integer) { - LLVMValueRef vec2_half = LLVMConstVector((LLVMValueRef[]){LLVMConstReal(ctx->ac.f32, 0.5), - LLVMConstReal(ctx->ac.f32, 0.5)}, 2); - pixel_coord = LLVMBuildFAdd(ctx->ac.builder, pixel_coord, vec2_half, ""); - } - - for (unsigned i = 0; i < 2; i++) { - if (!args->ac.frag_pos[i].used) - continue; - - ret = insert_ret_of_arg(ctx, ret, - LLVMBuildExtractElement(ctx->ac.builder, pixel_coord, - LLVMConstInt(ctx->ac.i32, i, 0), ""), - args->ac.frag_pos[i].arg_index); - } - } - /* Tell LLVM to insert WQM instruction sequence when needed. */ if (key->ps_prolog.wqm) { LLVMAddTargetDependentFunctionAttr(func, "amdgpu-ps-wqm-outputs", ""); diff --git a/src/gallium/drivers/radeonsi/gfx/si_shader_variant_info.c b/src/gallium/drivers/radeonsi/gfx/si_shader_variant_info.c index 984bcdf46cf..e2d129057f2 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_shader_variant_info.c +++ b/src/gallium/drivers/radeonsi/gfx/si_shader_variant_info.c @@ -37,7 +37,7 @@ void si_shader_update_spi_shader_formats(struct si_shader *shader, nir_shader *n for (i = 0; i < num_targets; i++) { unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; - if (spi_format && (colors_written & 1u << num_mrts)) { + if (spi_format && (colors_written & 1u << i)) { value |= spi_format << (num_mrts * 4); num_mrts++; } @@ -390,7 +390,7 @@ void si_get_shader_variant_info(struct si_shader *shader, * Reserve register locations for VGPR inputs the PS prolog may need. */ shader->config.spi_ps_input_addr = shader->config.spi_ps_input_ena | - SI_SPI_PS_INPUT_ADDR_FOR_PROLOG; + si_get_spi_ps_input_addr_for_prolog(shader->selector); } } @@ -580,12 +580,6 @@ void si_set_spi_ps_input_config_for_separate_prolog(struct si_shader *shader) /* The sample mask fixup has an optimization that replaces the sample mask with the sample ID. */ if (key->ps.part.prolog.samplemask_log_ps_iter == 3) shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA; - - if (key->ps.part.prolog.get_frag_coord_from_pixel_coord) { - shader->config.spi_ps_input_ena &= C_0286CC_POS_X_FLOAT_ENA; - shader->config.spi_ps_input_ena &= C_0286CC_POS_Y_FLOAT_ENA; - shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1); - } } void si_fixup_spi_ps_input_config(struct si_shader *shader) @@ -613,3 +607,51 @@ void si_fixup_spi_ps_input_config(struct si_shader *shader) shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1); } } + +unsigned si_get_spi_ps_input_addr_for_prolog(struct si_shader_selector *sel) +{ + unsigned spi_ps_input_addr = S_0286D0_PERSP_SAMPLE_ENA(1) | + S_0286D0_PERSP_CENTER_ENA(1) | + S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_POS_FIXED_PT_ENA(1); + + /* This includes color interpolation at centroid even if the main shader part doesn't + * use the barycentrics. The PS prolog can still use them. + */ + if (sel->info.uses_sysval_persp_centroid) + spi_ps_input_addr |= S_0286D0_PERSP_CENTROID_ENA(1); + + /* The PS prolog can change one to the other, so we need both. */ + if (sel->info.uses_sysval_linear_sample || + sel->info.uses_sysval_linear_center) { + spi_ps_input_addr |= S_0286D0_LINEAR_SAMPLE_ENA(1) | + S_0286D0_LINEAR_CENTER_ENA(1); + } + + if (sel->info.uses_sysval_linear_centroid) + spi_ps_input_addr |= S_0286D0_LINEAR_CENTROID_ENA(1); + + /* If barycentrics and pos.w aren't used, we may need LINE_STIPPLE_TEX_ENA as the filler + * input VGPR. See si_fixup_spi_ps_input_config for more information. + */ + if (!sel->info.uses_sysval_persp_sample && + !sel->info.uses_sysval_persp_center && + !sel->info.uses_sysval_persp_centroid && + !sel->info.uses_sysval_linear_sample && + !sel->info.uses_sysval_linear_center && + !sel->info.uses_sysval_linear_centroid && + !sel->info.uses_interp_color && + !sel->info.uses_sysval_frag_coord_w && + /* We don't set LINE_STIPPLE_TEX_ENA with LLVM, and never on GFX12. */ + sel->info.base.use_aco_amd && + sel->screen->info.gfx_level != GFX12) + spi_ps_input_addr |= S_0286D0_LINE_STIPPLE_TEX_ENA(1); + + if (sel->info.uses_sysval_ancillary) + spi_ps_input_addr |= S_0286D0_ANCILLARY_ENA(1); + + if (sel->info.uses_sysval_sample_mask_in) + spi_ps_input_addr |= S_0286D0_SAMPLE_COVERAGE_ENA(1); + + return spi_ps_input_addr; +} diff --git a/src/gallium/drivers/radeonsi/gfx/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/gfx/si_state_shaders.cpp index 660c500db56..6639a815b07 100644 --- a/src/gallium/drivers/radeonsi/gfx/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/gfx/si_state_shaders.cpp @@ -2424,8 +2424,6 @@ void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *s uses_persp_center && uses_persp_centroid; key->ps.part.prolog.bc_optimize_for_linear = sel->info.uses_sysval_linear_center && sel->info.uses_sysval_linear_centroid; - key->ps.part.prolog.get_frag_coord_from_pixel_coord = - !sel->info.base.fs.uses_sample_shading && sel->info.reads_frag_coord_mask & 0x3; key->ps.part.prolog.force_samplemask_to_helper_invocation = 0; key->ps.mono.force_mono = 0; key->ps.mono.interpolate_at_sample_force_center = 0; @@ -2444,8 +2442,6 @@ void si_ps_key_update_framebuffer_rasterizer_sample_shading(struct si_context *s sel->info.uses_sysval_linear_sample > 1; key->ps.part.prolog.bc_optimize_for_persp = 0; key->ps.part.prolog.bc_optimize_for_linear = 0; - key->ps.part.prolog.get_frag_coord_from_pixel_coord = - !!(sel->info.reads_frag_coord_mask & 0x3); key->ps.part.prolog.force_samplemask_to_helper_invocation = sel->info.uses_sysval_sample_mask_in; key->ps.mono.force_mono = 0; key->ps.mono.interpolate_at_sample_force_center = sel->info.uses_interp_at_sample;