radeonsi: precompute more spi_map code

This replaces vs_output_param_offset with vs_output_ps_input_cntl,
which is easier to use.

For geometry shaders, vs_output_ps_input_cntl is stored in the GS si_shader
structure, not gs_copy_shader. This requires that gs_copy_shader compilation
is finished before the GS main shader part, so that GS can initialize
vs_output_ps_input_cntl using the compiled GS copy shader.

output_semantic_to_slot becomes unused, so it's removed.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
Marek Olšák 2021-08-11 03:54:14 -04:00 committed by Marge Bot
parent dba914de85
commit 5824ab569e
7 changed files with 80 additions and 66 deletions

View file

@ -1433,8 +1433,10 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
si_dump_streamout(&sel->so);
}
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
sizeof(shader->info.vs_output_param_offset));
/* Initialize vs_output_ps_input_cntl to default. */
for (unsigned i = 0; i < ARRAY_SIZE(shader->info.vs_output_ps_input_cntl); i++)
shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED;
shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0;
shader->info.uses_instanceid = sel->info.uses_instanceid;
@ -1445,6 +1447,43 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
if (!si_llvm_compile_shader(sscreen, compiler, shader, debug, nir, free_nir))
return false;
/* Compute vs_output_ps_input_cntl. */
if ((sel->info.stage == MESA_SHADER_VERTEX ||
sel->info.stage == MESA_SHADER_TESS_EVAL ||
sel->info.stage == MESA_SHADER_GEOMETRY) &&
!shader->key.as_ls && !shader->key.as_es) {
ubyte *vs_output_param_offset = shader->info.vs_output_param_offset;
if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg)
vs_output_param_offset = sel->gs_copy_shader->info.vs_output_param_offset;
/* VS and TES should also set primitive ID output if it's used. */
unsigned num_outputs_with_prim_id = sel->info.num_outputs +
shader->key.mono.u.vs_export_prim_id;
for (unsigned i = 0; i < num_outputs_with_prim_id; i++) {
unsigned semantic = sel->info.output_semantic[i];
unsigned offset = vs_output_param_offset[i];
unsigned ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
/* The input is loaded from parameter memory. */
ps_input_cntl = S_028644_OFFSET(offset);
} else {
/* The input is a DEFAULT_VAL constant. */
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
/* OFFSET=0x20 means that DEFAULT_VAL is used. */
ps_input_cntl = S_028644_OFFSET(0x20) |
S_028644_DEFAULT_VAL(offset);
}
shader->info.vs_output_ps_input_cntl[semantic] = ps_input_cntl;
}
}
/* Validate SGPR and VGPR usage for compute to detect compiler bugs. */
if (sel->info.stage == MESA_SHADER_COMPUTE) {
unsigned wave_size = sscreen->compute_wave_size;
@ -2002,8 +2041,8 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
shader->info.ancillary_vgpr_index = mainp->info.ancillary_vgpr_index;
memcpy(shader->info.vs_output_param_offset, mainp->info.vs_output_param_offset,
sizeof(mainp->info.vs_output_param_offset));
memcpy(shader->info.vs_output_ps_input_cntl, mainp->info.vs_output_ps_input_cntl,
sizeof(mainp->info.vs_output_ps_input_cntl));
shader->info.uses_instanceid = mainp->info.uses_instanceid;
shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
shader->info.nr_param_exports = mainp->info.nr_param_exports;

View file

@ -158,6 +158,12 @@ struct si_context;
#define SI_NGG_PRIM_EDGE_FLAG_BITS ((1 << 9) | (1 << 19) | (1 << 29))
/* Precomputed PS_INPUT_CNTL values (R_028644 fields) for PS inputs that are not
 * read from parameter memory. OFFSET=0x20 means that DEFAULT_VAL is used
 * instead of an exported parameter.
 * NOTE(review): the _0000/_0001 suffixes presumably name the constant vector
 * selected by DEFAULT_VAL (0 and 3 respectively) - confirm against the
 * register documentation.
 */
#define SI_PS_INPUT_CNTL_0000 (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(0))
#define SI_PS_INPUT_CNTL_0001 (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(3))
/* Initial value of every vs_output_ps_input_cntl entry before the shader's
 * real outputs are filled in.
 */
#define SI_PS_INPUT_CNTL_UNUSED SI_PS_INPUT_CNTL_0000
/* D3D9 behaviour for COLOR0 requires 0001. GL is undefined.
 * Used as the initial value for the VARYING_SLOT_COL0 entry.
 */
#define SI_PS_INPUT_CNTL_UNUSED_COLOR0 SI_PS_INPUT_CNTL_0001
/* SGPR user data indices */
enum
{
@ -342,7 +348,6 @@ struct si_shader_info {
ubyte num_outputs;
union si_input_info input[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1];
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS];
ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
@ -707,6 +712,7 @@ struct si_shader_key {
/* GCN-specific shader info. */
struct si_shader_binary_info {
ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
ubyte num_input_sgprs;
ubyte num_input_vgprs;
signed char face_vgpr_index;

View file

@ -22,6 +22,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "ac_exp_param.h"
#include "ac_nir_to_llvm.h"
#include "ac_rtld.h"
#include "si_pipe.h"

View file

@ -26,6 +26,7 @@
#include "si_shader_internal.h"
#include "sid.h"
#include "util/u_memory.h"
#include "ac_exp_param.h"
static LLVMValueRef unpack_sint16(struct si_shader_context *ctx, LLVMValueRef i32, unsigned index)
{
@ -452,6 +453,9 @@ static void si_prepare_param_exports(struct si_shader_context *ctx,
struct si_shader *shader = ctx->shader;
unsigned param_count = 0;
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
sizeof(shader->info.vs_output_param_offset));
for (unsigned i = 0; i < noutput; i++) {
unsigned semantic = outputs[i].semantic;

View file

@ -134,13 +134,11 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
} else {
/* Outputs. */
assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));
assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot));
for (unsigned i = 0; i < num_slots; i++) {
unsigned loc = driver_location + i;
info->output_semantic[loc] = semantic + i;
info->output_semantic_to_slot[semantic + i] = loc;
if (is_output_load) {
/* Output loads have only a few things that we need to track. */
@ -479,8 +477,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
info->writes_position = nir->info.outputs_written & VARYING_BIT_POS;
}
memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot));
func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
nir_foreach_block (block, func->impl) {
nir_foreach_instr (instr, block)
@ -493,7 +489,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
* and si_emit_spi_map uses this unconditionally when such a pixel shader is used.
*/
info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID;
info->output_semantic_to_slot[VARYING_SLOT_PRIMITIVE_ID] = info->num_outputs;
info->output_type[info->num_outputs] = nir_type_uint32;
info->output_usagemask[info->num_outputs] = 0x1;
}

View file

@ -52,7 +52,6 @@ template<int NUM_INTERP>
static void si_emit_spi_map(struct si_context *sctx)
{
struct si_shader *ps = sctx->shader.ps.current;
struct si_shader *vs;
struct si_shader_info *psinfo = ps ? &ps->selector->info : NULL;
unsigned spi_ps_input_cntl[NUM_INTERP];
@ -61,56 +60,24 @@ static void si_emit_spi_map(struct si_context *sctx)
if (!NUM_INTERP)
return;
/* With legacy GS, only the GS copy shader contains information about param exports. */
if (sctx->shader.gs.cso && !sctx->ngg)
vs = sctx->shader.gs.cso->gs_copy_shader;
else
vs = si_get_vs(sctx)->current;
struct si_shader_info *vsinfo = &vs->selector->info;
struct si_shader *vs = si_get_vs(sctx)->current;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
for (unsigned i = 0; i < NUM_INTERP; i++) {
union si_input_info input = psinfo->input[i];
unsigned ps_input_cntl = 0;
unsigned ps_input_cntl = vs->info.vs_output_ps_input_cntl[input.semantic];
bool non_default_val = G_028644_OFFSET(ps_input_cntl) != 0x20;
int vs_slot = vsinfo->output_semantic_to_slot[input.semantic];
if (vs_slot >= 0) {
unsigned offset = vs->info.vs_output_param_offset[vs_slot];
if (offset <= AC_EXP_PARAM_OFFSET_31) {
/* The input is loaded from parameter memory. */
ps_input_cntl |= S_028644_OFFSET(offset);
if (input.interpolate == INTERP_MODE_FLAT ||
(input.interpolate == INTERP_MODE_COLOR && rs->flatshade)) {
ps_input_cntl |= S_028644_FLAT_SHADE(1);
}
} else {
/* The input is a DEFAULT_VAL constant. */
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
/* Overwrite the whole value. OFFSET=0x20 means that DEFAULT_VAL is used. */
ps_input_cntl = S_028644_OFFSET(0x20) |
S_028644_DEFAULT_VAL(offset);
}
if (non_default_val) {
if (input.interpolate == INTERP_MODE_FLAT ||
(input.interpolate == INTERP_MODE_COLOR && rs->flatshade))
ps_input_cntl |= S_028644_FLAT_SHADE(1);
if (input.fp16_lo_hi_valid) {
assert(offset <= AC_EXP_PARAM_OFFSET_31 || offset == AC_EXP_PARAM_DEFAULT_VAL_0000);
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
S_028644_USE_DEFAULT_ATTR1(offset == AC_EXP_PARAM_DEFAULT_VAL_0000) |
S_028644_DEFAULT_VAL_ATTR1(0) |
S_028644_ATTR0_VALID(1) | /* this must be set if FP16_INTERP_MODE is set */
S_028644_ATTR1_VALID(!!(input.fp16_lo_hi_valid & 0x2));
}
} else {
/* No corresponding output found, load defaults into input. */
ps_input_cntl = S_028644_OFFSET(0x20) |
/* D3D 9 behaviour for COLOR0. GL is undefined */
S_028644_DEFAULT_VAL(input.semantic == VARYING_SLOT_COL1 ? 3 : 0);
}
if (input.semantic == VARYING_SLOT_PNTC ||

View file

@ -2632,6 +2632,19 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
if (!compiler->passes)
si_init_compiler(sscreen, compiler);
/* The GS copy shader is always pre-compiled. */
if (sel->info.stage == MESA_SHADER_GEOMETRY &&
(!sscreen->use_ngg || !sscreen->use_ngg_streamout || /* also for PRIMITIVES_GENERATED */
sel->tess_turns_off_ngg)) {
sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
if (!sel->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
return;
}
si_shader_vs(sscreen, sel->gs_copy_shader, sel);
}
/* Serialize NIR to save memory. Monolithic shader variants
* have to deserialize NIR before compilation.
*/
@ -2716,14 +2729,16 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
unsigned i;
for (i = 0; i < sel->info.num_outputs; i++) {
unsigned offset = shader->info.vs_output_param_offset[i];
unsigned semantic = sel->info.output_semantic[i];
unsigned ps_input_cntl = shader->info.vs_output_ps_input_cntl[semantic];
if (offset <= AC_EXP_PARAM_OFFSET_31)
/* OFFSET=0x20 means DEFAULT_VAL, which means VS doesn't export it. */
if (G_028644_OFFSET(ps_input_cntl) != 0x20)
continue;
unsigned semantic = sel->info.output_semantic[i];
unsigned id;
/* Remove the output from the mask. */
if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
semantic != VARYING_SLOT_POS &&
semantic != VARYING_SLOT_PSIZ &&
@ -2736,19 +2751,6 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
}
}
/* The GS copy shader is always pre-compiled. */
if (sel->info.stage == MESA_SHADER_GEOMETRY &&
(!sscreen->use_ngg || !sscreen->use_ngg_streamout || /* also for PRIMITIVES_GENERATED */
sel->tess_turns_off_ngg)) {
sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
if (!sel->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
return;
}
si_shader_vs(sscreen, sel->gs_copy_shader, sel);
}
/* Free NIR. We only keep serialized NIR after this point. */
if (sel->nir) {
ralloc_free(sel->nir);