mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 19:40:10 +01:00
radeonsi: precompute more spi_map code
This replaces vs_output_param_offset with vs_output_ps_input_cntl, which is easier to use.

For geometry shaders, vs_output_ps_input_cntl is stored in the GS si_shader structure, not in gs_copy_shader. This requires that compilation of gs_copy_shader finishes before the GS main shader part is compiled, so that the GS can initialize vs_output_ps_input_cntl from the compiled GS copy shader.

output_semantic_to_slot becomes unused, so it is removed.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
parent
dba914de85
commit
5824ab569e
7 changed files with 80 additions and 66 deletions
|
|
@ -1433,8 +1433,10 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
si_dump_streamout(&sel->so);
|
||||
}
|
||||
|
||||
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
|
||||
sizeof(shader->info.vs_output_param_offset));
|
||||
/* Initialize vs_output_ps_input_cntl to default. */
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(shader->info.vs_output_ps_input_cntl); i++)
|
||||
shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED;
|
||||
shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0;
|
||||
|
||||
shader->info.uses_instanceid = sel->info.uses_instanceid;
|
||||
|
||||
|
|
@ -1445,6 +1447,43 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
if (!si_llvm_compile_shader(sscreen, compiler, shader, debug, nir, free_nir))
|
||||
return false;
|
||||
|
||||
/* Compute vs_output_ps_input_cntl. */
|
||||
if ((sel->info.stage == MESA_SHADER_VERTEX ||
|
||||
sel->info.stage == MESA_SHADER_TESS_EVAL ||
|
||||
sel->info.stage == MESA_SHADER_GEOMETRY) &&
|
||||
!shader->key.as_ls && !shader->key.as_es) {
|
||||
ubyte *vs_output_param_offset = shader->info.vs_output_param_offset;
|
||||
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg)
|
||||
vs_output_param_offset = sel->gs_copy_shader->info.vs_output_param_offset;
|
||||
|
||||
/* VS and TES should also set primitive ID output if it's used. */
|
||||
unsigned num_outputs_with_prim_id = sel->info.num_outputs +
|
||||
shader->key.mono.u.vs_export_prim_id;
|
||||
|
||||
for (unsigned i = 0; i < num_outputs_with_prim_id; i++) {
|
||||
unsigned semantic = sel->info.output_semantic[i];
|
||||
unsigned offset = vs_output_param_offset[i];
|
||||
unsigned ps_input_cntl;
|
||||
|
||||
if (offset <= AC_EXP_PARAM_OFFSET_31) {
|
||||
/* The input is loaded from parameter memory. */
|
||||
ps_input_cntl = S_028644_OFFSET(offset);
|
||||
} else {
|
||||
/* The input is a DEFAULT_VAL constant. */
|
||||
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
|
||||
offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
|
||||
offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
|
||||
|
||||
/* OFFSET=0x20 means that DEFAULT_VAL is used. */
|
||||
ps_input_cntl = S_028644_OFFSET(0x20) |
|
||||
S_028644_DEFAULT_VAL(offset);
|
||||
}
|
||||
|
||||
shader->info.vs_output_ps_input_cntl[semantic] = ps_input_cntl;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate SGPR and VGPR usage for compute to detect compiler bugs. */
|
||||
if (sel->info.stage == MESA_SHADER_COMPUTE) {
|
||||
unsigned wave_size = sscreen->compute_wave_size;
|
||||
|
|
@ -2002,8 +2041,8 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
|
||||
shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
|
||||
shader->info.ancillary_vgpr_index = mainp->info.ancillary_vgpr_index;
|
||||
memcpy(shader->info.vs_output_param_offset, mainp->info.vs_output_param_offset,
|
||||
sizeof(mainp->info.vs_output_param_offset));
|
||||
memcpy(shader->info.vs_output_ps_input_cntl, mainp->info.vs_output_ps_input_cntl,
|
||||
sizeof(mainp->info.vs_output_ps_input_cntl));
|
||||
shader->info.uses_instanceid = mainp->info.uses_instanceid;
|
||||
shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
|
||||
shader->info.nr_param_exports = mainp->info.nr_param_exports;
|
||||
|
|
|
|||
|
|
@ -158,6 +158,12 @@ struct si_context;
|
|||
|
||||
#define SI_NGG_PRIM_EDGE_FLAG_BITS ((1 << 9) | (1 << 19) | (1 << 29))
|
||||
|
||||
#define SI_PS_INPUT_CNTL_0000 (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(0))
|
||||
#define SI_PS_INPUT_CNTL_0001 (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(3))
|
||||
#define SI_PS_INPUT_CNTL_UNUSED SI_PS_INPUT_CNTL_0000
|
||||
/* D3D9 behaviour for COLOR0 requires 0001. GL is undefined. */
|
||||
#define SI_PS_INPUT_CNTL_UNUSED_COLOR0 SI_PS_INPUT_CNTL_0001
|
||||
|
||||
/* SGPR user data indices */
|
||||
enum
|
||||
{
|
||||
|
|
@ -342,7 +348,6 @@ struct si_shader_info {
|
|||
ubyte num_outputs;
|
||||
union si_input_info input[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
|
||||
char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1];
|
||||
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
|
||||
ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS];
|
||||
ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
|
||||
|
|
@ -707,6 +712,7 @@ struct si_shader_key {
|
|||
/* GCN-specific shader info. */
|
||||
struct si_shader_binary_info {
|
||||
ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
|
||||
uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
|
||||
ubyte num_input_sgprs;
|
||||
ubyte num_input_vgprs;
|
||||
signed char face_vgpr_index;
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ac_exp_param.h"
|
||||
#include "ac_nir_to_llvm.h"
|
||||
#include "ac_rtld.h"
|
||||
#include "si_pipe.h"
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
#include "si_shader_internal.h"
|
||||
#include "sid.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "ac_exp_param.h"
|
||||
|
||||
static LLVMValueRef unpack_sint16(struct si_shader_context *ctx, LLVMValueRef i32, unsigned index)
|
||||
{
|
||||
|
|
@ -452,6 +453,9 @@ static void si_prepare_param_exports(struct si_shader_context *ctx,
|
|||
struct si_shader *shader = ctx->shader;
|
||||
unsigned param_count = 0;
|
||||
|
||||
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
|
||||
sizeof(shader->info.vs_output_param_offset));
|
||||
|
||||
for (unsigned i = 0; i < noutput; i++) {
|
||||
unsigned semantic = outputs[i].semantic;
|
||||
|
||||
|
|
|
|||
|
|
@ -134,13 +134,11 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
|||
} else {
|
||||
/* Outputs. */
|
||||
assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));
|
||||
assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot));
|
||||
|
||||
for (unsigned i = 0; i < num_slots; i++) {
|
||||
unsigned loc = driver_location + i;
|
||||
|
||||
info->output_semantic[loc] = semantic + i;
|
||||
info->output_semantic_to_slot[semantic + i] = loc;
|
||||
|
||||
if (is_output_load) {
|
||||
/* Output loads have only a few things that we need to track. */
|
||||
|
|
@ -479,8 +477,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
|
|||
info->writes_position = nir->info.outputs_written & VARYING_BIT_POS;
|
||||
}
|
||||
|
||||
memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot));
|
||||
|
||||
func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
|
||||
nir_foreach_block (block, func->impl) {
|
||||
nir_foreach_instr (instr, block)
|
||||
|
|
@ -493,7 +489,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
|
|||
* and si_emit_spi_map uses this unconditionally when such a pixel shader is used.
|
||||
*/
|
||||
info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID;
|
||||
info->output_semantic_to_slot[VARYING_SLOT_PRIMITIVE_ID] = info->num_outputs;
|
||||
info->output_type[info->num_outputs] = nir_type_uint32;
|
||||
info->output_usagemask[info->num_outputs] = 0x1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,7 +52,6 @@ template<int NUM_INTERP>
|
|||
static void si_emit_spi_map(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader *ps = sctx->shader.ps.current;
|
||||
struct si_shader *vs;
|
||||
struct si_shader_info *psinfo = ps ? &ps->selector->info : NULL;
|
||||
unsigned spi_ps_input_cntl[NUM_INTERP];
|
||||
|
||||
|
|
@ -61,56 +60,24 @@ static void si_emit_spi_map(struct si_context *sctx)
|
|||
if (!NUM_INTERP)
|
||||
return;
|
||||
|
||||
/* With legacy GS, only the GS copy shader contains information about param exports. */
|
||||
if (sctx->shader.gs.cso && !sctx->ngg)
|
||||
vs = sctx->shader.gs.cso->gs_copy_shader;
|
||||
else
|
||||
vs = si_get_vs(sctx)->current;
|
||||
|
||||
struct si_shader_info *vsinfo = &vs->selector->info;
|
||||
struct si_shader *vs = si_get_vs(sctx)->current;
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
|
||||
for (unsigned i = 0; i < NUM_INTERP; i++) {
|
||||
union si_input_info input = psinfo->input[i];
|
||||
unsigned ps_input_cntl = 0;
|
||||
unsigned ps_input_cntl = vs->info.vs_output_ps_input_cntl[input.semantic];
|
||||
bool non_default_val = G_028644_OFFSET(ps_input_cntl) != 0x20;
|
||||
|
||||
int vs_slot = vsinfo->output_semantic_to_slot[input.semantic];
|
||||
if (vs_slot >= 0) {
|
||||
unsigned offset = vs->info.vs_output_param_offset[vs_slot];
|
||||
|
||||
if (offset <= AC_EXP_PARAM_OFFSET_31) {
|
||||
/* The input is loaded from parameter memory. */
|
||||
ps_input_cntl |= S_028644_OFFSET(offset);
|
||||
|
||||
if (input.interpolate == INTERP_MODE_FLAT ||
|
||||
(input.interpolate == INTERP_MODE_COLOR && rs->flatshade)) {
|
||||
ps_input_cntl |= S_028644_FLAT_SHADE(1);
|
||||
}
|
||||
} else {
|
||||
/* The input is a DEFAULT_VAL constant. */
|
||||
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
|
||||
offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
|
||||
offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
|
||||
|
||||
/* Overwrite the whole value. OFFSET=0x20 means that DEFAULT_VAL is used. */
|
||||
ps_input_cntl = S_028644_OFFSET(0x20) |
|
||||
S_028644_DEFAULT_VAL(offset);
|
||||
}
|
||||
if (non_default_val) {
|
||||
if (input.interpolate == INTERP_MODE_FLAT ||
|
||||
(input.interpolate == INTERP_MODE_COLOR && rs->flatshade))
|
||||
ps_input_cntl |= S_028644_FLAT_SHADE(1);
|
||||
|
||||
if (input.fp16_lo_hi_valid) {
|
||||
assert(offset <= AC_EXP_PARAM_OFFSET_31 || offset == AC_EXP_PARAM_DEFAULT_VAL_0000);
|
||||
|
||||
ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
|
||||
S_028644_USE_DEFAULT_ATTR1(offset == AC_EXP_PARAM_DEFAULT_VAL_0000) |
|
||||
S_028644_DEFAULT_VAL_ATTR1(0) |
|
||||
S_028644_ATTR0_VALID(1) | /* this must be set if FP16_INTERP_MODE is set */
|
||||
S_028644_ATTR1_VALID(!!(input.fp16_lo_hi_valid & 0x2));
|
||||
}
|
||||
} else {
|
||||
/* No corresponding output found, load defaults into input. */
|
||||
ps_input_cntl = S_028644_OFFSET(0x20) |
|
||||
/* D3D 9 behaviour for COLOR0. GL is undefined */
|
||||
S_028644_DEFAULT_VAL(input.semantic == VARYING_SLOT_COL1 ? 3 : 0);
|
||||
}
|
||||
|
||||
if (input.semantic == VARYING_SLOT_PNTC ||
|
||||
|
|
|
|||
|
|
@ -2632,6 +2632,19 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
|
|||
if (!compiler->passes)
|
||||
si_init_compiler(sscreen, compiler);
|
||||
|
||||
/* The GS copy shader is always pre-compiled. */
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY &&
|
||||
(!sscreen->use_ngg || !sscreen->use_ngg_streamout || /* also for PRIMITIVES_GENERATED */
|
||||
sel->tess_turns_off_ngg)) {
|
||||
sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
|
||||
if (!sel->gs_copy_shader) {
|
||||
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
|
||||
return;
|
||||
}
|
||||
|
||||
si_shader_vs(sscreen, sel->gs_copy_shader, sel);
|
||||
}
|
||||
|
||||
/* Serialize NIR to save memory. Monolithic shader variants
|
||||
* have to deserialize NIR before compilation.
|
||||
*/
|
||||
|
|
@ -2716,14 +2729,16 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
|
|||
unsigned i;
|
||||
|
||||
for (i = 0; i < sel->info.num_outputs; i++) {
|
||||
unsigned offset = shader->info.vs_output_param_offset[i];
|
||||
unsigned semantic = sel->info.output_semantic[i];
|
||||
unsigned ps_input_cntl = shader->info.vs_output_ps_input_cntl[semantic];
|
||||
|
||||
if (offset <= AC_EXP_PARAM_OFFSET_31)
|
||||
/* OFFSET=0x20 means DEFAULT_VAL, which means VS doesn't export it. */
|
||||
if (G_028644_OFFSET(ps_input_cntl) != 0x20)
|
||||
continue;
|
||||
|
||||
unsigned semantic = sel->info.output_semantic[i];
|
||||
unsigned id;
|
||||
|
||||
/* Remove the output from the mask. */
|
||||
if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
|
||||
semantic != VARYING_SLOT_POS &&
|
||||
semantic != VARYING_SLOT_PSIZ &&
|
||||
|
|
@ -2736,19 +2751,6 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
|
|||
}
|
||||
}
|
||||
|
||||
/* The GS copy shader is always pre-compiled. */
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY &&
|
||||
(!sscreen->use_ngg || !sscreen->use_ngg_streamout || /* also for PRIMITIVES_GENERATED */
|
||||
sel->tess_turns_off_ngg)) {
|
||||
sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
|
||||
if (!sel->gs_copy_shader) {
|
||||
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
|
||||
return;
|
||||
}
|
||||
|
||||
si_shader_vs(sscreen, sel->gs_copy_shader, sel);
|
||||
}
|
||||
|
||||
/* Free NIR. We only keep serialized NIR after this point. */
|
||||
if (sel->nir) {
|
||||
ralloc_free(sel->nir);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue