diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b3d84eec8fe..b381d5d09a3 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1433,8 +1433,10 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi si_dump_streamout(&sel->so); } - memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000, - sizeof(shader->info.vs_output_param_offset)); + /* Initialize vs_output_ps_input_cntl to default. */ + for (unsigned i = 0; i < ARRAY_SIZE(shader->info.vs_output_ps_input_cntl); i++) + shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED; + shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0; shader->info.uses_instanceid = sel->info.uses_instanceid; @@ -1445,6 +1447,43 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi if (!si_llvm_compile_shader(sscreen, compiler, shader, debug, nir, free_nir)) return false; + /* Compute vs_output_ps_input_cntl. */ + if ((sel->info.stage == MESA_SHADER_VERTEX || + sel->info.stage == MESA_SHADER_TESS_EVAL || + sel->info.stage == MESA_SHADER_GEOMETRY) && + !shader->key.as_ls && !shader->key.as_es) { + ubyte *vs_output_param_offset = shader->info.vs_output_param_offset; + + if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.as_ngg) + vs_output_param_offset = sel->gs_copy_shader->info.vs_output_param_offset; + + /* VS and TES should also set primitive ID output if it's used. */ + unsigned num_outputs_with_prim_id = sel->info.num_outputs + + shader->key.mono.u.vs_export_prim_id; + + for (unsigned i = 0; i < num_outputs_with_prim_id; i++) { + unsigned semantic = sel->info.output_semantic[i]; + unsigned offset = vs_output_param_offset[i]; + unsigned ps_input_cntl; + + if (offset <= AC_EXP_PARAM_OFFSET_31) { + /* The input is loaded from parameter memory. */ + ps_input_cntl = S_028644_OFFSET(offset); + } else { + /* The input is a DEFAULT_VAL constant. */ + assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && + offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); + offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; + + /* OFFSET=0x20 means that DEFAULT_VAL is used. */ + ps_input_cntl = S_028644_OFFSET(0x20) | + S_028644_DEFAULT_VAL(offset); + } + + shader->info.vs_output_ps_input_cntl[semantic] = ps_input_cntl; + } + } + /* Validate SGPR and VGPR usage for compute to detect compiler bugs. */ if (sel->info.stage == MESA_SHADER_COMPUTE) { unsigned wave_size = sscreen->compute_wave_size; @@ -2002,8 +2041,8 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler shader->info.num_input_vgprs = mainp->info.num_input_vgprs; shader->info.face_vgpr_index = mainp->info.face_vgpr_index; shader->info.ancillary_vgpr_index = mainp->info.ancillary_vgpr_index; - memcpy(shader->info.vs_output_param_offset, mainp->info.vs_output_param_offset, - sizeof(mainp->info.vs_output_param_offset)); + memcpy(shader->info.vs_output_ps_input_cntl, mainp->info.vs_output_ps_input_cntl, + sizeof(mainp->info.vs_output_ps_input_cntl)); shader->info.uses_instanceid = mainp->info.uses_instanceid; shader->info.nr_pos_exports = mainp->info.nr_pos_exports; shader->info.nr_param_exports = mainp->info.nr_param_exports; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 44946891a19..de0dc232e12 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -158,6 +158,12 @@ struct si_context; #define SI_NGG_PRIM_EDGE_FLAG_BITS ((1 << 9) | (1 << 19) | (1 << 29)) +#define SI_PS_INPUT_CNTL_0000 (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(0)) +#define SI_PS_INPUT_CNTL_0001 (S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(3)) +#define SI_PS_INPUT_CNTL_UNUSED SI_PS_INPUT_CNTL_0000 +/* D3D9 behaviour for COLOR0 requires 0001. GL is undefined. */ +#define SI_PS_INPUT_CNTL_UNUSED_COLOR0 SI_PS_INPUT_CNTL_0001 + /* SGPR user data indices */ enum { @@ -342,7 +348,6 @@ struct si_shader_info { ubyte num_outputs; union si_input_info input[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS]; - char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1]; ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS]; ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS]; ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS]; @@ -707,6 +712,7 @@ struct si_shader_key { /* GCN-specific shader info. */ struct si_shader_binary_info { ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; + uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS]; ubyte num_input_sgprs; ubyte num_input_vgprs; signed char face_vgpr_index; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index c975581fe4f..083d73fca75 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "ac_exp_param.h" #include "ac_nir_to_llvm.h" #include "ac_rtld.h" #include "si_pipe.h" diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index cf57a6e77e8..d35c296c219 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -26,6 +26,7 @@ #include "si_shader_internal.h" #include "sid.h" #include "util/u_memory.h" +#include "ac_exp_param.h" static LLVMValueRef unpack_sint16(struct si_shader_context *ctx, LLVMValueRef i32, unsigned index) { @@ -452,6 +453,9 @@ static void si_prepare_param_exports(struct si_shader_context *ctx, struct si_shader *shader = ctx->shader; unsigned param_count = 0; + memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000, + sizeof(shader->info.vs_output_param_offset)); + for (unsigned i = 0; i < noutput; i++) { unsigned semantic = outputs[i].semantic; diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index c3ac1dbef04..ed07fa7e0a7 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -134,13 +134,11 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr } else { /* Outputs. */ assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask)); - assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot)); for (unsigned i = 0; i < num_slots; i++) { unsigned loc = driver_location + i; info->output_semantic[loc] = semantic + i; - info->output_semantic_to_slot[semantic + i] = loc; if (is_output_load) { /* Output loads have only a few things that we need to track. */ @@ -479,8 +477,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf info->writes_position = nir->info.outputs_written & VARYING_BIT_POS; } - memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot)); - func = (struct nir_function *)exec_list_get_head_const(&nir->functions); nir_foreach_block (block, func->impl) { nir_foreach_instr (instr, block) @@ -493,7 +489,6 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf * and si_emit_spi_map uses this unconditionally when such a pixel shader is used. */ info->output_semantic[info->num_outputs] = VARYING_SLOT_PRIMITIVE_ID; - info->output_semantic_to_slot[VARYING_SLOT_PRIMITIVE_ID] = info->num_outputs; info->output_type[info->num_outputs] = nir_type_uint32; info->output_usagemask[info->num_outputs] = 0x1; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 6f5f13ca06b..b6ce4fd7174 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -52,7 +52,6 @@ template static void si_emit_spi_map(struct si_context *sctx) { struct si_shader *ps = sctx->shader.ps.current; - struct si_shader *vs; struct si_shader_info *psinfo = ps ? &ps->selector->info : NULL; unsigned spi_ps_input_cntl[NUM_INTERP]; @@ -61,56 +60,24 @@ static void si_emit_spi_map(struct si_context *sctx) if (!NUM_INTERP) return; - /* With legacy GS, only the GS copy shader contains information about param exports. */ - if (sctx->shader.gs.cso && !sctx->ngg) - vs = sctx->shader.gs.cso->gs_copy_shader; - else - vs = si_get_vs(sctx)->current; - - struct si_shader_info *vsinfo = &vs->selector->info; + struct si_shader *vs = si_get_vs(sctx)->current; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; for (unsigned i = 0; i < NUM_INTERP; i++) { union si_input_info input = psinfo->input[i]; - unsigned ps_input_cntl = 0; + unsigned ps_input_cntl = vs->info.vs_output_ps_input_cntl[input.semantic]; + bool non_default_val = G_028644_OFFSET(ps_input_cntl) != 0x20; - int vs_slot = vsinfo->output_semantic_to_slot[input.semantic]; - if (vs_slot >= 0) { - unsigned offset = vs->info.vs_output_param_offset[vs_slot]; - - if (offset <= AC_EXP_PARAM_OFFSET_31) { - /* The input is loaded from parameter memory. */ - ps_input_cntl |= S_028644_OFFSET(offset); - - if (input.interpolate == INTERP_MODE_FLAT || - (input.interpolate == INTERP_MODE_COLOR && rs->flatshade)) { - ps_input_cntl |= S_028644_FLAT_SHADE(1); - } - } else { - /* The input is a DEFAULT_VAL constant. */ - assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && - offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); - offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; - - /* Overwrite the whole value. OFFSET=0x20 means that DEFAULT_VAL is used. */ - ps_input_cntl = S_028644_OFFSET(0x20) | - S_028644_DEFAULT_VAL(offset); - } + if (non_default_val) { + if (input.interpolate == INTERP_MODE_FLAT || + (input.interpolate == INTERP_MODE_COLOR && rs->flatshade)) + ps_input_cntl |= S_028644_FLAT_SHADE(1); if (input.fp16_lo_hi_valid) { - assert(offset <= AC_EXP_PARAM_OFFSET_31 || offset == AC_EXP_PARAM_DEFAULT_VAL_0000); - ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | - S_028644_USE_DEFAULT_ATTR1(offset == AC_EXP_PARAM_DEFAULT_VAL_0000) | - S_028644_DEFAULT_VAL_ATTR1(0) | S_028644_ATTR0_VALID(1) | /* this must be set if FP16_INTERP_MODE is set */ S_028644_ATTR1_VALID(!!(input.fp16_lo_hi_valid & 0x2)); } - } else { - /* No corresponding output found, load defaults into input. */ - ps_input_cntl = S_028644_OFFSET(0x20) | - /* D3D 9 behaviour for COLOR0. GL is undefined */ - S_028644_DEFAULT_VAL(input.semantic == VARYING_SLOT_COL1 ? 3 : 0); } if (input.semantic == VARYING_SLOT_PNTC || diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index fac6b488825..02fc51b5b59 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2632,6 +2632,19 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind if (!compiler->passes) si_init_compiler(sscreen, compiler); + /* The GS copy shader is always pre-compiled. */ + if (sel->info.stage == MESA_SHADER_GEOMETRY && + (!sscreen->use_ngg || !sscreen->use_ngg_streamout || /* also for PRIMITIVES_GENERATED */ + sel->tess_turns_off_ngg)) { + sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug); + if (!sel->gs_copy_shader) { + fprintf(stderr, "radeonsi: can't create GS copy shader\n"); + return; + } + + si_shader_vs(sscreen, sel->gs_copy_shader, sel); + } + /* Serialize NIR to save memory. Monolithic shader variants * have to deserialize NIR before compilation. */ @@ -2716,14 +2729,16 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind unsigned i; for (i = 0; i < sel->info.num_outputs; i++) { - unsigned offset = shader->info.vs_output_param_offset[i]; + unsigned semantic = sel->info.output_semantic[i]; + unsigned ps_input_cntl = shader->info.vs_output_ps_input_cntl[semantic]; - if (offset <= AC_EXP_PARAM_OFFSET_31) + /* OFFSET=0x20 means DEFAULT_VAL, which means VS doesn't export it. */ + if (G_028644_OFFSET(ps_input_cntl) != 0x20) continue; - unsigned semantic = sel->info.output_semantic[i]; unsigned id; + /* Remove the output from the mask. */ if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) && semantic != VARYING_SLOT_POS && semantic != VARYING_SLOT_PSIZ && @@ -2736,19 +2751,6 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind } } - /* The GS copy shader is always pre-compiled. */ - if (sel->info.stage == MESA_SHADER_GEOMETRY && - (!sscreen->use_ngg || !sscreen->use_ngg_streamout || /* also for PRIMITIVES_GENERATED */ - sel->tess_turns_off_ngg)) { - sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug); - if (!sel->gs_copy_shader) { - fprintf(stderr, "radeonsi: can't create GS copy shader\n"); - return; - } - - si_shader_vs(sscreen, sel->gs_copy_shader, sel); - } - /* Free NIR. We only keep serialized NIR after this point. */ if (sel->nir) { ralloc_free(sel->nir);