radeonsi/gfx10: implement si_shader_vs

Only used with tessellation + GS instancing.

Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
Marek Olšák 2019-06-24 17:39:39 -04:00
parent 6cf2fb1fc4
commit 214ddfb688
3 changed files with 30 additions and 20 deletions

View file

@ -3656,7 +3656,8 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
}
}
if (ctx->shader->selector->so.num_outputs)
if (ctx->ac.chip_class <= GFX9 &&
ctx->shader->selector->so.num_outputs)
si_llvm_emit_streamout(ctx, outputs, i, 0);
/* Export PrimitiveID. */
@ -4448,7 +4449,8 @@ static void declare_streamout_params(struct si_shader_context *ctx,
struct pipe_stream_output_info *so,
struct si_function_info *fninfo)
{
int i;
if (ctx->ac.chip_class >= GFX10)
return;
/* Streamout SGPRs. */
if (so->num_outputs) {
@ -4460,7 +4462,7 @@ static void declare_streamout_params(struct si_shader_context *ctx,
ctx->param_streamout_write_index = add_arg(fninfo, ARG_SGPR, ctx->ac.i32);
}
/* A streamout buffer offset is loaded if the stride is non-zero. */
for (i = 0; i < 4; i++) {
for (int i = 0; i < 4; i++) {
if (!so->stride[i])
continue;
@ -5789,7 +5791,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
}
/* Streamout and exports. */
if (gs_selector->so.num_outputs) {
if (ctx.ac.chip_class <= GFX9 && gs_selector->so.num_outputs) {
si_llvm_emit_streamout(&ctx, outputs,
gsinfo->num_outputs,
stream);

View file

@ -5565,6 +5565,7 @@ static void si_init_config(struct si_context *sctx)
*/
si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL,
S_028C50_MAX_DEALLOCS_IN_WAVE(512));
si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
sscreen->info.pa_sc_tile_steering_override);

View file

@ -418,7 +418,8 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen,
{
unsigned type = sel->type;
if (sscreen->info.family < CHIP_POLARIS10)
if (sscreen->info.family < CHIP_POLARIS10 ||
sscreen->info.chip_class >= GFX10)
return;
/* VS as VS, or VS as ES: */
@ -1371,21 +1372,27 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(va >> 40));
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(1) |
S_00B128_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
S_00B12C_USER_SGPR(num_user_sgprs) |
S_00B12C_OC_LDS_EN(oc_lds_en) |
S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
uint32_t rsrc1 = S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
S_00B128_DX10_CLAMP(1) |
S_00B128_MEM_ORDERED(sscreen->info.chip_class >= GFX10) |
S_00B128_FLOAT_MODE(shader->config.float_mode);
uint32_t rsrc2 = S_00B12C_USER_SGPR(num_user_sgprs) |
S_00B12C_OC_LDS_EN(oc_lds_en) |
S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
if (sscreen->info.chip_class <= GFX9) {
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
S_00B12C_SO_EN(!!shader->selector->so.num_outputs);
}
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, rsrc2);
if (window_space)
shader->ctx_reg.vs.pa_cl_vte_cntl =