diff --git a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h index 306ec92950d..b550be72ba8 100644 --- a/src/gallium/drivers/radeonsi/si_debug_options.h +++ b/src/gallium/drivers/radeonsi/si_debug_options.h @@ -12,5 +12,6 @@ OPT_BOOL(no_infinite_interp, false, "Kill PS with infinite interp coeff") OPT_BOOL(clamp_div_by_zero, false, "Clamp div by zero (x / 0 becomes FLT_MAX instead of NaN)") OPT_BOOL(no_trunc_coord, false, "Always set TRUNC_COORD=0") OPT_BOOL(shader_culling, false, "Cull primitives in shaders when benefical (without tess and GS)") +OPT_BOOL(vrs2x2, false, "Enable 2x2 coarse shading for non-GUI elements") #undef OPT_BOOL diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index 75986169d9a..4747ffd5a35 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -347,6 +347,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003; ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000; + ctx->tracked_regs.reg_value[SI_TRACKED_DB_VRS_OVERRIDE_CNTL] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000; ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 8ffe3f96ede..9255002c33a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1080,6 +1080,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, #include "si_debug_options.h" } + if (sscreen->info.chip_class < GFX10_3) + sscreen->options.vrs2x2 = false; + si_disk_cache_create(sscreen); /* Determine the number of shader compiler threads. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index bafe964c84c..2d766532cbe 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -605,12 +605,13 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize; bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg; + bool writes_vrs = ctx->screen->options.vrs2x2; /* Write the misc vector (point size, edgeflag, layer, viewport). */ - if (writes_psize || pos_writes_edgeflag || + if (writes_psize || pos_writes_edgeflag || writes_vrs || shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) { pos_args[1].enabled_channels = writes_psize | - (pos_writes_edgeflag << 1) | + ((pos_writes_edgeflag | writes_vrs) << 1) | (shader->selector->info.writes_layer << 2); pos_args[1].valid_mask = 0; /* EXEC mask */ @@ -635,6 +636,32 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx, pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value); } + if (writes_vrs) { + /* Bits [2:3] = VRS rate X + * Bits [4:5] = VRS rate Y + * + * The range is [-2, 1]. Values: + * 1: 2x coarser shading rate in that direction. + * 0: normal shading rate + * -1: 2x finer shading rate (sample shading, not directional) + * -2: 4x finer shading rate (sample shading, not directional) + * + * Sample shading can't go above 8 samples, so both numbers can't be -2 + * at the same time. + */ + LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, (1 << 2) | (1 << 4), 0); + + /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */ + rates = LLVMBuildSelect(ctx->ac.builder, + LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE, + pos_args[0].out[3], ctx->ac.f32_1, ""), + rates, ctx->ac.i32_0, ""); + + LLVMValueRef v = ac_to_integer(&ctx->ac, pos_args[1].out[1]); + v = LLVMBuildOr(ctx->ac.builder, v, rates, ""); + pos_args[1].out[1] = ac_to_float(&ctx->ac, v); + } + if (ctx->screen->info.chip_class >= GFX9) { /* GFX9 has the layer in out.z[10:0] and the viewport * index in out.z[19:16]. diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b7b2bb2c209..3bd84e84719 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -751,7 +751,8 @@ static void si_emit_clip_regs(struct si_context *sctx) unsigned initial_cdw = sctx->gfx_cs->current.cdw; unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) | S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) | - S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3) | + S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 && + !sctx->screen->options.vrs2x2) | S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) | clipdist_mask | (culldist_mask << 8); @@ -1407,6 +1408,21 @@ static void si_emit_db_render_state(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, db_shader_control); + if (sctx->screen->options.vrs2x2) { + /* If the shader is using discard, turn off coarse shading because + * discard at 2x2 pixel granularity degrades quality too much. + * + * MIN allows sample shading but not coarse shading. + */ + unsigned mode = G_02880C_KILL_ENABLE(db_shader_control) ? V_028064_VRS_COMB_MODE_MIN + : V_028064_VRS_COMB_MODE_PASSTHRU; + radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, + SI_TRACKED_DB_VRS_OVERRIDE_CNTL, + S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | + S_028064_VRS_OVERRIDE_RATE_X(0) | + S_028064_VRS_OVERRIDE_RATE_Y(0)); + } + if (initial_cdw != sctx->gfx_cs->current.cdw) sctx->context_roll = true; } @@ -5366,9 +5382,18 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) if (sctx->chip_class >= GFX10_3) { si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); - /* This allows sample shading. */ + /* The rate combiners have no effect if they are disabled like this: + * VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1 + * PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1 + * HTILE_RATE: VRS_HTILE_ENCODING = 0 + * SAMPLE_ITER: PS_ITER_SAMPLE = 0 + * + * Use OVERRIDE, which will ignore results from previous combiners. + * (e.g. enabled sample shading overrides the vertex rate) + */ si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL, - S_028848_SAMPLE_ITER_COMBINER_MODE(1)); + S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) | + S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); } sctx->cs_preamble_state = pm4; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 9ac4d51889c..1d94d8c8c76 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -250,7 +250,8 @@ struct si_shader_data { #define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \ (S_02881C_USE_VTX_POINT_SIZE(1) | S_02881C_USE_VTX_EDGE_FLAG(1) | \ S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | S_02881C_USE_VTX_VIEWPORT_INDX(1) | \ - S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1)) + S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | \ + S_02881C_USE_VTX_VRS_RATE(1)) /* The list of registers whose emitted values are remembered by si_context. */ enum si_tracked_reg @@ -283,6 +284,7 @@ enum si_tracked_reg SI_TRACKED_PA_SC_BINNER_CNTL_0, SI_TRACKED_DB_DFSM_CONTROL, + SI_TRACKED_DB_VRS_OVERRIDE_CNTL, SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */ SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 3d0baf04646..60441df8418 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -89,6 +89,12 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es, shader_variant_flags |= 1 << 8; if (sel->screen->debug_flags & DBG(GISEL)) shader_variant_flags |= 1 << 9; + if ((sel->info.stage == MESA_SHADER_VERTEX || + sel->info.stage == MESA_SHADER_TESS_EVAL || + sel->info.stage == MESA_SHADER_GEOMETRY) && + !es && + sel->screen->options.vrs2x2) + shader_variant_flags |= 1 << 10; struct mesa_sha1 ctx; _mesa_sha1_init(&ctx); @@ -1056,9 +1062,11 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, writes_psize &= !shader->key.opt.kill_pointsize; bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) || + sel->screen->options.vrs2x2 || sel->info.writes_layer || sel->info.writes_viewport_index; return S_02881C_USE_VTX_POINT_SIZE(writes_psize) | S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) | + S_02881C_USE_VTX_VRS_RATE(sel->screen->options.vrs2x2) | S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) | S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) | S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |