radv: rewrite RADV_FORCE_VRS directly in NIR

This introduces a small NIR pass that exports
VARYING_SLOT_PRIMITIVE_SHADING_RATE if RADV_FORCE_VRS is used,
instead of doing this in both backend compilers.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14907>
This commit is contained in:
Samuel Pitoiset 2021-07-13 13:29:57 +02:00
parent 7955df28a6
commit 2451290bc4
6 changed files with 123 additions and 86 deletions

View file

@ -10746,30 +10746,6 @@ export_vs_psiz_layer_viewport_vrs(isel_context* ctx, int* next_pos,
if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) {
exp->operands[1] = Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]);
exp->enabled_mask |= 0x2;
} else if (ctx->options->force_vrs_rates) {
/* Bits [2:3] = VRS rate X
* Bits [4:5] = VRS rate Y
*
* The range is [-2, 1]. Values:
* 1: 2x coarser shading rate in that direction.
* 0: normal shading rate
* -1: 2x finer shading rate (sample shading, not directional)
* -2: 4x finer shading rate (sample shading, not directional)
*
* Sample shading can't go above 8 samples, so both numbers can't be -2
* at the same time.
*/
Builder bld(ctx->program, ctx->block);
Temp rates = bld.copy(bld.def(v1), Operand::c32((unsigned)ctx->options->force_vrs_rates));
/* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), Operand::c32(0x3f800000u),
Operand(ctx->outputs.temps[VARYING_SLOT_POS + 3]));
rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
bld.copy(bld.def(v1), Operand::zero()), rates, cond);
exp->operands[1] = Operand(rates);
exp->enabled_mask |= 0x2;
}
exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
@ -10818,11 +10794,8 @@ create_vs_exports(isel_context* ctx)
int next_pos = 0;
export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
bool force_vrs_per_vertex = ctx->options->force_vrs_rates && ctx->stage != mesh_ngg;
bool writes_primitive_shading_rate =
outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
writes_primitive_shading_rate) {
outinfo->writes_primitive_shading_rate) {
export_vs_psiz_layer_viewport_vrs(ctx, &next_pos, outinfo);
}
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)

View file

@ -1257,13 +1257,10 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
pos_args[0].out[3] = ctx->ac.f32_1; /* W */
}
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
ctx->options->force_vrs_rates;
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer ||
outinfo->writes_viewport_index || writes_primitive_shading_rate) {
outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate) {
pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
(writes_primitive_shading_rate == true ? 2 : 0) |
(outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
(outinfo->writes_layer == true ? 4 : 0));
pos_args[1].valid_mask = 0;
pos_args[1].done = 0;
@ -1298,27 +1295,6 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
if (outinfo->writes_primitive_shading_rate) {
pos_args[1].out[1] = primitive_shading_rate;
} else if (ctx->options->force_vrs_rates) {
/* Bits [2:3] = VRS rate X
* Bits [4:5] = VRS rate Y
*
* The range is [-2, 1]. Values:
* 1: 2x coarser shading rate in that direction.
* 0: normal shading rate
* -1: 2x finer shading rate (sample shading, not directional)
* -2: 4x finer shading rate (sample shading, not directional)
*
* Sample shading can't go above 8 samples, so both numbers can't be -2 at the same time.
*/
LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, ctx->options->force_vrs_rates, false);
LLVMValueRef cond;
LLVMValueRef v;
/* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE, pos_args[0].out[3], ctx->ac.f32_1, "");
v = LLVMBuildSelect(ctx->ac.builder, cond, rates, ctx->ac.i32_0, "");
pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
}
}

View file

@ -3629,6 +3629,26 @@ radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
return VK_SUCCESS;
}
/**
 * Decide whether the forced per-vertex VRS pass should be run for a pipeline.
 *
 * \param pipeline  Pipeline being compiled; its device and
 *                  graphics.last_vgt_api_stage are read.
 * \param nir       Per-stage array of NIR shaders, indexed by shader stage.
 *
 * \return true only when all of the following hold:
 *         - device->force_vrs is not RADV_FORCE_VRS_NONE,
 *         - the last VGT API stage is the vertex or geometry stage,
 *         - a shader is present for that stage,
 *         - that shader doesn't already write
 *           VARYING_SLOT_PRIMITIVE_SHADING_RATE.
 */
static bool
radv_consider_force_vrs(const struct radv_pipeline *pipeline, nir_shader **nir)
{
   struct radv_device *device = pipeline->device;

   if (device->force_vrs == RADV_FORCE_VRS_NONE)
      return false;

   /* Only VS and GS are supported for now. */
   if (pipeline->graphics.last_vgt_api_stage != MESA_SHADER_VERTEX &&
       pipeline->graphics.last_vgt_api_stage != MESA_SHADER_GEOMETRY)
      return false;

   nir_shader *last_vgt_shader = nir[pipeline->graphics.last_vgt_api_stage];

   /* Don't dereference an empty stage slot.
    * NOTE(review): presumably this can happen for pipelines without a
    * VS/GS (e.g. non-graphics pipelines going through the same shader
    * creation path) -- confirm against the callers.
    */
   if (!last_vgt_shader)
      return false;

   /* The application already controls the shading rate, don't override it. */
   if (last_vgt_shader->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE))
      return false;

   return true;
}
VkResult
radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
struct radv_device *device, struct radv_pipeline_cache *cache,
@ -3729,6 +3749,14 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
radv_stop_feedback(stage_feedbacks[i], false);
}
/* Force per-vertex VRS. */
if (radv_consider_force_vrs(pipeline, nir)) {
assert(pipeline->graphics.last_vgt_api_stage == MESA_SHADER_VERTEX ||
pipeline->graphics.last_vgt_api_stage == MESA_SHADER_GEOMETRY);
nir_shader *last_vgt_shader = nir[pipeline->graphics.last_vgt_api_stage];
NIR_PASS_V(last_vgt_shader, radv_force_primitive_shading_rate, device);
}
bool optimize_conservatively = pipeline_key->optimisations_disabled;
radv_link_shaders(pipeline, pipeline_key, nir, optimize_conservatively);
@ -4738,10 +4766,8 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
bool writes_primitive_shading_rate =
outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
outinfo->writes_viewport_index || writes_primitive_shading_rate;
outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
unsigned spi_vs_out_config, nparams;
/* VS is required to export at least one param. */
@ -4768,7 +4794,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
@ -4857,13 +4883,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
cull_dist_mask = outinfo->cull_dist_mask;
total_mask = clip_dist_mask | cull_dist_mask;
/* Primitive shading rate is written as a per-primitive output in mesh shaders. */
bool force_vrs_per_vertex =
pipeline->device->force_vrs != RADV_FORCE_VRS_NONE && es_type != MESA_SHADER_MESH;
bool writes_primitive_shading_rate =
outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
outinfo->writes_viewport_index || writes_primitive_shading_rate;
outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id);
bool break_wave_at_eoi = false;
unsigned ge_cntl;
@ -4906,7 +4927,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |

View file

@ -458,6 +458,90 @@ radv_lower_primitive_shading_rate(nir_shader *nir)
return progress;
}
/**
 * NIR pass that forces a per-vertex primitive shading rate.
 *
 * For stores to the VARYING_SLOT_POS shader output, a store to
 * VARYING_SLOT_PRIMITIVE_SHADING_RATE is inserted right after it, unless
 * Pos.W is known at compile time to be exactly 1.0. The stored value is
 * selected at run time: the rate derived from device->force_vrs when
 * Pos.W != 1.0, and 0 (normal shading rate) otherwise.
 *
 * \param nir     Shader to modify; info.outputs_written is updated when a
 *                store is added.
 * \param device  Device whose force_vrs setting selects the forced rate. It
 *                must be one of RADV_FORCE_VRS_2x2/2x1/1x2.
 *
 * \return true if at least one shading rate store was added.
 */
bool
radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   /* Output variable for the shading rate, created on first use and shared by
    * all stores emitted by this pass so that a shader with several position
    * stores doesn't end up with several variables for the same location.
    */
   nir_variable *vrs_var = NULL;
   bool progress = false;
   unsigned vrs_rate = 0;

   nir_builder b;
   nir_builder_init(&b, impl);

   /* Bits [2:3] = VRS rate X
    * Bits [4:5] = VRS rate Y
    *
    * The range is [-2, 1]. Values:
    *   1: 2x coarser shading rate in that direction.
    *   0: normal shading rate
    *  -1: 2x finer shading rate (sample shading, not directional)
    *  -2: 4x finer shading rate (sample shading, not directional)
    *
    * Sample shading can't go above 8 samples, so both numbers can't be -2
    * at the same time.
    */
   switch (device->force_vrs) {
   case RADV_FORCE_VRS_2x2:
      vrs_rate = (1u << 2) | (1u << 4);
      break;
   case RADV_FORCE_VRS_2x1:
      vrs_rate = (1u << 2) | (0u << 4);
      break;
   case RADV_FORCE_VRS_1x2:
      vrs_rate = (0u << 2) | (1u << 4);
      break;
   default:
      unreachable("Invalid RADV_FORCE_VRS value");
   }

   /* Walk the shader backwards so that the last position store is seen first. */
   nir_foreach_block_reverse(block, impl) {
      nir_foreach_instr_reverse(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_store_deref)
            continue;

         nir_variable *var = nir_intrinsic_get_var(intr, 0);
         if (var->data.mode != nir_var_shader_out ||
             var->data.location != VARYING_SLOT_POS)
            continue;

         b.cursor = nir_after_instr(instr);

         nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intr->src[1].ssa, 3);

         /* Use coarse shading if the value of Pos.W can't be determined or if its value is != 1
          * (typical for non-GUI elements). 0x3f800000 is the bit pattern of 1.0f, so a constant
          * Pos.W of exactly 1.0 needs no shading rate store at all.
          */
         if (nir_ssa_scalar_is_const(scalar_idx) &&
             nir_ssa_scalar_as_uint(scalar_idx) == 0x3f800000u)
            continue;

         if (!vrs_var) {
            vrs_var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), "vrs rate");
            vrs_var->data.location = VARYING_SLOT_PRIMITIVE_SHADING_RATE;
            vrs_var->data.interpolation = INTERP_MODE_NONE;
         }

         /* rate = (Pos.W != 1.0) ? forced rate : normal rate */
         nir_ssa_def *pos_w = nir_channel(&b, intr->src[1].ssa, 3);
         nir_ssa_def *val = nir_bcsel(&b, nir_fneu(&b, pos_w, nir_imm_float(&b, 1.0f)),
                                      nir_imm_int(&b, vrs_rate), nir_imm_int(&b, 0));

         nir_deref_instr *deref = nir_build_deref_var(&b, vrs_var);
         nir_store_deref(&b, deref, val, 0x1);

         /* Update outputs_written to reflect that the pass added a new output. */
         nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE);

         progress = true;

         /* For vertex shaders, one shading rate store is enough; other stages
          * keep going and get one store per matching position store.
          */
         if (nir->info.stage == MESA_SHADER_VERTEX)
            return progress;
      }
   }

   return progress;
}
nir_shader *
radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
const char *entrypoint_name, gl_shader_stage stage,
@ -802,6 +886,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
nir->info.stage == MESA_SHADER_GEOMETRY ||
nir->info.stage == MESA_SHADER_MESH) &&
nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
/* Lower primitive shading rate to match HW requirements. */
NIR_PASS_V(nir, radv_lower_primitive_shading_rate);
}
@ -1868,20 +1953,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
options->debug.func = radv_compiler_debug;
options->debug.private_data = &debug_data;
switch (options->key.ps.force_vrs) {
case RADV_FORCE_VRS_2x2:
options->force_vrs_rates = (1u << 2) | (1u << 4);
break;
case RADV_FORCE_VRS_2x1:
options->force_vrs_rates = (1u << 2) | (0u << 4);
break;
case RADV_FORCE_VRS_1x2:
options->force_vrs_rates = (0u << 2) | (1u << 4);
break;
default:
break;
}
struct radv_shader_args args = {0};
args.is_gs_copy_shader = gs_copy_shader;
args.is_trap_handler_shader = trap_handler_shader;

View file

@ -127,7 +127,6 @@ struct radv_nir_compiler_options {
enum chip_class chip_class;
const struct radeon_info *info;
uint32_t address32_hi;
uint8_t force_vrs_rates;
struct {
void (*func)(void *private_data, enum radv_compiler_debug_level level, const char *message);
@ -663,4 +662,6 @@ bool radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
void radv_get_nir_options(struct radv_physical_device *device);
bool radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device);
#endif

View file

@ -678,15 +678,10 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
outinfo->writes_layer = true;
}
/* VS/TES/GS: shading rate is per-vertex, MS: it's per-primitive. */
bool force_vrs_per_vertex =
device->force_vrs != RADV_FORCE_VRS_NONE && nir->info.stage != MESA_SHADER_MESH;
bool writes_primitive_shading_rate =
outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
int pos_written = 0x1;
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
writes_primitive_shading_rate)
outinfo->writes_primitive_shading_rate)
pos_written |= 1 << 1;
unsigned num_clip_distances = util_bitcount(outinfo->clip_dist_mask);