mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
radv: rewrite RADV_FORCE_VRS directly in NIR
This introduces a small NIR pass that exports VARYING_SLOT_PRIMITIVE_SHADING_RATE when RADV_FORCE_VRS is used, instead of implementing this separately in both backend compilers.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14907>
This commit is contained in:
parent
7955df28a6
commit
2451290bc4
6 changed files with 123 additions and 86 deletions
|
|
@ -10746,30 +10746,6 @@ export_vs_psiz_layer_viewport_vrs(isel_context* ctx, int* next_pos,
|
|||
if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) {
|
||||
exp->operands[1] = Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]);
|
||||
exp->enabled_mask |= 0x2;
|
||||
} else if (ctx->options->force_vrs_rates) {
|
||||
/* Bits [2:3] = VRS rate X
|
||||
* Bits [4:5] = VRS rate Y
|
||||
*
|
||||
* The range is [-2, 1]. Values:
|
||||
* 1: 2x coarser shading rate in that direction.
|
||||
* 0: normal shading rate
|
||||
* -1: 2x finer shading rate (sample shading, not directional)
|
||||
* -2: 4x finer shading rate (sample shading, not directional)
|
||||
*
|
||||
* Sample shading can't go above 8 samples, so both numbers can't be -2
|
||||
* at the same time.
|
||||
*/
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp rates = bld.copy(bld.def(v1), Operand::c32((unsigned)ctx->options->force_vrs_rates));
|
||||
|
||||
/* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
|
||||
Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), Operand::c32(0x3f800000u),
|
||||
Operand(ctx->outputs.temps[VARYING_SLOT_POS + 3]));
|
||||
rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
|
||||
bld.copy(bld.def(v1), Operand::zero()), rates, cond);
|
||||
|
||||
exp->operands[1] = Operand(rates);
|
||||
exp->enabled_mask |= 0x2;
|
||||
}
|
||||
|
||||
exp->valid_mask = ctx->options->chip_class == GFX10 && *next_pos == 0;
|
||||
|
|
@ -10818,11 +10794,8 @@ create_vs_exports(isel_context* ctx)
|
|||
int next_pos = 0;
|
||||
export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
|
||||
|
||||
bool force_vrs_per_vertex = ctx->options->force_vrs_rates && ctx->stage != mesh_ngg;
|
||||
bool writes_primitive_shading_rate =
|
||||
outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
|
||||
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
|
||||
writes_primitive_shading_rate) {
|
||||
outinfo->writes_primitive_shading_rate) {
|
||||
export_vs_psiz_layer_viewport_vrs(ctx, &next_pos, outinfo);
|
||||
}
|
||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
|
||||
|
|
|
|||
|
|
@ -1257,13 +1257,10 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
|
|||
pos_args[0].out[3] = ctx->ac.f32_1; /* W */
|
||||
}
|
||||
|
||||
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
|
||||
ctx->options->force_vrs_rates;
|
||||
|
||||
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer ||
|
||||
outinfo->writes_viewport_index || writes_primitive_shading_rate) {
|
||||
outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate) {
|
||||
pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
|
||||
(writes_primitive_shading_rate == true ? 2 : 0) |
|
||||
(outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
|
||||
(outinfo->writes_layer == true ? 4 : 0));
|
||||
pos_args[1].valid_mask = 0;
|
||||
pos_args[1].done = 0;
|
||||
|
|
@ -1298,27 +1295,6 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
|
|||
|
||||
if (outinfo->writes_primitive_shading_rate) {
|
||||
pos_args[1].out[1] = primitive_shading_rate;
|
||||
} else if (ctx->options->force_vrs_rates) {
|
||||
/* Bits [2:3] = VRS rate X
|
||||
* Bits [4:5] = VRS rate Y
|
||||
*
|
||||
* The range is [-2, 1]. Values:
|
||||
* 1: 2x coarser shading rate in that direction.
|
||||
* 0: normal shading rate
|
||||
* -1: 2x finer shading rate (sample shading, not directional)
|
||||
* -2: 4x finer shading rate (sample shading, not directional)
|
||||
*
|
||||
* Sample shading can't go above 8 samples, so both numbers can't be -2 at the same time.
|
||||
*/
|
||||
LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, ctx->options->force_vrs_rates, false);
|
||||
LLVMValueRef cond;
|
||||
LLVMValueRef v;
|
||||
|
||||
/* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
|
||||
cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE, pos_args[0].out[3], ctx->ac.f32_1, "");
|
||||
v = LLVMBuildSelect(ctx->ac.builder, cond, rates, ctx->ac.i32_0, "");
|
||||
|
||||
pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3629,6 +3629,26 @@ radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static bool
|
||||
radv_consider_force_vrs(const struct radv_pipeline *pipeline, nir_shader **nir)
|
||||
{
|
||||
struct radv_device *device = pipeline->device;
|
||||
|
||||
if (device->force_vrs == RADV_FORCE_VRS_NONE)
|
||||
return false;
|
||||
|
||||
/* Only VS and GS are supported for now. */
|
||||
if (pipeline->graphics.last_vgt_api_stage != MESA_SHADER_VERTEX &&
|
||||
pipeline->graphics.last_vgt_api_stage != MESA_SHADER_GEOMETRY)
|
||||
return false;
|
||||
|
||||
nir_shader *last_vgt_shader = nir[pipeline->graphics.last_vgt_api_stage];
|
||||
if (last_vgt_shader->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
VkResult
|
||||
radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
|
||||
struct radv_device *device, struct radv_pipeline_cache *cache,
|
||||
|
|
@ -3729,6 +3749,14 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||
radv_stop_feedback(stage_feedbacks[i], false);
|
||||
}
|
||||
|
||||
/* Force per-vertex VRS. */
|
||||
if (radv_consider_force_vrs(pipeline, nir)) {
|
||||
assert(pipeline->graphics.last_vgt_api_stage == MESA_SHADER_VERTEX ||
|
||||
pipeline->graphics.last_vgt_api_stage == MESA_SHADER_GEOMETRY);
|
||||
nir_shader *last_vgt_shader = nir[pipeline->graphics.last_vgt_api_stage];
|
||||
NIR_PASS_V(last_vgt_shader, radv_force_primitive_shading_rate, device);
|
||||
}
|
||||
|
||||
bool optimize_conservatively = pipeline_key->optimisations_disabled;
|
||||
|
||||
radv_link_shaders(pipeline, pipeline_key, nir, optimize_conservatively);
|
||||
|
|
@ -4738,10 +4766,8 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
cull_dist_mask = outinfo->cull_dist_mask;
|
||||
total_mask = clip_dist_mask | cull_dist_mask;
|
||||
|
||||
bool writes_primitive_shading_rate =
|
||||
outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
|
||||
bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
|
||||
outinfo->writes_viewport_index || writes_primitive_shading_rate;
|
||||
outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
|
||||
unsigned spi_vs_out_config, nparams;
|
||||
|
||||
/* VS is required to export at least one param. */
|
||||
|
|
@ -4768,7 +4794,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
|
||||
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
|
||||
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
|
||||
S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
|
||||
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
|
||||
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
|
||||
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
|
||||
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
|
||||
|
|
@ -4857,13 +4883,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
cull_dist_mask = outinfo->cull_dist_mask;
|
||||
total_mask = clip_dist_mask | cull_dist_mask;
|
||||
|
||||
/* Primitive shading rate is written as a per-primitive output in mesh shaders. */
|
||||
bool force_vrs_per_vertex =
|
||||
pipeline->device->force_vrs != RADV_FORCE_VRS_NONE && es_type != MESA_SHADER_MESH;
|
||||
bool writes_primitive_shading_rate =
|
||||
outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
|
||||
bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
|
||||
outinfo->writes_viewport_index || writes_primitive_shading_rate;
|
||||
outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate;
|
||||
bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id);
|
||||
bool break_wave_at_eoi = false;
|
||||
unsigned ge_cntl;
|
||||
|
|
@ -4906,7 +4927,7 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
|
|||
S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
|
||||
S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
|
||||
S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
|
||||
S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
|
||||
S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) |
|
||||
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
|
||||
S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
|
||||
S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
|
||||
|
|
|
|||
|
|
@ -458,6 +458,90 @@ radv_lower_primitive_shading_rate(nir_shader *nir)
|
|||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
bool progress = false;
|
||||
unsigned vrs_rate = 0;
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
|
||||
/* Bits [2:3] = VRS rate X
|
||||
* Bits [4:5] = VRS rate Y
|
||||
*
|
||||
* The range is [-2, 1]. Values:
|
||||
* 1: 2x coarser shading rate in that direction.
|
||||
* 0: normal shading rate
|
||||
* -1: 2x finer shading rate (sample shading, not directional)
|
||||
* -2: 4x finer shading rate (sample shading, not directional)
|
||||
*
|
||||
* Sample shading can't go above 8 samples, so both numbers can't be -2
|
||||
* at the same time.
|
||||
*/
|
||||
switch (device->force_vrs) {
|
||||
case RADV_FORCE_VRS_2x2:
|
||||
vrs_rate = (1u << 2) | (1u << 4);
|
||||
break;
|
||||
case RADV_FORCE_VRS_2x1:
|
||||
vrs_rate = (1u << 2) | (0u << 4);
|
||||
break;
|
||||
case RADV_FORCE_VRS_1x2:
|
||||
vrs_rate = (0u << 2) | (1u << 4);
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid RADV_FORCE_VRS value");
|
||||
}
|
||||
|
||||
nir_foreach_block_reverse(block, impl) {
|
||||
nir_foreach_instr_reverse(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (intr->intrinsic != nir_intrinsic_store_deref)
|
||||
continue;
|
||||
|
||||
nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
||||
if (var->data.mode != nir_var_shader_out ||
|
||||
var->data.location != VARYING_SLOT_POS)
|
||||
continue;
|
||||
|
||||
b.cursor = nir_after_instr(instr);
|
||||
|
||||
nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intr->src[1].ssa, 3);
|
||||
|
||||
/* Use coarse shading if the value of Pos.W can't be determined or if its value is != 1
|
||||
* (typical for non-GUI elements).
|
||||
*/
|
||||
if (!nir_ssa_scalar_is_const(scalar_idx) ||
|
||||
nir_ssa_scalar_as_uint(scalar_idx) != 0x3f800000u) {
|
||||
|
||||
var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), "vrs rate");
|
||||
var->data.location = VARYING_SLOT_PRIMITIVE_SHADING_RATE;
|
||||
var->data.interpolation = INTERP_MODE_NONE;
|
||||
|
||||
nir_ssa_def *pos_w = nir_channel(&b, intr->src[1].ssa, 3);
|
||||
nir_ssa_def *val = nir_bcsel(&b, nir_fneu(&b, pos_w, nir_imm_float(&b, 1.0f)),
|
||||
nir_imm_int(&b, vrs_rate), nir_imm_int(&b, 0));
|
||||
|
||||
nir_deref_instr *deref = nir_build_deref_var(&b, var);
|
||||
nir_store_deref(&b, deref, val, 0x1);
|
||||
|
||||
/* Update outputs_written to reflect that the pass added a new output. */
|
||||
nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE);
|
||||
|
||||
progress = true;
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX)
|
||||
return progress;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
nir_shader *
|
||||
radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
|
||||
const char *entrypoint_name, gl_shader_stage stage,
|
||||
|
|
@ -802,6 +886,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
|
|||
nir->info.stage == MESA_SHADER_GEOMETRY ||
|
||||
nir->info.stage == MESA_SHADER_MESH) &&
|
||||
nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
|
||||
/* Lower primitive shading rate to match HW requirements. */
|
||||
NIR_PASS_V(nir, radv_lower_primitive_shading_rate);
|
||||
}
|
||||
|
||||
|
|
@ -1868,20 +1953,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
|
|||
options->debug.func = radv_compiler_debug;
|
||||
options->debug.private_data = &debug_data;
|
||||
|
||||
switch (options->key.ps.force_vrs) {
|
||||
case RADV_FORCE_VRS_2x2:
|
||||
options->force_vrs_rates = (1u << 2) | (1u << 4);
|
||||
break;
|
||||
case RADV_FORCE_VRS_2x1:
|
||||
options->force_vrs_rates = (1u << 2) | (0u << 4);
|
||||
break;
|
||||
case RADV_FORCE_VRS_1x2:
|
||||
options->force_vrs_rates = (0u << 2) | (1u << 4);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
struct radv_shader_args args = {0};
|
||||
args.is_gs_copy_shader = gs_copy_shader;
|
||||
args.is_trap_handler_shader = trap_handler_shader;
|
||||
|
|
|
|||
|
|
@ -127,7 +127,6 @@ struct radv_nir_compiler_options {
|
|||
enum chip_class chip_class;
|
||||
const struct radeon_info *info;
|
||||
uint32_t address32_hi;
|
||||
uint8_t force_vrs_rates;
|
||||
|
||||
struct {
|
||||
void (*func)(void *private_data, enum radv_compiler_debug_level level, const char *message);
|
||||
|
|
@ -663,4 +662,6 @@ bool radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
|
|||
|
||||
void radv_get_nir_options(struct radv_physical_device *device);
|
||||
|
||||
/* Adds stores to VARYING_SLOT_PRIMITIVE_SHADING_RATE after position stores
 * whose W component is not a constant 1.0, using the rate selected by
 * device->force_vrs. Returns true if the shader was modified.
 */
bool radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -678,15 +678,10 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
outinfo->writes_layer = true;
|
||||
}
|
||||
|
||||
/* VS/TES/GS: shading rate is per-vertex, MS: it's per-primitive. */
|
||||
bool force_vrs_per_vertex =
|
||||
device->force_vrs != RADV_FORCE_VRS_NONE && nir->info.stage != MESA_SHADER_MESH;
|
||||
bool writes_primitive_shading_rate =
|
||||
outinfo->writes_primitive_shading_rate || force_vrs_per_vertex;
|
||||
int pos_written = 0x1;
|
||||
|
||||
if (outinfo->writes_pointsize || outinfo->writes_viewport_index || outinfo->writes_layer ||
|
||||
writes_primitive_shading_rate)
|
||||
outinfo->writes_primitive_shading_rate)
|
||||
pos_written |= 1 << 1;
|
||||
|
||||
unsigned num_clip_distances = util_bitcount(outinfo->clip_dist_mask);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue