mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
zink: enable opt_varyings with ZINK_DEBUG=ioopt
Uses instruction costs copied from radeonsi for AMD; cost information is still needed for other platforms.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28580>
This commit is contained in:
parent
942f3d3a3f
commit
0e0effe59b
3 changed files with 157 additions and 0 deletions
|
|
@ -1209,6 +1209,133 @@ zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
|
|||
nir_metadata_dominance, NULL);
|
||||
}
|
||||
|
||||
/* from radeonsi */
|
||||
static unsigned
|
||||
amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
|
||||
{
|
||||
/* TODO: maybe implement shader profiles to disable, cf. 39804ebf1766d38004259085e1fec4ed8db86f1c */
|
||||
|
||||
switch (consumer->info.stage) {
|
||||
case MESA_SHADER_TESS_CTRL: /* VS->TCS */
|
||||
/* Non-amplifying shaders can always have their variyng expressions
|
||||
* moved into later shaders.
|
||||
*/
|
||||
return UINT_MAX;
|
||||
|
||||
case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */
|
||||
return consumer->info.gs.vertices_in == 1 ? UINT_MAX :
|
||||
consumer->info.gs.vertices_in == 2 ? 20 : 14;
|
||||
|
||||
case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
/* Up to 3 uniforms and 5 ALUs. */
|
||||
return 14;
|
||||
|
||||
default:
|
||||
unreachable("unexpected shader stage");
|
||||
}
|
||||
}
|
||||
|
||||
/* from radeonsi */
|
||||
static unsigned
|
||||
amd_varying_estimate_instr_cost(nir_instr *instr)
|
||||
{
|
||||
unsigned dst_bit_size, src_bit_size, num_dst_dwords;
|
||||
nir_op alu_op;
|
||||
|
||||
/* This is a very loose approximation based on gfx10. */
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_alu:
|
||||
dst_bit_size = nir_instr_as_alu(instr)->def.bit_size;
|
||||
src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
|
||||
alu_op = nir_instr_as_alu(instr)->op;
|
||||
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
|
||||
|
||||
switch (alu_op) {
|
||||
case nir_op_mov:
|
||||
case nir_op_vec2:
|
||||
case nir_op_vec3:
|
||||
case nir_op_vec4:
|
||||
case nir_op_vec5:
|
||||
case nir_op_vec8:
|
||||
case nir_op_vec16:
|
||||
case nir_op_fabs:
|
||||
case nir_op_fneg:
|
||||
case nir_op_fsat:
|
||||
return 0;
|
||||
|
||||
case nir_op_imul:
|
||||
case nir_op_umul_low:
|
||||
return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords;
|
||||
|
||||
case nir_op_imul_high:
|
||||
case nir_op_umul_high:
|
||||
case nir_op_imul_2x32_64:
|
||||
case nir_op_umul_2x32_64:
|
||||
return 4;
|
||||
|
||||
case nir_op_fexp2:
|
||||
case nir_op_flog2:
|
||||
case nir_op_frcp:
|
||||
case nir_op_frsq:
|
||||
case nir_op_fsqrt:
|
||||
case nir_op_fsin:
|
||||
case nir_op_fcos:
|
||||
case nir_op_fsin_amd:
|
||||
case nir_op_fcos_amd:
|
||||
return 4; /* FP16 & FP32. */
|
||||
|
||||
case nir_op_fpow:
|
||||
return 4 + 1 + 4; /* log2 + mul + exp2 */
|
||||
|
||||
case nir_op_fsign:
|
||||
return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */
|
||||
|
||||
case nir_op_idiv:
|
||||
case nir_op_udiv:
|
||||
case nir_op_imod:
|
||||
case nir_op_umod:
|
||||
case nir_op_irem:
|
||||
return dst_bit_size == 64 ? 80 : 40;
|
||||
|
||||
case nir_op_fdiv:
|
||||
return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */
|
||||
|
||||
case nir_op_fmod:
|
||||
case nir_op_frem:
|
||||
return dst_bit_size == 64 ? 80 : 8;
|
||||
|
||||
default:
|
||||
/* Double opcodes. Comparisons have always full performance. */
|
||||
if ((dst_bit_size == 64 &&
|
||||
nir_op_infos[alu_op].output_type & nir_type_float) ||
|
||||
(dst_bit_size >= 8 && src_bit_size == 64 &&
|
||||
nir_op_infos[alu_op].input_types[0] & nir_type_float))
|
||||
return 16;
|
||||
|
||||
return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32);
|
||||
}
|
||||
|
||||
case nir_instr_type_intrinsic:
|
||||
dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size;
|
||||
num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
|
||||
|
||||
switch (nir_instr_as_intrinsic(instr)->intrinsic) {
|
||||
case nir_intrinsic_load_deref:
|
||||
/* Uniform or UBO load.
|
||||
* Set a low cost to balance the number of scalar loads and ALUs.
|
||||
*/
|
||||
return 3 * num_dst_dwords;
|
||||
|
||||
default:
|
||||
unreachable("unexpected intrinsic");
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("unexpected instr type");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
zink_screen_init_compiler(struct zink_screen *screen)
|
||||
{
|
||||
|
|
@ -1275,6 +1402,23 @@ zink_screen_init_compiler(struct zink_screen *screen)
|
|||
screen->nir_options.max_unroll_iterations_fp64 = 32;
|
||||
}
|
||||
|
||||
if (screen->driver_workarounds.io_opt) {
|
||||
screen->nir_options.io_options |= nir_io_glsl_opt_varyings;
|
||||
|
||||
switch (screen->info.driver_props.driverID) {
|
||||
case VK_DRIVER_ID_MESA_RADV:
|
||||
case VK_DRIVER_ID_AMD_OPEN_SOURCE:
|
||||
case VK_DRIVER_ID_AMD_PROPRIETARY:
|
||||
screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
|
||||
screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
|
||||
break;
|
||||
default:
|
||||
mesa_logw("zink: instruction costs not implemented for this implementation!");
|
||||
screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
|
||||
screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
The OpFRem and OpFMod instructions use cheap approximations of remainder,
|
||||
and the error can be large due to the discontinuity in trunc() and floor().
|
||||
|
|
@ -3806,6 +3950,14 @@ compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *n
|
|||
struct zink_shader_info *sinfo = &zs->sinfo;
|
||||
prune_io(nir);
|
||||
|
||||
switch (nir->info.stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
NIR_PASS_V(nir, nir_divergence_analysis);
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
NIR_PASS_V(nir, nir_convert_from_ssa, true);
|
||||
|
||||
if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
|
||||
|
|
|
|||
|
|
@ -117,6 +117,7 @@ zink_debug_options[] = {
|
|||
{ "dgc", ZINK_DEBUG_DGC, "Use DGC (driver testing only)" },
|
||||
{ "mem", ZINK_DEBUG_MEM, "Debug memory allocations" },
|
||||
{ "quiet", ZINK_DEBUG_QUIET, "Suppress warnings" },
|
||||
{ "ioopt", ZINK_DEBUG_IOOPT, "Optimize IO" },
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
||||
|
|
@ -3526,6 +3527,8 @@ zink_internal_create_screen(const struct pipe_screen_config *config, int64_t dev
|
|||
}
|
||||
zink_screen_fence_init(&screen->base);
|
||||
|
||||
if (zink_debug & ZINK_DEBUG_IOOPT)
|
||||
screen->driver_workarounds.io_opt = true;
|
||||
zink_screen_init_compiler(screen);
|
||||
if (!disk_cache_init(screen)) {
|
||||
mesa_loge("ZINK: failed to initialize disk cache");
|
||||
|
|
|
|||
|
|
@ -241,6 +241,7 @@ enum zink_debug {
|
|||
ZINK_DEBUG_DGC = (1<<17),
|
||||
ZINK_DEBUG_MEM = (1<<18),
|
||||
ZINK_DEBUG_QUIET = (1<<19),
|
||||
ZINK_DEBUG_IOOPT = (1<<20),
|
||||
};
|
||||
|
||||
enum zink_pv_emulation_primitive {
|
||||
|
|
@ -1565,6 +1566,7 @@ struct zink_screen {
|
|||
bool lower_robustImageAccess2;
|
||||
bool needs_zs_shader_swizzle;
|
||||
bool can_do_invalid_linear_modifier;
|
||||
bool io_opt;
|
||||
unsigned z16_unscaled_bias;
|
||||
unsigned z24_unscaled_bias;
|
||||
} driver_workarounds;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue