mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 06:50:11 +01:00
radeonsi: eliminate trivial constant VS outputs
These constant value VS PARAM exports:
- 0,0,0,0
- 0,0,0,1
- 1,1,1,0
- 1,1,1,1
can be loaded into PS inputs using the DEFAULT_VAL field, and the VS exports can be removed from the IR to save export & parameter memory.

After LLVM optimizations, analyze the IR to see which exports are equal to the ones listed above (or undef) and remove them if they are.

Targeted use cases:
- All DX9 eON ports always clear 10 VS outputs to 0.0 even if most of them are unused by PS (such as Witcher 2 below).
- VS output arrays with unused elements that the GLSL compiler can't eliminate (such as Batman below).

The shader-db deltas are quite interesting:
(not from upstream si-report.py, it won't be upstreamed)

PERCENTAGE DELTAS        Shaders   PARAM exports (affected only)
batman_arkham_origins        589   -67.17 %
bioshock-infinite           1769    -0.47 %
dirt-showdown                548    -2.68 %
dota2                       1747    -3.36 %
f1-2015                      776    -4.94 %
left_4_dead_2               1762    -0.07 %
metro_2033_redux            2670    -0.43 %
portal                       474    -0.22 %
talos_principle              324    -3.63 %
warsow                       176    -2.20 %
witcher2                    1040   -73.78 %
----------------------------------------
All affected                 991   -65.37 %  ...  9681 -> 3353
----------------------------------------
Total                      26725   -10.82 %  ...  58490 -> 52162

v2: treat Undef as both 0 and 1

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (v1)
Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com> (v1)
This commit is contained in:
parent
041da0ae81
commit
3ec9975555
3 changed files with 186 additions and 2 deletions
|
|
@ -6523,6 +6523,159 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
|
||||||
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
|
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return true if the PARAM export has been eliminated. */
|
||||||
|
static bool si_eliminate_const_output(struct si_shader_context *ctx,
|
||||||
|
LLVMValueRef inst, unsigned offset)
|
||||||
|
{
|
||||||
|
struct si_shader *shader = ctx->shader;
|
||||||
|
unsigned num_outputs = shader->selector->info.num_outputs;
|
||||||
|
unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
|
||||||
|
bool is_zero[4] = {}, is_one[4] = {};
|
||||||
|
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
LLVMBool loses_info;
|
||||||
|
LLVMValueRef p = LLVMGetOperand(inst, 5 + i);
|
||||||
|
if (!LLVMIsConstant(p))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* It's a constant expression. Undef outputs are eliminated too. */
|
||||||
|
if (LLVMIsUndef(p)) {
|
||||||
|
is_zero[i] = true;
|
||||||
|
is_one[i] = true;
|
||||||
|
} else {
|
||||||
|
double a = LLVMConstRealGetDouble(p, &loses_info);
|
||||||
|
|
||||||
|
if (a == 0)
|
||||||
|
is_zero[i] = true;
|
||||||
|
else if (a == 1)
|
||||||
|
is_one[i] = true;
|
||||||
|
else
|
||||||
|
return false; /* other constant */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Only certain combinations of 0 and 1 can be eliminated. */
|
||||||
|
if (is_zero[0] && is_zero[1] && is_zero[2])
|
||||||
|
default_val = is_zero[3] ? 0 : 1;
|
||||||
|
else if (is_one[0] && is_one[1] && is_one[2])
|
||||||
|
default_val = is_zero[3] ? 2 : 3;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
|
||||||
|
LLVMInstructionEraseFromParent(inst);
|
||||||
|
|
||||||
|
/* Change OFFSET to DEFAULT_VAL. */
|
||||||
|
for (i = 0; i < num_outputs; i++) {
|
||||||
|
if (shader->info.vs_output_param_offset[i] == offset) {
|
||||||
|
shader->info.vs_output_param_offset[i] =
|
||||||
|
EXP_PARAM_DEFAULT_VAL_0000 + default_val;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct si_vs_exports {
|
||||||
|
unsigned num;
|
||||||
|
unsigned offset[SI_MAX_VS_OUTPUTS];
|
||||||
|
LLVMValueRef inst[SI_MAX_VS_OUTPUTS];
|
||||||
|
};
|
||||||
|
|
||||||
|
static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
|
||||||
|
{
|
||||||
|
struct si_shader *shader = ctx->shader;
|
||||||
|
struct tgsi_shader_info *info = &shader->selector->info;
|
||||||
|
LLVMBasicBlockRef bb;
|
||||||
|
struct si_vs_exports exports;
|
||||||
|
bool removed_any = false;
|
||||||
|
|
||||||
|
exports.num = 0;
|
||||||
|
|
||||||
|
if ((ctx->type == PIPE_SHADER_VERTEX &&
|
||||||
|
(shader->key.vs.as_es || shader->key.vs.as_ls)) ||
|
||||||
|
(ctx->type == PIPE_SHADER_TESS_EVAL && shader->key.tes.as_es))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Process all LLVM instructions. */
|
||||||
|
bb = LLVMGetFirstBasicBlock(ctx->main_fn);
|
||||||
|
while (bb) {
|
||||||
|
LLVMValueRef inst = LLVMGetFirstInstruction(bb);
|
||||||
|
|
||||||
|
while (inst) {
|
||||||
|
LLVMValueRef cur = inst;
|
||||||
|
inst = LLVMGetNextInstruction(inst);
|
||||||
|
|
||||||
|
if (LLVMGetInstructionOpcode(cur) != LLVMCall)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
LLVMValueRef callee = LLVMGetCalledValue(cur);
|
||||||
|
LLVMValueKind kind = LLVMGetValueKind(callee);
|
||||||
|
|
||||||
|
if (kind != LLVMFunctionValueKind)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const char *name = LLVMGetValueName(callee);
|
||||||
|
unsigned num_args = LLVMCountParams(callee);
|
||||||
|
|
||||||
|
/* Check if this is an export instruction. */
|
||||||
|
if (num_args != 9 || strcmp(name, "llvm.SI.export"))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
LLVMValueRef arg = LLVMGetOperand(cur, 3);
|
||||||
|
unsigned target = LLVMConstIntGetZExtValue(arg);
|
||||||
|
|
||||||
|
if (target < V_008DFC_SQ_EXP_PARAM)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
target -= V_008DFC_SQ_EXP_PARAM;
|
||||||
|
|
||||||
|
/* Eliminate constant value PARAM exports. */
|
||||||
|
if (si_eliminate_const_output(ctx, cur, target)) {
|
||||||
|
removed_any = true;
|
||||||
|
} else {
|
||||||
|
exports.offset[exports.num] = target;
|
||||||
|
exports.inst[exports.num] = cur;
|
||||||
|
exports.num++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bb = LLVMGetNextBasicBlock(bb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remove holes in export memory due to removed PARAM exports.
|
||||||
|
* This is done by renumbering all PARAM exports.
|
||||||
|
*/
|
||||||
|
if (removed_any) {
|
||||||
|
ubyte current_offset[SI_MAX_VS_OUTPUTS];
|
||||||
|
unsigned new_count = 0;
|
||||||
|
unsigned out, i;
|
||||||
|
|
||||||
|
/* Make a copy of the offsets. We need the old version while
|
||||||
|
* we are modifying some of them. */
|
||||||
|
assert(sizeof(current_offset) ==
|
||||||
|
sizeof(shader->info.vs_output_param_offset));
|
||||||
|
memcpy(current_offset, shader->info.vs_output_param_offset,
|
||||||
|
sizeof(current_offset));
|
||||||
|
|
||||||
|
for (i = 0; i < exports.num; i++) {
|
||||||
|
unsigned offset = exports.offset[i];
|
||||||
|
|
||||||
|
for (out = 0; out < info->num_outputs; out++) {
|
||||||
|
if (current_offset[out] != offset)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
LLVMSetOperand(exports.inst[i], 3,
|
||||||
|
LLVMConstInt(ctx->i32,
|
||||||
|
V_008DFC_SQ_EXP_PARAM + new_count, 0));
|
||||||
|
shader->info.vs_output_param_offset[out] = new_count;
|
||||||
|
new_count++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
shader->info.nr_param_exports = new_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int si_compile_tgsi_shader(struct si_screen *sscreen,
|
int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||||
LLVMTargetMachineRef tm,
|
LLVMTargetMachineRef tm,
|
||||||
struct si_shader *shader,
|
struct si_shader *shader,
|
||||||
|
|
@ -6546,6 +6699,9 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||||
si_init_shader_ctx(&ctx, sscreen, shader, tm);
|
si_init_shader_ctx(&ctx, sscreen, shader, tm);
|
||||||
ctx.is_monolithic = is_monolithic;
|
ctx.is_monolithic = is_monolithic;
|
||||||
|
|
||||||
|
memset(shader->info.vs_output_param_offset, 0xff,
|
||||||
|
sizeof(shader->info.vs_output_param_offset));
|
||||||
|
|
||||||
shader->info.uses_instanceid = sel->info.uses_instanceid;
|
shader->info.uses_instanceid = sel->info.uses_instanceid;
|
||||||
|
|
||||||
bld_base = &ctx.soa.bld_base;
|
bld_base = &ctx.soa.bld_base;
|
||||||
|
|
@ -6630,6 +6786,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||||
si_llvm_finalize_module(&ctx,
|
si_llvm_finalize_module(&ctx,
|
||||||
r600_extra_shader_checks(&sscreen->b, ctx.type));
|
r600_extra_shader_checks(&sscreen->b, ctx.type));
|
||||||
|
|
||||||
|
/* Post-optimization transformations. */
|
||||||
|
si_eliminate_const_vs_outputs(&ctx);
|
||||||
|
|
||||||
|
/* Compile to bytecode. */
|
||||||
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
|
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
|
||||||
mod, debug, ctx.type, "TGSI shader");
|
mod, debug, ctx.type, "TGSI shader");
|
||||||
if (r) {
|
if (r) {
|
||||||
|
|
|
||||||
|
|
@ -415,6 +415,17 @@ struct si_shader_config {
|
||||||
unsigned rsrc2;
|
unsigned rsrc2;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Encodings of a VS output's PARAM slot: either an offset into parameter
 * memory or one of the four constant vectors selectable through DEFAULT_VAL.
 */
enum {
	/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
	EXP_PARAM_OFFSET_0 = 0,
	EXP_PARAM_OFFSET_31 = 31,

	/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
	EXP_PARAM_DEFAULT_VAL_0000 = 64, /* DEFAULT_VAL = 0 */
	EXP_PARAM_DEFAULT_VAL_0001 = 65, /* DEFAULT_VAL = 1 */
	EXP_PARAM_DEFAULT_VAL_1110 = 66, /* DEFAULT_VAL = 2 */
	EXP_PARAM_DEFAULT_VAL_1111 = 67, /* DEFAULT_VAL = 3 */
};
|
||||||
|
|
||||||
/* GCN-specific shader info. */
|
/* GCN-specific shader info. */
|
||||||
struct si_shader_info {
|
struct si_shader_info {
|
||||||
ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
|
ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
|
||||||
|
|
|
||||||
|
|
@ -1567,7 +1567,7 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
|
||||||
unsigned index, unsigned interpolate)
|
unsigned index, unsigned interpolate)
|
||||||
{
|
{
|
||||||
struct tgsi_shader_info *vsinfo = &vs->selector->info;
|
struct tgsi_shader_info *vsinfo = &vs->selector->info;
|
||||||
unsigned j, ps_input_cntl = 0;
|
unsigned j, offset, ps_input_cntl = 0;
|
||||||
|
|
||||||
if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
|
if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
|
||||||
(interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
|
(interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
|
||||||
|
|
@ -1582,7 +1582,20 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
|
||||||
for (j = 0; j < vsinfo->num_outputs; j++) {
|
for (j = 0; j < vsinfo->num_outputs; j++) {
|
||||||
if (name == vsinfo->output_semantic_name[j] &&
|
if (name == vsinfo->output_semantic_name[j] &&
|
||||||
index == vsinfo->output_semantic_index[j]) {
|
index == vsinfo->output_semantic_index[j]) {
|
||||||
ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[j]);
|
offset = vs->info.vs_output_param_offset[j];
|
||||||
|
|
||||||
|
if (offset <= EXP_PARAM_OFFSET_31) {
|
||||||
|
/* The input is loaded from parameter memory. */
|
||||||
|
ps_input_cntl |= S_028644_OFFSET(offset);
|
||||||
|
} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
|
||||||
|
/* The input is a DEFAULT_VAL constant. */
|
||||||
|
assert(offset >= EXP_PARAM_DEFAULT_VAL_0000 &&
|
||||||
|
offset <= EXP_PARAM_DEFAULT_VAL_1111);
|
||||||
|
|
||||||
|
offset -= EXP_PARAM_DEFAULT_VAL_0000;
|
||||||
|
ps_input_cntl = S_028644_OFFSET(0x20) |
|
||||||
|
S_028644_DEFAULT_VAL(offset);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue