i965: Replace cfg instances with calls to calculate_cfg().

Avoids regenerating the CFG unnecessarily.

Every program in shader-db improved, each by a reduction of at least
one third. One Dota2 shader decreased from 62 to 24.

cfg calculations:     429492 -> 193197 (-55.02%)

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
This commit is contained in:
Matt Turner 2014-07-11 21:24:02 -07:00
parent dd65a6d9ad
commit 1761671b06
5 changed files with 22 additions and 22 deletions

View file

@ -591,31 +591,33 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
bool
fs_visitor::opt_copy_propagate()
{
calculate_cfg();
bool progress = false;
void *copy_prop_ctx = ralloc_context(NULL);
cfg_t cfg(&instructions);
exec_list *out_acp[cfg.num_blocks];
for (int i = 0; i < cfg.num_blocks; i++)
exec_list *out_acp[cfg->num_blocks];
for (int i = 0; i < cfg->num_blocks; i++)
out_acp[i] = new exec_list [ACP_HASH_SIZE];
/* First, walk through each block doing local copy propagation and getting
* the set of copies available at the end of the block.
*/
for (int b = 0; b < cfg.num_blocks; b++) {
bblock_t *block = cfg.blocks[b];
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
progress = opt_copy_propagate_local(copy_prop_ctx, block,
out_acp[b]) || progress;
}
/* Do dataflow analysis for those available copies. */
fs_copy_prop_dataflow dataflow(copy_prop_ctx, &cfg, out_acp);
fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, out_acp);
/* Next, re-run local copy propagation, this time with the set of copies
* provided by the dataflow analysis available at the start of a block.
*/
for (int b = 0; b < cfg.num_blocks; b++) {
bblock_t *block = cfg.blocks[b];
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
exec_list in_acp[ACP_HASH_SIZE];
for (int i = 0; i < dataflow.num_acp; i++) {
@ -628,7 +630,7 @@ fs_visitor::opt_copy_propagate()
progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress;
}
for (int i = 0; i < cfg.num_blocks; i++)
for (int i = 0; i < cfg->num_blocks; i++)
delete [] out_acp[i];
ralloc_free(copy_prop_ctx);

View file

@ -45,10 +45,10 @@ fs_visitor::opt_peephole_predicated_break()
{
bool progress = false;
cfg_t cfg(&instructions);
calculate_cfg();
for (int b = 0; b < cfg.num_blocks; b++) {
bblock_t *block = cfg.blocks[b];
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
/* BREAK and CONTINUE instructions, by definition, can only be found at
* the ends of basic blocks.

View file

@ -127,10 +127,10 @@ fs_visitor::opt_peephole_sel()
{
bool progress = false;
cfg_t cfg(&instructions);
calculate_cfg();
for (int b = 0; b < cfg.num_blocks; b++) {
bblock_t *block = cfg.blocks[b];
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
/* IF instructions, by definition, can only be found at the ends of
* basic blocks.

View file

@ -748,13 +748,13 @@ vec4_visitor::opt_set_dependency_control()
vec4_instruction *last_mrf_write[BRW_MAX_GRF];
uint8_t mrf_channels_written[BRW_MAX_GRF];
cfg_t cfg(&instructions);
calculate_cfg();
assert(prog_data->total_grf ||
!"Must be called after register allocation");
for (int i = 0; i < cfg.num_blocks; i++) {
bblock_t *bblock = cfg.blocks[i];
for (int i = 0; i < cfg->num_blocks; i++) {
bblock_t *bblock = cfg->blocks[i];
vec4_instruction *inst;
memset(last_grf_write, 0, sizeof(last_grf_write));

View file

@ -254,10 +254,8 @@ vec4_visitor::opt_cse()
calculate_live_intervals();
cfg_t cfg(&instructions);
for (int b = 0; b < cfg.num_blocks; b++) {
bblock_t *block = cfg.blocks[b];
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
progress = opt_cse_local(block) || progress;
}