diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 31b2efd3224..a714aeab010 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1702,6 +1702,7 @@ struct brw_compile_stats { uint32_t cycles; uint32_t spills; uint32_t fills; + uint32_t max_live_registers; }; /** @} */ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 8fc28a50abb..b2739dccc8f 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6456,6 +6456,18 @@ fs_visitor::fixup_nomask_control_flow() return progress; } +uint32_t +fs_visitor::compute_max_register_pressure() +{ + const register_pressure &rp = regpressure_analysis.require(); + uint32_t ip = 0, max_pressure = 0; + foreach_block_and_inst(block, backend_instruction, inst, cfg) { + max_pressure = MAX2(max_pressure, rp.regs_live_at_ip[ip]); + ip++; + } + return max_pressure; +} + void fs_visitor::allocate_registers(bool allow_spilling) { @@ -6475,6 +6487,9 @@ fs_visitor::allocate_registers(bool allow_spilling) "lifo" }; + if (needs_register_pressure) + shader_stats.max_register_pressure = compute_max_register_pressure(); + bool spill_all = allow_spilling && INTEL_DEBUG(DEBUG_SPILL_FS); /* Before we schedule anything, stash off the instruction order as an array @@ -7443,6 +7458,7 @@ brw_compile_fs(const struct brw_compiler *compiler, v8 = std::make_unique(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base, nir, 8, + params->stats != NULL, debug_enabled); if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) { params->error_str = ralloc_strdup(mem_ctx, v8->fail_msg); @@ -7485,6 +7501,7 @@ brw_compile_fs(const struct brw_compiler *compiler, /* Try a SIMD16 compile */ v16 = std::make_unique(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base, nir, 16, + params->stats != NULL, debug_enabled); v16->import_uniforms(v8.get()); if (!v16->run_fs(allow_spilling, params->use_rep_send)) { @@ -7512,6 +7529,7 @@ brw_compile_fs(const struct brw_compiler *compiler, /* Try a SIMD32 compile */ v32 = std::make_unique(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base, nir, 32, + params->stats != NULL, debug_enabled); v32->import_uniforms(v8.get()); if (!v32->run_fs(allow_spilling, false)) { @@ -7789,7 +7807,8 @@ brw_compile_cs(const struct brw_compiler *compiler, key->base.robust_buffer_access); v[simd] = std::make_unique(compiler, params->log_data, mem_ctx, &key->base, - &prog_data->base, shader, dispatch_width, + &prog_data->base, shader, dispatch_width, + params->stats != NULL, debug_enabled); const int first = brw_simd_first_compiled(simd_state); @@ -7922,7 +7941,9 @@ compile_single_bs(const struct brw_compiler *compiler, void *log_data, v[simd] = std::make_unique(compiler, log_data, mem_ctx, &key->base, &prog_data->base, shader, - dispatch_width, debug_enabled); + dispatch_width, + stats != NULL, + debug_enabled); const bool allow_spilling = !brw_simd_any_compiled(simd_state); if (v[simd]->run_bs(allow_spilling)) { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 56b50bd4be1..805c60a61cb 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -83,6 +83,7 @@ struct shader_stats { unsigned promoted_constants; unsigned spill_count; unsigned fill_count; + unsigned max_register_pressure; }; /** Register numbers for thread payload fields. */ @@ -188,12 +189,14 @@ public: struct brw_stage_prog_data *prog_data, const nir_shader *shader, unsigned dispatch_width, + bool needs_register_pressure, bool debug_enabled); fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, struct brw_gs_compile *gs_compile, struct brw_gs_prog_data *prog_data, const nir_shader *shader, + bool needs_register_pressure, bool debug_enabled); void init(); ~fs_visitor(); @@ -220,6 +223,7 @@ public: bool run_mesh(bool allow_spilling); void optimize(); void allocate_registers(bool allow_spilling); + uint32_t compute_max_register_pressure(); bool fixup_sends_duplicate_payload(); void fixup_3src_null_dest(); void emit_dummy_memory_fence_before_eot(); @@ -526,6 +530,7 @@ public: unsigned grf_used; bool spilled_any_registers; + bool needs_register_pressure; const unsigned dispatch_width; /**< 8, 16 or 32 */ unsigned max_dispatch_width; diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 17b123a478f..7a26bff58e1 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2537,6 +2537,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, stats->cycles = perf.latency; stats->spills = shader_stats.spill_count; stats->fills = shader_stats.fill_count; + stats->max_live_registers = shader_stats.max_register_pressure; } return start_offset; diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index f43e4787bc0..d62296676d0 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -1353,12 +1353,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_stage_prog_data *prog_data, const nir_shader *shader, unsigned dispatch_width, + bool needs_register_pressure, bool debug_enabled) : backend_shader(compiler, log_data, mem_ctx, shader, prog_data, debug_enabled), key(key), gs_compile(NULL), prog_data(prog_data), live_analysis(this), regpressure_analysis(this), performance_analysis(this), + needs_register_pressure(needs_register_pressure), dispatch_width(dispatch_width), bld(fs_builder(this, dispatch_width).at_end()) { @@ -1370,6 +1372,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_gs_compile *c, struct brw_gs_prog_data *prog_data, const nir_shader *shader, + bool needs_register_pressure, bool debug_enabled) : backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base.base, debug_enabled), @@ -1377,6 +1380,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, prog_data(&prog_data->base.base), live_analysis(this), regpressure_analysis(this), performance_analysis(this), + needs_register_pressure(needs_register_pressure), dispatch_width(8), bld(fs_builder(this, dispatch_width).at_end()) { @@ -1411,10 +1415,7 @@ fs_visitor::init() this->last_scratch = 0; this->push_constant_loc = NULL; - this->shader_stats.scheduler_mode = NULL; - this->shader_stats.promoted_constants = 0, - this->shader_stats.spill_count = 0, - this->shader_stats.fill_count = 0, + memset(&this->shader_stats, 0, sizeof(this->shader_stats)); this->grf_used = 0; this->spilled_any_registers = false; diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index f42997da2c6..05bd938412d 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -334,6 +334,7 @@ brw_compile_task(const struct brw_compiler *compiler, v[simd] = std::make_unique(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base.base, shader, dispatch_width, + params->stats != NULL, debug_enabled); if (prog_data->base.prog_mask) { @@ -1042,6 +1043,7 @@ brw_compile_mesh(const struct brw_compiler *compiler, v[simd] = std::make_unique(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base.base, shader, dispatch_width, + params->stats != NULL, debug_enabled); if (prog_data->base.prog_mask) { diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index c6543e8bc28..a8705835f1b 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -1394,7 +1394,7 @@ brw_compile_tes(const struct brw_compiler *compiler, if (is_scalar) { fs_visitor v(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base.base, nir, 8, - debug_enabled); + params->stats != NULL, debug_enabled); if (!v.run_tes()) { params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 64e6f29f52a..eda343b48ed 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -2642,7 +2642,7 @@ brw_compile_vs(const struct brw_compiler *compiler, fs_visitor v(compiler, params->log_data, mem_ctx, &key->base, &prog_data->base.base, nir, 8, - debug_enabled); + params->stats != NULL, debug_enabled); if (!v.run_vs()) { params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index ac998586125..d3fc8bb401e 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -820,7 +820,7 @@ brw_compile_gs(const struct brw_compiler *compiler, if (is_scalar) { fs_visitor v(compiler, params->log_data, mem_ctx, &c, prog_data, nir, - debug_enabled); + params->stats != NULL, debug_enabled); if (v.run_gs()) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs; diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index f5e5954ee40..d0dcaff8ecc 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -449,7 +449,8 @@ brw_compile_tcs(const struct brw_compiler *compiler, if (is_scalar) { fs_visitor v(compiler, params->log_data, mem_ctx, &key->base, - &prog_data->base.base, nir, 8, debug_enabled); + &prog_data->base.base, nir, 8, params->stats != NULL, + debug_enabled); if (!v.run_tcs()) { params->error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp index b7812c389ce..cfc99a96fef 100644 --- a/src/intel/compiler/test_fs_cmod_propagation.cpp +++ b/src/intel/compiler/test_fs_cmod_propagation.cpp @@ -60,7 +60,7 @@ public: struct brw_wm_prog_data *prog_data, nir_shader *shader) : fs_visitor(compiler, NULL, mem_ctx, NULL, - &prog_data->base, shader, 8, false) {} + &prog_data->base, shader, 8, false, false) {} }; diff --git a/src/intel/compiler/test_fs_copy_propagation.cpp b/src/intel/compiler/test_fs_copy_propagation.cpp index 0e7ab6c8f11..2de13476032 100644 --- a/src/intel/compiler/test_fs_copy_propagation.cpp +++ b/src/intel/compiler/test_fs_copy_propagation.cpp @@ -49,7 +49,7 @@ public: struct brw_wm_prog_data *prog_data, nir_shader *shader) : fs_visitor(compiler, NULL, mem_ctx, NULL, - &prog_data->base, shader, 8, false) {} + &prog_data->base, shader, 8, false, false) {} }; diff --git a/src/intel/compiler/test_fs_saturate_propagation.cpp b/src/intel/compiler/test_fs_saturate_propagation.cpp index c6560a0d058..5c85c0165c1 100644 --- a/src/intel/compiler/test_fs_saturate_propagation.cpp +++ b/src/intel/compiler/test_fs_saturate_propagation.cpp @@ -49,7 +49,7 @@ public: struct brw_wm_prog_data *prog_data, nir_shader *shader) : fs_visitor(compiler, NULL, mem_ctx, NULL, - &prog_data->base, shader, 16, false) {} + &prog_data->base, shader, 16, false, false) {} }; diff --git a/src/intel/compiler/test_fs_scoreboard.cpp b/src/intel/compiler/test_fs_scoreboard.cpp index adfa09b00c2..39b2b85eb07 100644 --- a/src/intel/compiler/test_fs_scoreboard.cpp +++ b/src/intel/compiler/test_fs_scoreboard.cpp @@ -52,7 +52,8 @@ void scoreboard_test::SetUp() nir_shader *shader = nir_shader_create(ctx, MESA_SHADER_FRAGMENT, NULL, NULL); - v = new fs_visitor(compiler, NULL, ctx, NULL, &prog_data->base, shader, 8, false); + v = new fs_visitor(compiler, NULL, ctx, NULL, &prog_data->base, shader, 8, + false, false); devinfo->ver = 12; devinfo->verx10 = devinfo->ver * 10;