diff --git a/src/intel/compiler/brw_compile_bs.cpp b/src/intel/compiler/brw_compile_bs.cpp index 9eeba745160..ab71d3dbfa7 100644 --- a/src/intel/compiler/brw_compile_bs.cpp +++ b/src/intel/compiler/brw_compile_bs.cpp @@ -147,6 +147,10 @@ compile_single_bs(const struct brw_compiler *compiler, else assert(offset == 0); + if (!prog_offset) + prog_data->base.grf_used = MAX2(prog_data->base.grf_used, + selected->grf_used); + return dispatch_width; } diff --git a/src/intel/compiler/brw_compile_cs.cpp b/src/intel/compiler/brw_compile_cs.cpp index dd86044f5b8..59e26e64da6 100644 --- a/src/intel/compiler/brw_compile_cs.cpp +++ b/src/intel/compiler/brw_compile_cs.cpp @@ -263,6 +263,10 @@ brw_compile_cs(const struct brw_compiler *compiler, if (stats) stats->max_dispatch_width = max_dispatch_width; stats = stats ? stats + 1 : NULL; + + prog_data->base.grf_used = MAX2(prog_data->base.grf_used, + v[simd]->grf_used); + max_dispatch_width = 8u << simd; } } diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index 22fbc31d233..31d9e312139 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -1606,6 +1606,8 @@ brw_compile_fs(const struct brw_compiler *compiler, assert(v8->payload().num_regs % reg_unit(devinfo) == 0); prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs / reg_unit(devinfo); + prog_data->base.grf_used = MAX2(prog_data->base.grf_used, + v8->grf_used); const performance &perf = v8->performance_analysis.require(); throughput = MAX2(throughput, perf.throughput); @@ -1714,8 +1716,8 @@ brw_compile_fs(const struct brw_compiler *compiler, simd32_cfg = v32->cfg; assert(v32->payload().num_regs % reg_unit(devinfo) == 0); prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo); - prog_data->base.grf_used = std::max(prog_data->base.grf_used, - v32->grf_used); + prog_data->base.grf_used = MAX2(prog_data->base.grf_used, + v32->grf_used); } } @@ -1735,8 +1737,8 @@ 
brw_compile_fs(const struct brw_compiler *compiler, assert(v16->payload().num_regs % reg_unit(devinfo) == 0); prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo); - prog_data->base.grf_used = std::max(prog_data->base.grf_used, - v16->grf_used); + prog_data->base.grf_used = MAX2(prog_data->base.grf_used, + v16->grf_used); } } @@ -1760,6 +1762,8 @@ brw_compile_fs(const struct brw_compiler *compiler, assert(v16->payload().num_regs % reg_unit(devinfo) == 0); prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo); + prog_data->base.grf_used = MAX2(prog_data->base.grf_used, + v16->grf_used); const performance &perf = v16->performance_analysis.require(); throughput = MAX2(throughput, perf.throughput); @@ -1801,6 +1805,8 @@ brw_compile_fs(const struct brw_compiler *compiler, assert(v32->payload().num_regs % reg_unit(devinfo) == 0); prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo); + prog_data->base.grf_used = MAX2(prog_data->base.grf_used, + v32->grf_used); throughput = MAX2(throughput, perf.throughput); } @@ -1877,6 +1883,8 @@ brw_compile_fs(const struct brw_compiler *compiler, if (multi_cfg) { assert(vmulti->payload().num_regs % reg_unit(devinfo) == 0); prog_data->base.dispatch_grf_start_reg = vmulti->payload().num_regs / reg_unit(devinfo); + prog_data->base.grf_used = MAX2(prog_data->base.grf_used, + vmulti->grf_used); } /* When the caller compiles a repclear or fast clear shader, they diff --git a/src/intel/compiler/brw_compile_gs.cpp b/src/intel/compiler/brw_compile_gs.cpp index f205bb4fcf2..14a577bb4ce 100644 --- a/src/intel/compiler/brw_compile_gs.cpp +++ b/src/intel/compiler/brw_compile_gs.cpp @@ -358,6 +358,7 @@ brw_compile_gs(const struct brw_compiler *compiler, assert(v.payload().num_regs % reg_unit(compiler->devinfo) == 0); prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(compiler->devinfo); + prog_data->base.base.grf_used = v.grf_used; 
brw_generator g(compiler, &params->base, &prog_data->base.base, MESA_SHADER_GEOMETRY); diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp index 8316e090357..59e38b37259 100644 --- a/src/intel/compiler/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw_compile_mesh.cpp @@ -495,6 +495,8 @@ brw_compile_task(const struct brw_compiler *compiler, fs_visitor *selected = v[selected_simd].get(); prog_data->base.prog_mask = 1 << selected_simd; + prog_data->base.base.grf_used = MAX2(prog_data->base.base.grf_used, + selected->grf_used); if (unlikely(debug_enabled)) { fprintf(stderr, "Task Output "); @@ -1816,6 +1818,8 @@ brw_compile_mesh(const struct brw_compiler *compiler, fs_visitor *selected = v[selected_simd].get(); prog_data->base.prog_mask = 1 << selected_simd; + prog_data->base.base.grf_used = MAX2(prog_data->base.base.grf_used, + selected->grf_used); if (unlikely(debug_enabled)) { if (params->tue_map) { diff --git a/src/intel/compiler/brw_compile_tcs.cpp b/src/intel/compiler/brw_compile_tcs.cpp index 23d70651fcf..b0380b4523f 100644 --- a/src/intel/compiler/brw_compile_tcs.cpp +++ b/src/intel/compiler/brw_compile_tcs.cpp @@ -287,6 +287,7 @@ brw_compile_tcs(const struct brw_compiler *compiler, assert(v.payload().num_regs % reg_unit(devinfo) == 0); prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo); + prog_data->base.base.grf_used = v.grf_used; brw_generator g(compiler, &params->base, &prog_data->base.base, MESA_SHADER_TESS_CTRL); diff --git a/src/intel/compiler/brw_compile_tes.cpp b/src/intel/compiler/brw_compile_tes.cpp index c0cc7ab58b9..96af0d405ee 100644 --- a/src/intel/compiler/brw_compile_tes.cpp +++ b/src/intel/compiler/brw_compile_tes.cpp @@ -164,7 +164,7 @@ brw_compile_tes(const struct brw_compiler *compiler, assert(v.payload().num_regs % reg_unit(devinfo) == 0); prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo); - + prog_data->base.base.grf_used =
v.grf_used; prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8; brw_generator g(compiler, &params->base, diff --git a/src/intel/compiler/brw_compile_vs.cpp b/src/intel/compiler/brw_compile_vs.cpp index 3e6e5befefb..83c990668d9 100644 --- a/src/intel/compiler/brw_compile_vs.cpp +++ b/src/intel/compiler/brw_compile_vs.cpp @@ -161,6 +161,7 @@ brw_compile_vs(const struct brw_compiler *compiler, assert(v.payload().num_regs % reg_unit(compiler->devinfo) == 0); prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(compiler->devinfo); + prog_data->base.base.grf_used = v.grf_used; brw_generator g(compiler, &params->base, &prog_data->base.base, diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 6880ca0406b..ce6c5f5fd5b 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -548,6 +548,9 @@ struct brw_stage_prog_data { */ unsigned dispatch_grf_start_reg; + /** Number of GRF registers used. */ + unsigned grf_used; + bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */ /* 32-bit identifiers for all push/pull parameters. These can be anything diff --git a/src/intel/compiler/intel_clc.c b/src/intel/compiler/intel_clc.c index dd2ee81974a..22754240f9a 100644 --- a/src/intel/compiler/intel_clc.c +++ b/src/intel/compiler/intel_clc.c @@ -169,6 +169,7 @@ print_cs_prog_data_fields(FILE *fp, const char *prefix, const char *pad, PROG_DATA_FIELD("%u", base.const_data_offset); PROG_DATA_FIELD("%u", base.num_relocs); fprintf(fp, "%s.base.relocs = %s_relocs,\n", pad, prefix); + PROG_DATA_FIELD("%u", base.grf_used); PROG_DATA_FIELD("%u", base.printf_info_count); fprintf(fp, "%s.base.printf_info = (u_printf_info *)%s_printfs,\n", pad, prefix); assert(!cs_prog_data->base.has_ubo_pull);