intel/brw: Report number of GRF registers used in brw_stage_prog_data.

This is similar to what we used to do on pre-SNB platforms: the number
of GRF registers used by the shader will be used on Xe3+ to adjust the
trade-off between thread-level parallelism and the size of the GRF file.
Plumb the value through prog_data so the driver can set up the
hardware state accordingly.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32664>
This commit is contained in:
Francisco Jerez 2024-09-18 14:32:58 -07:00 committed by Marge Bot
parent 6513bf65c3
commit 70fecb1483
10 changed files with 32 additions and 5 deletions

View file

@ -147,6 +147,10 @@ compile_single_bs(const struct brw_compiler *compiler,
else
assert(offset == 0);
if (!prog_offset)
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
selected->grf_used);
return dispatch_width;
}

View file

@ -263,6 +263,10 @@ brw_compile_cs(const struct brw_compiler *compiler,
if (stats)
stats->max_dispatch_width = max_dispatch_width;
stats = stats ? stats + 1 : NULL;
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
v[simd]->grf_used);
max_dispatch_width = 8u << simd;
}
}

View file

@ -1606,6 +1606,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
assert(v8->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
v8->grf_used);
const performance &perf = v8->performance_analysis.require();
throughput = MAX2(throughput, perf.throughput);
@ -1714,8 +1716,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
simd32_cfg = v32->cfg;
assert(v32->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = std::max(prog_data->base.grf_used,
v32->grf_used);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
v32->grf_used);
}
}
@ -1735,8 +1737,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
assert(v16->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = std::max(prog_data->base.grf_used,
v16->grf_used);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
v16->grf_used);
}
}
@ -1760,6 +1762,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
assert(v16->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
v16->grf_used);
const performance &perf = v16->performance_analysis.require();
throughput = MAX2(throughput, perf.throughput);
@ -1801,6 +1805,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
assert(v32->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
v32->grf_used);
throughput = MAX2(throughput, perf.throughput);
}
@ -1877,6 +1883,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
if (multi_cfg) {
assert(vmulti->payload().num_regs % reg_unit(devinfo) == 0);
prog_data->base.dispatch_grf_start_reg = vmulti->payload().num_regs / reg_unit(devinfo);
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
vmulti->grf_used);
}
/* When the caller compiles a repclear or fast clear shader, they

View file

@ -358,6 +358,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
assert(v.payload().num_regs % reg_unit(compiler->devinfo) == 0);
prog_data->base.base.dispatch_grf_start_reg =
v.payload().num_regs / reg_unit(compiler->devinfo);
prog_data->base.base.grf_used = v.grf_used;
brw_generator g(compiler, &params->base,
&prog_data->base.base, MESA_SHADER_GEOMETRY);

View file

@ -495,6 +495,8 @@ brw_compile_task(const struct brw_compiler *compiler,
fs_visitor *selected = v[selected_simd].get();
prog_data->base.prog_mask = 1 << selected_simd;
prog_data->base.base.grf_used = MAX2(prog_data->base.base.grf_used,
selected->grf_used);
if (unlikely(debug_enabled)) {
fprintf(stderr, "Task Output ");
@ -1816,6 +1818,8 @@ brw_compile_mesh(const struct brw_compiler *compiler,
fs_visitor *selected = v[selected_simd].get();
prog_data->base.prog_mask = 1 << selected_simd;
prog_data->base.base.grf_used = MAX2(prog_data->base.base.grf_used,
selected->grf_used);
if (unlikely(debug_enabled)) {
if (params->tue_map) {

View file

@ -287,6 +287,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
assert(v.payload().num_regs % reg_unit(devinfo) == 0);
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
prog_data->base.base.grf_used = v.grf_used;
brw_generator g(compiler, &params->base,
&prog_data->base.base, MESA_SHADER_TESS_CTRL);

View file

@ -164,7 +164,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
assert(v.payload().num_regs % reg_unit(devinfo) == 0);
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
prog_data->base.base.grf_used = v.grf_used;
prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
brw_generator g(compiler, &params->base,

View file

@ -161,6 +161,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
assert(v.payload().num_regs % reg_unit(compiler->devinfo) == 0);
prog_data->base.base.dispatch_grf_start_reg =
v.payload().num_regs / reg_unit(compiler->devinfo);
prog_data->base.base.grf_used = v.grf_used;
brw_generator g(compiler, &params->base,
&prog_data->base.base,

View file

@ -548,6 +548,9 @@ struct brw_stage_prog_data {
*/
unsigned dispatch_grf_start_reg;
/** Number of GRF registers used. */
unsigned grf_used;
bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
/* 32-bit identifiers for all push/pull parameters. These can be anything

View file

@ -169,6 +169,7 @@ print_cs_prog_data_fields(FILE *fp, const char *prefix, const char *pad,
PROG_DATA_FIELD("%u", base.const_data_offset);
PROG_DATA_FIELD("%u", base.num_relocs);
fprintf(fp, "%s.base.relocs = %s_relocs,\n", pad, prefix);
PROG_DATA_FIELD("%u", base.grf_used);
PROG_DATA_FIELD("%u", base.printf_info_count);
fprintf(fp, "%s.base.printf_info = (u_printf_info *)%s_printfs,\n", pad, prefix);
assert(!cs_prog_data->base.has_ubo_pull);