mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-12 05:18:18 +02:00
intel/brw: Report number of GRF registers used in brw_stage_prog_data.
This is similar to what we used to do on pre-SNB platforms. On Xe3+, the number of GRF registers used by the shader will be used to adjust the trade-off between thread-level parallelism and the size of the GRF file. Plumb the value through prog_data so the driver can set up the hardware state accordingly.
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32664>
This commit is contained in:
parent
6513bf65c3
commit
70fecb1483
10 changed files with 32 additions and 5 deletions
|
|
@ -147,6 +147,10 @@ compile_single_bs(const struct brw_compiler *compiler,
|
|||
else
|
||||
assert(offset == 0);
|
||||
|
||||
if (!prog_offset)
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
selected->grf_used);
|
||||
|
||||
return dispatch_width;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -263,6 +263,10 @@ brw_compile_cs(const struct brw_compiler *compiler,
|
|||
if (stats)
|
||||
stats->max_dispatch_width = max_dispatch_width;
|
||||
stats = stats ? stats + 1 : NULL;
|
||||
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
v[simd]->grf_used);
|
||||
|
||||
max_dispatch_width = 8u << simd;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1606,6 +1606,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
|
||||
assert(v8->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->base.dispatch_grf_start_reg = v8->payload().num_regs / reg_unit(devinfo);
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
v8->grf_used);
|
||||
|
||||
const performance &perf = v8->performance_analysis.require();
|
||||
throughput = MAX2(throughput, perf.throughput);
|
||||
|
|
@ -1714,8 +1716,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
simd32_cfg = v32->cfg;
|
||||
assert(v32->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo);
|
||||
prog_data->base.grf_used = std::max(prog_data->base.grf_used,
|
||||
v32->grf_used);
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
v32->grf_used);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1735,8 +1737,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
|
||||
assert(v16->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo);
|
||||
prog_data->base.grf_used = std::max(prog_data->base.grf_used,
|
||||
v16->grf_used);
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
v16->grf_used);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1760,6 +1762,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
|
||||
assert(v16->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->dispatch_grf_start_reg_16 = v16->payload().num_regs / reg_unit(devinfo);
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
v16->grf_used);
|
||||
|
||||
const performance &perf = v16->performance_analysis.require();
|
||||
throughput = MAX2(throughput, perf.throughput);
|
||||
|
|
@ -1801,6 +1805,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
|
||||
assert(v32->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->dispatch_grf_start_reg_32 = v32->payload().num_regs / reg_unit(devinfo);
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
v32->grf_used);
|
||||
|
||||
throughput = MAX2(throughput, perf.throughput);
|
||||
}
|
||||
|
|
@ -1877,6 +1883,8 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
if (multi_cfg) {
|
||||
assert(vmulti->payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->base.dispatch_grf_start_reg = vmulti->payload().num_regs / reg_unit(devinfo);
|
||||
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
||||
vmulti->grf_used);
|
||||
}
|
||||
|
||||
/* When the caller compiles a repclear or fast clear shader, they
|
||||
|
|
|
|||
|
|
@ -358,6 +358,7 @@ brw_compile_gs(const struct brw_compiler *compiler,
|
|||
assert(v.payload().num_regs % reg_unit(compiler->devinfo) == 0);
|
||||
prog_data->base.base.dispatch_grf_start_reg =
|
||||
v.payload().num_regs / reg_unit(compiler->devinfo);
|
||||
prog_data->base.base.grf_used = v.grf_used;
|
||||
|
||||
brw_generator g(compiler, &params->base,
|
||||
&prog_data->base.base, MESA_SHADER_GEOMETRY);
|
||||
|
|
|
|||
|
|
@ -495,6 +495,8 @@ brw_compile_task(const struct brw_compiler *compiler,
|
|||
|
||||
fs_visitor *selected = v[selected_simd].get();
|
||||
prog_data->base.prog_mask = 1 << selected_simd;
|
||||
prog_data->base.base.grf_used = MAX2(prog_data->base.base.grf_used,
|
||||
selected->grf_used);
|
||||
|
||||
if (unlikely(debug_enabled)) {
|
||||
fprintf(stderr, "Task Output ");
|
||||
|
|
@ -1816,6 +1818,8 @@ brw_compile_mesh(const struct brw_compiler *compiler,
|
|||
|
||||
fs_visitor *selected = v[selected_simd].get();
|
||||
prog_data->base.prog_mask = 1 << selected_simd;
|
||||
prog_data->base.base.grf_used = MAX2(prog_data->base.base.grf_used,
|
||||
selected->grf_used);
|
||||
|
||||
if (unlikely(debug_enabled)) {
|
||||
if (params->tue_map) {
|
||||
|
|
|
|||
|
|
@ -287,6 +287,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||
|
||||
assert(v.payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
|
||||
prog_data->base.base.grf_used = v.grf_used;
|
||||
|
||||
brw_generator g(compiler, &params->base,
|
||||
&prog_data->base.base, MESA_SHADER_TESS_CTRL);
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||
|
||||
assert(v.payload().num_regs % reg_unit(devinfo) == 0);
|
||||
prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
|
||||
|
||||
prog_data->base.base.grf_used = v.grf_used;
|
||||
prog_data->base.dispatch_mode = INTEL_DISPATCH_MODE_SIMD8;
|
||||
|
||||
brw_generator g(compiler, &params->base,
|
||||
|
|
|
|||
|
|
@ -161,6 +161,7 @@ brw_compile_vs(const struct brw_compiler *compiler,
|
|||
assert(v.payload().num_regs % reg_unit(compiler->devinfo) == 0);
|
||||
prog_data->base.base.dispatch_grf_start_reg =
|
||||
v.payload().num_regs / reg_unit(compiler->devinfo);
|
||||
prog_data->base.base.grf_used = v.grf_used;
|
||||
|
||||
brw_generator g(compiler, &params->base,
|
||||
&prog_data->base.base,
|
||||
|
|
|
|||
|
|
@ -548,6 +548,9 @@ struct brw_stage_prog_data {
|
|||
*/
|
||||
unsigned dispatch_grf_start_reg;
|
||||
|
||||
/** Number of GRF registers used. */
|
||||
unsigned grf_used;
|
||||
|
||||
bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
|
||||
|
||||
/* 32-bit identifiers for all push/pull parameters. These can be anything
|
||||
|
|
|
|||
|
|
@ -169,6 +169,7 @@ print_cs_prog_data_fields(FILE *fp, const char *prefix, const char *pad,
|
|||
PROG_DATA_FIELD("%u", base.const_data_offset);
|
||||
PROG_DATA_FIELD("%u", base.num_relocs);
|
||||
fprintf(fp, "%s.base.relocs = %s_relocs,\n", pad, prefix);
|
||||
PROG_DATA_FIELD("%u", base.grf_used);
|
||||
PROG_DATA_FIELD("%u", base.printf_info_count);
|
||||
fprintf(fp, "%s.base.printf_info = (u_printf_info *)%s_printfs,\n", pad, prefix);
|
||||
assert(!cs_prog_data->base.has_ubo_pull);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue