mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-24 14:58:10 +02:00
Since inline parameter is the last field of the thread payload, the backend can always assume they may exist. They won't affect the position of other payload fields and the register allocator will reuse any unused space. In Anv, also update EmitInlineParameter for Task/Mesh/CS to reflect previous changes in inline parameter setup. Remove/Update some stale comments since we are here. Finally, remove the prog_key/prog_data bits that tracked whether inline data or a push address was needed. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41230>
281 lines
8.7 KiB
C++
281 lines
8.7 KiB
C++
/*
|
|
* Copyright © 2010 Intel Corporation
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "brw_shader.h"
|
|
#include "brw_analysis.h"
|
|
#include "brw_builder.h"
|
|
#include "brw_generator.h"
|
|
#include "brw_nir.h"
|
|
#include "brw_cfg.h"
|
|
#include "brw_private.h"
|
|
#include "intel_nir.h"
|
|
#include "shader_enums.h"
|
|
#include "dev/intel_debug.h"
|
|
#include "dev/intel_wa.h"
|
|
|
|
#include <memory>
|
|
|
|
static void
|
|
fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
|
|
{
|
|
block->dwords = dwords;
|
|
block->regs = DIV_ROUND_UP(dwords, 8);
|
|
block->size = block->regs * 32;
|
|
}
|
|
|
|
extern "C" void
|
|
brw_cs_fill_push_const_info(const struct intel_device_info *devinfo,
|
|
struct brw_cs_prog_data *cs_prog_data,
|
|
int subgroup_id_index)
|
|
{
|
|
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
|
|
|
unsigned cross_thread_dwords, per_thread_dwords;
|
|
if (devinfo->verx10 < 125 && subgroup_id_index >= 0) {
|
|
/* Fill all but the last register with cross-thread payload */
|
|
cross_thread_dwords = 8 * (subgroup_id_index / 8);
|
|
per_thread_dwords = prog_data->push_sizes[0] / 4 - cross_thread_dwords;
|
|
assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
|
|
} else {
|
|
/* Fill all data using cross-thread payload */
|
|
cross_thread_dwords = prog_data->push_sizes[0] / 4;
|
|
per_thread_dwords = 0u;
|
|
}
|
|
|
|
fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
|
|
fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
|
|
|
|
assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
|
|
cs_prog_data->push.per_thread.size == 0);
|
|
assert(cs_prog_data->push.cross_thread.dwords +
|
|
cs_prog_data->push.per_thread.dwords ==
|
|
prog_data->push_sizes[0] / 4);
|
|
}
|
|
|
|
static bool
|
|
run_cs(brw_shader &s, bool allow_spilling)
|
|
{
|
|
assert(mesa_shader_stage_is_compute(s.stage));
|
|
|
|
s.payload_ = new brw_cs_thread_payload(s);
|
|
|
|
brw_from_nir(&s);
|
|
|
|
if (s.failed)
|
|
return false;
|
|
|
|
s.emit_cs_terminate();
|
|
|
|
brw_calculate_cfg(s);
|
|
|
|
brw_optimize(s);
|
|
|
|
s.assign_curb_setup();
|
|
|
|
brw_lower_3src_null_dest(s);
|
|
brw_workaround_emit_dummy_mov_instruction(s);
|
|
|
|
brw_allocate_registers(s, allow_spilling);
|
|
|
|
brw_workaround_source_arf_before_eot(s);
|
|
|
|
return !s.failed;
|
|
}
|
|
|
|
static bool
|
|
instr_uses_sampler(nir_builder *b, nir_instr *instr, void *cb_data)
|
|
{
|
|
if (instr->type != nir_instr_type_tex)
|
|
return false;
|
|
|
|
switch (nir_instr_as_tex(instr)->op) {
|
|
case nir_texop_tex:
|
|
case nir_texop_txd:
|
|
case nir_texop_txf:
|
|
case nir_texop_txl:
|
|
case nir_texop_txb:
|
|
case nir_texop_txf_ms:
|
|
case nir_texop_txf_ms_mcs_intel:
|
|
case nir_texop_lod:
|
|
case nir_texop_tg4:
|
|
case nir_texop_texture_samples:
|
|
return true;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
brw_nir_uses_sampler(nir_shader *shader)
|
|
{
|
|
return nir_shader_instructions_pass(shader, instr_uses_sampler,
|
|
nir_metadata_all,
|
|
NULL);
|
|
}
|
|
|
|
const unsigned *
|
|
brw_compile_cs(const struct brw_compiler *compiler,
|
|
struct brw_compile_cs_params *params)
|
|
{
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
|
struct nir_shader *nir = params->base.nir;
|
|
const struct brw_cs_prog_key *key = params->key;
|
|
struct brw_cs_prog_data *prog_data = params->prog_data;
|
|
|
|
const bool debug_enabled =
|
|
brw_should_print_shader(nir, params->base.debug_flag ?
|
|
params->base.debug_flag : DEBUG_CS,
|
|
params->base.source_hash);
|
|
|
|
brw_prog_data_init(&prog_data->base, ¶ms->base);
|
|
|
|
if (!nir->info.workgroup_size_variable) {
|
|
prog_data->local_size[0] = nir->info.workgroup_size[0];
|
|
prog_data->local_size[1] = nir->info.workgroup_size[1];
|
|
prog_data->local_size[2] = nir->info.workgroup_size[2];
|
|
}
|
|
|
|
brw_pass_tracker pt_ = {
|
|
.nir = nir,
|
|
.dispatch_width = 0,
|
|
.compiler = compiler,
|
|
.key = &key->base,
|
|
.archiver = params->base.archiver,
|
|
}, *pt = &pt_;
|
|
|
|
BRW_NIR_SNAPSHOT("first");
|
|
brw_postprocess_nir_opts(pt);
|
|
|
|
brw_simd_selection_state simd_state{
|
|
.devinfo = compiler->devinfo,
|
|
.prog_data = prog_data,
|
|
.required_width = brw_required_dispatch_width(&nir->info),
|
|
};
|
|
|
|
unsigned pressure[SIMD_COUNT];
|
|
brw_nir_quick_pressure_estimate(nir, devinfo, pressure);
|
|
|
|
for (unsigned i = 0; i < SIMD_COUNT; i++) {
|
|
simd_state.beyond_threshold[i] =
|
|
pressure[i] > compiler->register_pressure_threshold;
|
|
}
|
|
|
|
prog_data->uses_sampler = brw_nir_uses_sampler(params->base.nir);
|
|
|
|
std::unique_ptr<brw_shader> v[3];
|
|
|
|
for (unsigned i = 0; i < 3; i++) {
|
|
const unsigned simd = devinfo->ver >= 30 ? 2 - i : i;
|
|
|
|
if (!brw_simd_should_compile(simd_state, simd))
|
|
continue;
|
|
|
|
const unsigned dispatch_width = 8u << simd;
|
|
|
|
nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
|
|
|
|
pt_ = {
|
|
.nir = shader,
|
|
.dispatch_width = dispatch_width,
|
|
.compiler = compiler,
|
|
.archiver = params->base.archiver,
|
|
};
|
|
|
|
BRW_NIR_SNAPSHOT("first");
|
|
brw_nir_apply_key(pt, &key->base, dispatch_width);
|
|
|
|
brw_nir_optimize(pt);
|
|
/* brw_nir_optimize undoes late lowerings. */
|
|
BRW_NIR_PASS(nir_opt_algebraic_late);
|
|
brw_postprocess_nir_out_of_ssa(pt, debug_enabled);
|
|
|
|
const brw_shader_params shader_params = {
|
|
.compiler = compiler,
|
|
.mem_ctx = params->base.mem_ctx,
|
|
.nir = shader,
|
|
.key = &key->base,
|
|
.prog_data = &prog_data->base,
|
|
.dispatch_width = dispatch_width,
|
|
.needs_register_pressure = params->base.stats != NULL,
|
|
.log_data = params->base.log_data,
|
|
.debug_enabled = debug_enabled,
|
|
.archiver = params->base.archiver,
|
|
};
|
|
v[simd] = std::make_unique<brw_shader>(&shader_params);
|
|
|
|
const bool allow_spilling = simd == 0 ||
|
|
(!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) ||
|
|
nir->info.workgroup_size_variable;
|
|
|
|
if (devinfo->ver < 30 || nir->info.workgroup_size_variable) {
|
|
ASSERTED const int first = brw_simd_first_compiled(simd_state);
|
|
assert(allow_spilling == (first < 0 || nir->info.workgroup_size_variable));
|
|
}
|
|
|
|
if (run_cs(*v[simd], allow_spilling)) {
|
|
brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
|
|
|
|
if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers &&
|
|
!nir->info.workgroup_size_variable)
|
|
break;
|
|
} else {
|
|
simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg);
|
|
if (simd > 0) {
|
|
brw_shader_perf_log(compiler, params->base.log_data,
|
|
"SIMD%u shader failed to compile: %s\n",
|
|
dispatch_width, v[simd]->fail_msg);
|
|
}
|
|
}
|
|
}
|
|
|
|
const int selected_simd = brw_simd_select(simd_state);
|
|
if (selected_simd < 0) {
|
|
params->base.error_str =
|
|
ralloc_asprintf(params->base.mem_ctx,
|
|
"Can't compile shader: "
|
|
"SIMD8 '%s', SIMD16 '%s' and SIMD32 '%s'.\n",
|
|
simd_state.error[0], simd_state.error[1],
|
|
simd_state.error[2]);
|
|
return NULL;
|
|
}
|
|
|
|
assert(selected_simd < 3);
|
|
|
|
if (!nir->info.workgroup_size_variable)
|
|
prog_data->prog_mask = 1 << selected_simd;
|
|
|
|
brw_generator g(compiler, ¶ms->base, &prog_data->base,
|
|
MESA_SHADER_COMPUTE);
|
|
if (unlikely(debug_enabled)) {
|
|
char *name = ralloc_asprintf(params->base.mem_ctx,
|
|
"%s compute shader %s",
|
|
nir->info.label ?
|
|
nir->info.label : "unnamed",
|
|
nir->info.name);
|
|
g.enable_debug(name);
|
|
}
|
|
|
|
const uint32_t max_dispatch_width =
|
|
8u << (util_last_bit(prog_data->prog_mask) - 1);
|
|
|
|
struct genisa_stats *stats = params->base.stats;
|
|
for (unsigned simd = 0; simd < 3; simd++) {
|
|
if (prog_data->prog_mask & (1u << simd)) {
|
|
assert(v[simd]);
|
|
prog_data->prog_offset[simd] = g.generate_code(*v[simd], stats);
|
|
if (stats)
|
|
stats->max_dispatch_width = max_dispatch_width;
|
|
stats = stats ? stats + 1 : NULL;
|
|
|
|
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
|
v[simd]->grf_used);
|
|
}
|
|
}
|
|
|
|
g.add_const_data(nir->constant_data, nir->constant_data_size);
|
|
|
|
return g.get_assembly();
|
|
}
|