2024-07-12 13:52:46 -07:00
|
|
|
/*
|
|
|
|
|
* Copyright © 2010 Intel Corporation
|
|
|
|
|
* SPDX-License-Identifier: MIT
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "brw_fs.h"
|
2025-01-15 08:20:46 -08:00
|
|
|
#include "brw_builder.h"
|
2024-07-12 13:52:46 -07:00
|
|
|
#include "brw_fs_live_variables.h"
|
2024-12-06 16:17:46 -08:00
|
|
|
#include "brw_generator.h"
|
2024-07-12 13:52:46 -07:00
|
|
|
#include "brw_nir.h"
|
|
|
|
|
#include "brw_cfg.h"
|
|
|
|
|
#include "brw_private.h"
|
|
|
|
|
#include "intel_nir.h"
|
|
|
|
|
#include "shader_enums.h"
|
|
|
|
|
#include "dev/intel_debug.h"
|
|
|
|
|
#include "dev/intel_wa.h"
|
|
|
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
|
|
2024-07-12 14:20:57 -07:00
|
|
|
using namespace brw;
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
static void
|
|
|
|
|
fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
|
|
|
|
|
{
|
|
|
|
|
block->dwords = dwords;
|
|
|
|
|
block->regs = DIV_ROUND_UP(dwords, 8);
|
|
|
|
|
block->size = block->regs * 32;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
cs_fill_push_const_info(const struct intel_device_info *devinfo,
|
|
|
|
|
struct brw_cs_prog_data *cs_prog_data)
|
|
|
|
|
{
|
|
|
|
|
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
|
|
|
|
int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);
|
|
|
|
|
|
|
|
|
|
/* The thread ID should be stored in the last param dword */
|
|
|
|
|
assert(subgroup_id_index == -1 ||
|
|
|
|
|
subgroup_id_index == (int)prog_data->nr_params - 1);
|
|
|
|
|
|
|
|
|
|
unsigned cross_thread_dwords, per_thread_dwords;
|
|
|
|
|
if (subgroup_id_index >= 0) {
|
|
|
|
|
/* Fill all but the last register with cross-thread payload */
|
|
|
|
|
cross_thread_dwords = 8 * (subgroup_id_index / 8);
|
|
|
|
|
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
|
|
|
|
|
assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
|
|
|
|
|
} else {
|
|
|
|
|
/* Fill all data using cross-thread payload */
|
|
|
|
|
cross_thread_dwords = prog_data->nr_params;
|
|
|
|
|
per_thread_dwords = 0u;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
|
|
|
|
|
fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
|
|
|
|
|
|
|
|
|
|
assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
|
|
|
|
|
cs_prog_data->push.per_thread.size == 0);
|
|
|
|
|
assert(cs_prog_data->push.cross_thread.dwords +
|
|
|
|
|
cs_prog_data->push.per_thread.dwords ==
|
|
|
|
|
prog_data->nr_params);
|
|
|
|
|
}
|
|
|
|
|
|
2024-07-12 14:20:57 -07:00
|
|
|
static bool
|
|
|
|
|
run_cs(fs_visitor &s, bool allow_spilling)
|
|
|
|
|
{
|
|
|
|
|
assert(gl_shader_stage_is_compute(s.stage));
|
2024-12-29 15:41:04 -08:00
|
|
|
const brw_builder bld = brw_builder(&s).at_end();
|
2024-07-12 14:20:57 -07:00
|
|
|
|
|
|
|
|
s.payload_ = new cs_thread_payload(s);
|
|
|
|
|
|
|
|
|
|
if (s.devinfo->platform == INTEL_PLATFORM_HSW && s.prog_data->total_shared > 0) {
|
|
|
|
|
/* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
|
2024-12-29 15:41:04 -08:00
|
|
|
const brw_builder abld = bld.exec_all().group(1, 0);
|
2024-07-12 14:20:57 -07:00
|
|
|
abld.MOV(retype(brw_sr0_reg(1), BRW_TYPE_UW),
|
|
|
|
|
suboffset(retype(brw_vec1_grf(0, 0), BRW_TYPE_UW), 1));
|
|
|
|
|
}
|
|
|
|
|
|
2024-12-07 09:36:03 -08:00
|
|
|
brw_from_nir(&s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
|
|
|
|
if (s.failed)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
s.emit_cs_terminate();
|
|
|
|
|
|
2024-07-12 17:08:46 -07:00
|
|
|
brw_calculate_cfg(s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
2024-12-06 11:37:57 -08:00
|
|
|
brw_optimize(s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
|
|
|
|
s.assign_curb_setup();
|
|
|
|
|
|
2024-12-06 11:37:57 -08:00
|
|
|
brw_lower_3src_null_dest(s);
|
|
|
|
|
brw_workaround_memory_fence_before_eot(s);
|
|
|
|
|
brw_workaround_emit_dummy_mov_instruction(s);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
2024-07-12 16:55:33 -07:00
|
|
|
brw_allocate_registers(s, allow_spilling);
|
2024-07-12 14:20:57 -07:00
|
|
|
|
2024-12-06 11:37:57 -08:00
|
|
|
brw_workaround_source_arf_before_eot(s);
|
2024-10-19 12:53:21 +03:00
|
|
|
|
2024-07-12 14:20:57 -07:00
|
|
|
return !s.failed;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-09 17:15:11 -07:00
|
|
|
static bool
|
|
|
|
|
instr_uses_sampler(nir_builder *b, nir_instr *instr, void *cb_data)
|
|
|
|
|
{
|
|
|
|
|
if (instr->type != nir_instr_type_tex)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
switch (nir_instr_as_tex(instr)->op) {
|
|
|
|
|
case nir_texop_tex:
|
|
|
|
|
case nir_texop_txd:
|
|
|
|
|
case nir_texop_txf:
|
|
|
|
|
case nir_texop_txl:
|
|
|
|
|
case nir_texop_txb:
|
|
|
|
|
case nir_texop_txf_ms:
|
|
|
|
|
case nir_texop_txf_ms_mcs_intel:
|
|
|
|
|
case nir_texop_lod:
|
|
|
|
|
case nir_texop_tg4:
|
|
|
|
|
case nir_texop_texture_samples:
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
brw_nir_uses_sampler(nir_shader *shader)
|
|
|
|
|
{
|
|
|
|
|
return nir_shader_instructions_pass(shader, instr_uses_sampler,
|
|
|
|
|
nir_metadata_all,
|
|
|
|
|
NULL);
|
|
|
|
|
}
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
const unsigned *
|
|
|
|
|
brw_compile_cs(const struct brw_compiler *compiler,
|
|
|
|
|
struct brw_compile_cs_params *params)
|
|
|
|
|
{
|
2024-09-24 16:53:25 -07:00
|
|
|
const struct intel_device_info *devinfo = compiler->devinfo;
|
2024-09-30 08:45:21 +03:00
|
|
|
struct nir_shader *nir = params->base.nir;
|
2024-07-12 13:52:46 -07:00
|
|
|
const struct brw_cs_prog_key *key = params->key;
|
|
|
|
|
struct brw_cs_prog_data *prog_data = params->prog_data;
|
|
|
|
|
|
|
|
|
|
const bool debug_enabled =
|
|
|
|
|
brw_should_print_shader(nir, params->base.debug_flag ?
|
|
|
|
|
params->base.debug_flag : DEBUG_CS);
|
|
|
|
|
|
|
|
|
|
prog_data->base.stage = MESA_SHADER_COMPUTE;
|
|
|
|
|
prog_data->base.total_shared = nir->info.shared_size;
|
|
|
|
|
prog_data->base.ray_queries = nir->info.ray_queries;
|
|
|
|
|
prog_data->base.total_scratch = 0;
|
2024-09-30 08:45:21 +03:00
|
|
|
prog_data->uses_inline_data = brw_nir_uses_inline_data(nir);
|
|
|
|
|
assert(compiler->devinfo->verx10 >= 125 || !prog_data->uses_inline_data);
|
2024-07-12 13:52:46 -07:00
|
|
|
|
|
|
|
|
if (!nir->info.workgroup_size_variable) {
|
|
|
|
|
prog_data->local_size[0] = nir->info.workgroup_size[0];
|
|
|
|
|
prog_data->local_size[1] = nir->info.workgroup_size[1];
|
|
|
|
|
prog_data->local_size[2] = nir->info.workgroup_size[2];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
brw_simd_selection_state simd_state{
|
|
|
|
|
.devinfo = compiler->devinfo,
|
|
|
|
|
.prog_data = prog_data,
|
|
|
|
|
.required_width = brw_required_dispatch_width(&nir->info),
|
|
|
|
|
};
|
|
|
|
|
|
2023-08-09 17:15:11 -07:00
|
|
|
prog_data->uses_sampler = brw_nir_uses_sampler(params->base.nir);
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
std::unique_ptr<fs_visitor> v[3];
|
|
|
|
|
|
2024-09-24 16:53:25 -07:00
|
|
|
for (unsigned i = 0; i < 3; i++) {
|
|
|
|
|
const unsigned simd = devinfo->ver >= 30 ? 2 - i : i;
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
if (!brw_simd_should_compile(simd_state, simd))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
const unsigned dispatch_width = 8u << simd;
|
|
|
|
|
|
|
|
|
|
nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
|
|
|
|
|
brw_nir_apply_key(shader, compiler, &key->base,
|
|
|
|
|
dispatch_width);
|
|
|
|
|
|
|
|
|
|
NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width);
|
|
|
|
|
|
|
|
|
|
/* Clean up after the local index and ID calculations. */
|
|
|
|
|
NIR_PASS(_, shader, nir_opt_constant_folding);
|
|
|
|
|
NIR_PASS(_, shader, nir_opt_dce);
|
|
|
|
|
|
|
|
|
|
brw_postprocess_nir(shader, compiler, debug_enabled,
|
|
|
|
|
key->base.robust_flags);
|
|
|
|
|
|
|
|
|
|
v[simd] = std::make_unique<fs_visitor>(compiler, ¶ms->base,
|
|
|
|
|
&key->base,
|
|
|
|
|
&prog_data->base,
|
|
|
|
|
shader, dispatch_width,
|
|
|
|
|
params->base.stats != NULL,
|
|
|
|
|
debug_enabled);
|
|
|
|
|
|
2024-09-24 16:53:25 -07:00
|
|
|
const bool allow_spilling = simd == 0 ||
|
|
|
|
|
(!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) ||
|
|
|
|
|
nir->info.workgroup_size_variable;
|
2024-07-12 13:52:46 -07:00
|
|
|
|
2024-09-24 16:53:25 -07:00
|
|
|
if (devinfo->ver < 30 || nir->info.workgroup_size_variable) {
|
|
|
|
|
const int first = brw_simd_first_compiled(simd_state);
|
|
|
|
|
if (first >= 0)
|
|
|
|
|
v[simd]->import_uniforms(v[first].get());
|
|
|
|
|
assert(allow_spilling == (first < 0 || nir->info.workgroup_size_variable));
|
|
|
|
|
}
|
2024-07-12 13:52:46 -07:00
|
|
|
|
2024-07-12 14:20:57 -07:00
|
|
|
if (run_cs(*v[simd], allow_spilling)) {
|
2024-07-12 13:52:46 -07:00
|
|
|
cs_fill_push_const_info(compiler->devinfo, prog_data);
|
|
|
|
|
|
|
|
|
|
brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);
|
2024-09-24 16:53:25 -07:00
|
|
|
|
|
|
|
|
if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers &&
|
|
|
|
|
!nir->info.workgroup_size_variable)
|
|
|
|
|
break;
|
2024-07-12 13:52:46 -07:00
|
|
|
} else {
|
|
|
|
|
simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg);
|
|
|
|
|
if (simd > 0) {
|
|
|
|
|
brw_shader_perf_log(compiler, params->base.log_data,
|
|
|
|
|
"SIMD%u shader failed to compile: %s\n",
|
|
|
|
|
dispatch_width, v[simd]->fail_msg);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const int selected_simd = brw_simd_select(simd_state);
|
|
|
|
|
if (selected_simd < 0) {
|
|
|
|
|
params->base.error_str =
|
|
|
|
|
ralloc_asprintf(params->base.mem_ctx,
|
|
|
|
|
"Can't compile shader: "
|
|
|
|
|
"SIMD8 '%s', SIMD16 '%s' and SIMD32 '%s'.\n",
|
|
|
|
|
simd_state.error[0], simd_state.error[1],
|
|
|
|
|
simd_state.error[2]);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(selected_simd < 3);
|
|
|
|
|
|
|
|
|
|
if (!nir->info.workgroup_size_variable)
|
|
|
|
|
prog_data->prog_mask = 1 << selected_simd;
|
|
|
|
|
|
2024-12-06 16:33:35 -08:00
|
|
|
brw_generator g(compiler, ¶ms->base, &prog_data->base,
|
2024-07-12 13:52:46 -07:00
|
|
|
MESA_SHADER_COMPUTE);
|
|
|
|
|
if (unlikely(debug_enabled)) {
|
|
|
|
|
char *name = ralloc_asprintf(params->base.mem_ctx,
|
|
|
|
|
"%s compute shader %s",
|
|
|
|
|
nir->info.label ?
|
|
|
|
|
nir->info.label : "unnamed",
|
|
|
|
|
nir->info.name);
|
|
|
|
|
g.enable_debug(name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);
|
|
|
|
|
|
|
|
|
|
struct brw_compile_stats *stats = params->base.stats;
|
|
|
|
|
for (unsigned simd = 0; simd < 3; simd++) {
|
|
|
|
|
if (prog_data->prog_mask & (1u << simd)) {
|
|
|
|
|
assert(v[simd]);
|
|
|
|
|
prog_data->prog_offset[simd] =
|
|
|
|
|
g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats,
|
|
|
|
|
v[simd]->performance_analysis.require(), stats);
|
|
|
|
|
if (stats)
|
|
|
|
|
stats->max_dispatch_width = max_dispatch_width;
|
|
|
|
|
stats = stats ? stats + 1 : NULL;
|
2024-09-18 14:32:58 -07:00
|
|
|
|
|
|
|
|
prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
|
|
|
|
|
v[simd]->grf_used);
|
|
|
|
|
|
2024-07-12 13:52:46 -07:00
|
|
|
max_dispatch_width = 8u << simd;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
g.add_const_data(nir->constant_data, nir->constant_data_size);
|
|
|
|
|
|
|
|
|
|
return g.get_assembly();
|
|
|
|
|
}
|